diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 288 |
1 files changed, 238 insertions, 50 deletions
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 3b418ba82b..cdd58074b2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -20,6 +20,8 @@ #define DEBUG_TYPE "sched" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Target/MRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" @@ -33,6 +35,11 @@ using namespace llvm; namespace { + cl::opt<bool> + SchedVertically("sched-vertically", cl::Hidden); +} + +namespace { Statistic<> NumNoops ("scheduler", "Number of noops inserted"); Statistic<> NumStalls("scheduler", "Number of pipeline stalls"); @@ -63,10 +70,10 @@ namespace { SUnit(SDNode *node, unsigned nodenum) : Node(node), NumPredsLeft(0), NumSuccsLeft(0), - NumChainPredsLeft(0), NumChainSuccsLeft(0), - isTwoAddress(false), isDefNUseOperand(false), isPending(false), - isAvailable(false), isScheduled(false), - Latency(0), CycleBound(0), Cycle(0), NodeNum(nodenum) {} + NumChainPredsLeft(0), NumChainSuccsLeft(0), + isTwoAddress(false), isDefNUseOperand(false), + isPending(false), isAvailable(false), isScheduled(false), + Latency(0), CycleBound(0), Cycle(0), NodeNum(nodenum) {} void dump(const SelectionDAG *G) const; void dumpAll(const SelectionDAG *G) const; @@ -141,6 +148,8 @@ public: virtual void push_all(const std::vector<SUnit *> &Nodes) = 0; virtual SUnit *pop() = 0; + + virtual void RemoveFromPriorityQueue(SUnit *SU) = 0; /// ScheduledNode - As each node is scheduled, this method is invoked. This /// allows the priority function to adjust the priority of node that have @@ -160,6 +169,7 @@ class ScheduleDAGList : public ScheduleDAG { private: // SDNode to SUnit mapping (many to one). std::map<SDNode*, SUnit*> SUnitMap; + // The schedule. Null SUnit*'s represent noop instructions. std::vector<SUnit*> Sequence; @@ -180,10 +190,16 @@ private: /// added to the AvailableQueue. This keeps track of each SUnit and the /// number of cycles left to execute before the operation is available. std::vector<std::pair<unsigned, SUnit*> > PendingQueue; - + /// HazardRec - The hazard recognizer to use. HazardRecognizer *HazardRec; - + + /// OpenNodes - Nodes with open live ranges, i.e. predecessors or successors + /// of scheduled nodes which are not themselves scheduled. + std::map<const TargetRegisterClass*, std::set<SUnit*> > OpenNodes; + + std::map<const TargetRegisterClass*, unsigned> RegPressureLimits; + public: ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb, const TargetMachine &tm, bool isbottomup, @@ -206,7 +222,8 @@ private: SUnit *NewSUnit(SDNode *N); void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle); void ReleaseSucc(SUnit *SuccSU, bool isChain); - void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle); + void ScheduleNodeBottomUp(SUnit *SU, unsigned& CurCycle, bool Veritical=true); + void ScheduleVertically(SUnit *SU, unsigned& CurCycle); void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); void ListScheduleBottomUp(); @@ -424,6 +441,41 @@ void ScheduleDAGList::Schedule() { // Bottom-Up Scheduling //===----------------------------------------------------------------------===// +static const TargetRegisterClass *getRegClass(SUnit *SU, + const TargetInstrInfo *TII, + const MRegisterInfo *MRI, + SSARegMap *RegMap) { + if (SU->Node->isTargetOpcode()) { + unsigned Opc = SU->Node->getTargetOpcode(); + const TargetInstrDescriptor &II = TII->get(Opc); + return II.OpInfo->RegClass; + } else { + assert(SU->Node->getOpcode() == ISD::CopyFromReg); + unsigned SrcReg = cast<RegisterSDNode>(SU->Node->getOperand(1))->getReg(); + if (MRegisterInfo::isVirtualRegister(SrcReg)) + return RegMap->getRegClass(SrcReg); + else { + for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(), + E = MRI->regclass_end(); I != E; ++I) + if ((*I)->hasType(SU->Node->getValueType(0)) && + (*I)->contains(SrcReg)) + return *I; + assert(false && "Couldn't find register class for reg copy!"); + } + return NULL; + } +} + +static unsigned getNumResults(SUnit *SU) { + unsigned NumResults = 0; + for (unsigned i = 0, e = SU->Node->getNumValues(); i != e; ++i) { + MVT::ValueType VT = SU->Node->getValueType(i); + if (VT != MVT::Other && VT != MVT::Flag) + NumResults++; + } + return NumResults; +} + /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to /// the Available queue is the count reaches zero. Also update its cycle bound. void ScheduleDAGList::ReleasePred(SUnit *PredSU, bool isChain, @@ -455,11 +507,106 @@ void ScheduleDAGList::ReleasePred(SUnit *PredSU, bool isChain, AvailableQueue->push(PredSU); } } + + if (getNumResults(PredSU) > 0) { + const TargetRegisterClass *RegClass = getRegClass(PredSU, TII, MRI, RegMap); + OpenNodes[RegClass].insert(PredSU); + } +} + +/// SharesOperandWithTwoAddr - Check if there is a unscheduled two-address node +/// with which SU shares an operand. If so, returns the node. +static SUnit *SharesOperandWithTwoAddr(SUnit *SU) { + assert(!SU->isTwoAddress && "Node cannot be two-address op"); + for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), + E = SU->Preds.end(); I != E; ++I) { + if (I->second) continue; + SUnit *PredSU = I->first; + for (std::set<std::pair<SUnit*, bool> >::iterator II = + PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) { + if (II->second) continue; + SUnit *SSU = II->first; + if (SSU->isTwoAddress && !SSU->isScheduled) { + return SSU; + } + } + } + return NULL; } + +static bool isFloater(const SUnit *SU) { + unsigned Opc = SU->Node->getOpcode(); + return (Opc != ISD::CopyFromReg && SU->NumPredsLeft == 0); +} + +static bool isSimpleFloaterUse(const SUnit *SU) { + unsigned NumOps = 0; + for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), + E = SU->Preds.end(); I != E; ++I) { + if (I->second) continue; + if (++NumOps > 1) + return false; + if (!isFloater(I->first)) + return false; + } + return true; +} + +/// ScheduleVertically - Schedule vertically. That is, follow up the D&U chain +/// (of two-address code) and schedule floaters aggressively. +void ScheduleDAGList::ScheduleVertically(SUnit *SU, unsigned& CurCycle) { + // Try scheduling Def&Use operand if register pressure is low. + const TargetRegisterClass *RegClass = getRegClass(SU, TII, MRI, RegMap); + unsigned Pressure = OpenNodes[RegClass].size(); + unsigned Limit = RegPressureLimits[RegClass]; + + // See if we can schedule any predecessor that takes no registers. + for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), + E = SU->Preds.end(); I != E; ++I) { + if (I->second) continue; + + SUnit *PredSU = I->first; + if (!PredSU->isAvailable || PredSU->isScheduled) + continue; + + if (isFloater(PredSU)) { + DEBUG(std::cerr<<"*** Scheduling floater\n"); + AvailableQueue->RemoveFromPriorityQueue(PredSU); + ScheduleNodeBottomUp(PredSU, CurCycle, false); + } + } + + SUnit *DUSU = NULL; + if (SU->isTwoAddress && Pressure < Limit) { + DUSU = SUnitMap[SU->Node->getOperand(0).Val]; + if (!DUSU->isAvailable || DUSU->isScheduled) + DUSU = NULL; + else if (!DUSU->isTwoAddress) { + SUnit *SSU = SharesOperandWithTwoAddr(DUSU); + if (SSU && SSU->isAvailable) { + AvailableQueue->RemoveFromPriorityQueue(SSU); + ScheduleNodeBottomUp(SSU, CurCycle, false); + Pressure = OpenNodes[RegClass].size(); + if (Pressure >= Limit) + DUSU = NULL; + } + } + } + + if (DUSU) { + DEBUG(std::cerr<<"*** Low register pressure: scheduling D&U operand\n"); + AvailableQueue->RemoveFromPriorityQueue(DUSU); + ScheduleNodeBottomUp(DUSU, CurCycle, false); + Pressure = OpenNodes[RegClass].size(); + ScheduleVertically(DUSU, CurCycle); + } +} + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. -void ScheduleDAGList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGList::ScheduleNodeBottomUp(SUnit *SU, unsigned& CurCycle, + bool Vertical) { DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(&DAG)); SU->Cycle = CurCycle; @@ -469,48 +616,66 @@ void ScheduleDAGList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { // Bottom up: release predecessors for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { + E = SU->Preds.end(); I != E; ++I) ReleasePred(I->first, I->second, CurCycle); - // FIXME: This is something used by the priority function that it should - // calculate directly. - if (!I->second) - SU->NumPredsLeft--; - else - SU->NumChainPredsLeft--; + SU->isScheduled = true; + CurCycle++; + + if (getNumResults(SU) != 0) { + const TargetRegisterClass *RegClass = getRegClass(SU, TII, MRI, RegMap); + OpenNodes[RegClass].erase(SU); + + if (SchedVertically && Vertical) + ScheduleVertically(SU, CurCycle); } } /// isReady - True if node's lower cycle bound is less or equal to the current /// scheduling cycle. Always true if all nodes have uniform latency 1. -static inline bool isReady(SUnit *SU, unsigned CurrCycle) { - return SU->CycleBound <= CurrCycle; +static inline bool isReady(SUnit *SU, unsigned CurCycle) { + return SU->CycleBound <= CurCycle; } /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up /// schedulers. void ScheduleDAGList::ListScheduleBottomUp() { - unsigned CurrCycle = 0; + // Determine rough register pressure limit. + for (MRegisterInfo::regclass_iterator RCI = MRI->regclass_begin(), + E = MRI->regclass_end(); RCI != E; ++RCI) { + const TargetRegisterClass *RC = *RCI; + unsigned Limit = RC->getNumRegs(); + Limit = (Limit > 2) ? Limit - 2 : 0; + std::map<const TargetRegisterClass*, unsigned>::iterator RPI = + RegPressureLimits.find(RC); + if (RPI == RegPressureLimits.end()) + RegPressureLimits[RC] = Limit; + else { + unsigned &OldLimit = RegPressureLimits[RC]; + if (Limit < OldLimit) + OldLimit = Limit; + } + } + + unsigned CurCycle = 0; // Add root to Available queue. AvailableQueue->push(SUnitMap[DAG.getRoot().Val]); // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector<SUnit*> NotReady; - SUnit *CurrNode = NULL; + SUnit *CurNode = NULL; while (!AvailableQueue->empty()) { - SUnit *CurrNode = AvailableQueue->pop(); - while (!isReady(CurrNode, CurrCycle)) { - NotReady.push_back(CurrNode); - CurrNode = AvailableQueue->pop(); + SUnit *CurNode = AvailableQueue->pop(); + while (!isReady(CurNode, CurCycle)) { + NotReady.push_back(CurNode); + CurNode = AvailableQueue->pop(); } // Add the nodes that aren't ready back onto the available list. AvailableQueue->push_all(NotReady); NotReady.clear(); - ScheduleNodeBottomUp(CurrNode, CurrCycle); - CurrCycle++; - CurrNode->isScheduled = true; + ScheduleNodeBottomUp(CurNode, CurCycle); } // Add entry node last @@ -574,7 +739,6 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) { } PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU)); - SuccSU->isPending = true; } } @@ -782,6 +946,27 @@ namespace { return V; } + /// RemoveFromPriorityQueue - This is a really inefficient way to remove a + /// node from a priority queue. We should roll our own heap to make this + /// better or something. + void RemoveFromPriorityQueue(SUnit *SU) { + std::vector<SUnit*> Temp; + + assert(!Queue.empty() && "Not in queue!"); + while (Queue.top() != SU) { + Temp.push_back(Queue.top()); + Queue.pop(); + assert(!Queue.empty() && "Not in queue!"); + } + + // Remove the node from the PQ. + Queue.pop(); + + // Add all the other nodes back. + for (unsigned i = 0, e = Temp.size(); i != e; ++i) + Queue.push(Temp[i]); + } + private: void CalculatePriorities(); int CalcNodePriority(const SUnit *SU); @@ -797,13 +982,16 @@ bool ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { int RPriority = SPQ->getSethiUllmanNumber(RightNum); bool LIsFloater = LIsTarget && (LPriority == 1 || LPriority == 0); bool RIsFloater = RIsTarget && (RPriority == 1 || RPriority == 0); + int LBonus = 0; + int RBonus = 0; - // Schedule floaters (e.g. load from some constant address) and immediate use - // of floaters (with no other operands) just before the use. - if (LIsFloater && !RIsFloater) - LPriority += 2; - else if (!LIsFloater && RIsFloater) - RPriority += 2; + // Schedule floaters (e.g. load from some constant address) and those nodes + // with a single predecessor each first. They maintain / reduce register + // pressure. + if (LIsFloater) + LBonus += 2; + if (RIsFloater) + RBonus += 2; // Special tie breaker: if two nodes share a operand, the one that use it // as a def&use operand is preferred. @@ -811,21 +999,21 @@ bool ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (left->isTwoAddress && !right->isTwoAddress) { SDNode *DUNode = left->Node->getOperand(0).Val; if (DUNode->isOperand(right->Node)) - LPriority += 2; + LBonus += 2; } if (!left->isTwoAddress && right->isTwoAddress) { SDNode *DUNode = right->Node->getOperand(0).Val; if (DUNode->isOperand(left->Node)) - RPriority += 2; + RBonus += 2; } } - if (LPriority < RPriority) + if (LPriority+LBonus < RPriority+RBonus) return true; - else if (LPriority == RPriority) + else if (LPriority+LBonus == RPriority+RBonus) if (left->NumPredsLeft > right->NumPredsLeft) return true; - else if (left->NumPredsLeft == right->NumPredsLeft) + else if (left->NumPredsLeft+LBonus == right->NumPredsLeft+RBonus) if (left->CycleBound > right->CycleBound) return true; return false; @@ -955,18 +1143,7 @@ public: Queue.pop(); return V; } - - // ScheduledNode - As nodes are scheduled, we look to see if there are any - // successor nodes that have a single unscheduled predecessor. If so, that - // single predecessor has a higher priority, since scheduling it will make - // the node available. - void ScheduledNode(SUnit *Node); - -private: - void CalculatePriorities(); - int CalcLatency(const SUnit &SU); - void AdjustPriorityOfUnscheduledPreds(SUnit *SU); - + /// RemoveFromPriorityQueue - This is a really inefficient way to remove a /// node from a priority queue. We should roll our own heap to make this /// better or something. @@ -987,6 +1164,17 @@ private: for (unsigned i = 0, e = Temp.size(); i != e; ++i) Queue.push(Temp[i]); } + + // ScheduledNode - As nodes are scheduled, we look to see if there are any + // successor nodes that have a single unscheduled predecessor. If so, that + // single predecessor has a higher priority, since scheduling it will make + // the node available. + void ScheduledNode(SUnit *Node); + +private: + void CalculatePriorities(); + int CalcLatency(const SUnit &SU); + void AdjustPriorityOfUnscheduledPreds(SUnit *SU); }; } |