| author | Dan Gohman <gohman@apple.com> | 2008-11-25 00:52:40 +0000 |
|---|---|---|
| committer | Dan Gohman <gohman@apple.com> | 2008-11-25 00:52:40 +0000 |
| commit | 21d9003087c9a707e6cd95460136b499df358fb8 (patch) | |
| tree | 1cfc267392250dd28a6d3c70050e3dcd359b68d4 /lib | |
| parent | 662165d2249746b01b154287d3f5ed92f6293c2b (diff) | |
Initial support for anti-dependence breaking. Currently this code does not
introduce any new spilling; it just uses unused registers.
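
An anti-dependence (a write-after-read hazard) exists when an instruction redefines a register that an earlier instruction still reads: no value flows between the two, but the redefinition must not be scheduled above the read. Breaking the dependence means rewriting the later live range to a different register. The sketch below is illustrative only and is not code from this patch; the toy `Instr` type, the register numbers, and the choice of "free" register are made up for the example.

```cpp
#include <cstdio>
#include <vector>

// Toy three-address instruction: dst = op(src). Registers are small integers.
struct Instr { int dst, src; };

int main() {
  // r1 = op(r0)   ; defines r1
  // r2 = op(r1)   ; reads r1
  // r1 = op(r0)   ; redefines r1 -> anti-dependence: this instruction
  //               ; may not be moved above the read of r1.
  std::vector<Instr> code = {{1, 0}, {2, 1}, {1, 0}};

  // Break the anti-dependence by renaming the second live range of r1 to a
  // register that is otherwise unused in the block (say r3). Any later uses
  // of that live range would be rewritten to r3 as well.
  int freeReg = 3;
  code[2].dst = freeReg;

  for (const Instr &I : code)
    std::printf("r%d = op(r%d)\n", I.dst, I.src);
  return 0;
}
```

Because only registers that are already unused are chosen, the renaming cannot increase register pressure, which is why no new spilling is introduced.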
Refactor the SUnit topological sort code out of the RRList scheduler and
make use of it to help with the post-pass scheduler.
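
The refactored `ScheduleDAGTopologicalSort` (added to ScheduleDAG.cpp below) maintains one topological index per `SUnit` so that every edge runs from a lower index to a higher one; that makes reachability and cycle queries cheap for both schedulers. A minimal standalone sketch of the initialization step, using a plain adjacency-list DAG instead of `SUnit`s (the `topoIndex` helper and its graph representation are assumptions for this example), might look like:

```cpp
#include <cassert>
#include <vector>

// Assign each node of a DAG (given as successor lists) a topological index
// such that for every edge u -> v, index[u] < index[v].
std::vector<int> topoIndex(const std::vector<std::vector<int>> &succs) {
  const int n = static_cast<int>(succs.size());
  std::vector<int> outDeg(n), index(n), work;
  std::vector<std::vector<int>> preds(n);

  for (int u = 0; u < n; ++u) {
    outDeg[u] = static_cast<int>(succs[u].size());
    if (outDeg[u] == 0)
      work.push_back(u);            // sinks seed the worklist
    for (int v : succs[u])
      preds[v].push_back(u);        // predecessor lists for the bottom-up walk
  }

  // Hand out indices from the back: sinks receive the largest indices, and a
  // node is numbered only once all of its successors have been numbered.
  int id = n;
  while (!work.empty()) {
    int u = work.back();
    work.pop_back();
    index[u] = --id;
    for (int p : preds[u])
      if (--outDeg[p] == 0)
        work.push_back(p);
  }
  assert(id == 0 && "input graph had a cycle");
  return index;
}

int main() {
  // 0 -> 1 -> 3 and 0 -> 2 -> 3
  std::vector<std::vector<int>> succs = {{1, 2}, {3}, {3}, {}};
  std::vector<int> idx = topoIndex(succs);
  for (int u = 0; u < 4; ++u)
    for (int v : succs[u])
      assert(idx[u] < idx[v]);
  return 0;
}
```

Once such an index exists, "is there a path from X to Y" only needs to search nodes whose indices lie between the two endpoints, which is the basis of the `IsReachable` and `WillCreateCycle` queries in the patch.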
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@59999 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/PostRASchedulerList.cpp | 439
-rw-r--r-- | lib/CodeGen/ScheduleDAG.cpp | 201
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 264
3 files changed, 643 insertions, 261 deletions
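
In the PostRASchedulerList.cpp hunk below, `BreakAntiDependencies` begins by computing a critical path: it propagates predecessor latencies through the DAG in topological order, remembers which edge determined each node's bound, and then walks that path bottom-up looking for anti-dependence edges worth breaking. A simplified, self-contained version of just that latency propagation (the `Node` type and the latencies are invented for the example) could look like this:

```cpp
#include <cstdio>
#include <vector>

// Simplified scheduling node: an issue latency plus predecessor edges.
struct Node {
  unsigned latency = 1;
  std::vector<int> preds;   // indices of predecessor nodes
};

int main() {
  // A small DAG whose nodes are already numbered in topological order,
  // so every predecessor index is smaller than the node's own index.
  std::vector<Node> dag(4);
  dag[0].latency = 2;
  dag[1].latency = 3; dag[1].preds = {0};
  dag[2].latency = 1; dag[2].preds = {0};
  dag[3].latency = 2; dag[3].preds = {1, 2};

  // cycleBound[u]: longest latency chain leading into u (earliest start).
  // critPred[u]: the predecessor that determined that bound, so the
  // critical path can be walked back from its final node.
  std::vector<unsigned> cycleBound(dag.size(), 0);
  std::vector<int> critPred(dag.size(), -1);
  int last = 0;
  for (int u = 0; u != static_cast<int>(dag.size()); ++u) {
    for (int p : dag[u].preds) {
      unsigned bound = cycleBound[p] + dag[p].latency;
      if (bound > cycleBound[u]) {
        cycleBound[u] = bound;
        critPred[u] = p;
      }
    }
    if (cycleBound[u] + dag[u].latency >
        cycleBound[last] + dag[last].latency)
      last = u;                      // node that ends the critical path
  }

  std::printf("critical path latency: %u\n",
              cycleBound[last] + dag[last].latency);
  for (int u = last; u != -1; u = critPred[u])
    std::printf("  node %d\n", u);   // bottom-up walk of the path
  return 0;
}
```

The heuristic in the patch is that edges on this path matter most: with a limited supply of free registers, anti-dependencies elsewhere are left alone.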
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index a2ad4e356a..ed24b8fea8 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -24,24 +24,32 @@ #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DenseSet.h" +#include <map> +#include <climits> using namespace llvm; STATISTIC(NumStalls, "Number of pipeline stalls"); +static cl::opt<bool> +EnableAntiDepBreaking("break-anti-dependencies", + cl::desc("Break scheduling anti-dependencies"), + cl::init(false)); + namespace { class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass { public: static char ID; PostRAScheduler() : MachineFunctionPass(&ID) {} - private: - MachineFunction *MF; - const TargetMachine *TM; - public: + const char *getPassName() const { - return "Post RA top-down list latency scheduler (STUB)"; + return "Post RA top-down list latency scheduler"; } bool runOnMachineFunction(MachineFunction &Fn); @@ -49,13 +57,6 @@ namespace { char PostRAScheduler::ID = 0; class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs { - public: - SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm) - : ScheduleDAGInstrs(mbb, tm) {} - private: - MachineFunction *MF; - const TargetMachine *TM; - /// AvailableQueue - The priority queue to use for the available SUnits. /// LatencyPriorityQueue AvailableQueue; @@ -66,12 +67,12 @@ namespace { /// added to the AvailableQueue. std::vector<SUnit*> PendingQueue; - public: - const char *getPassName() const { - return "Post RA top-down list latency scheduler (STUB)"; - } + /// Topo - A topological ordering for SUnits. + ScheduleDAGTopologicalSort Topo; - bool runOnMachineFunction(MachineFunction &Fn); + public: + SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm) + : ScheduleDAGInstrs(mbb, tm), Topo(SUnits) {} void Schedule(); @@ -79,19 +80,18 @@ namespace { void ReleaseSucc(SUnit *SU, SUnit *SuccSU, bool isChain); void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); + bool BreakAntiDependencies(); }; } bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DOUT << "PostRAScheduler\n"; - MF = &Fn; - TM = &MF->getTarget(); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe; ++MBB) { - SchedulePostRATDList Scheduler(MBB, *TM); + SchedulePostRATDList Scheduler(MBB, Fn.getTarget()); Scheduler.Run(); @@ -108,13 +108,407 @@ void SchedulePostRATDList::Schedule() { // Build scheduling units. BuildSchedUnits(); + if (EnableAntiDepBreaking) { + if (BreakAntiDependencies()) { + // We made changes. Update the dependency graph. + // Theoretically we could update the graph in place: + // When a live range is changed to use a different register, remove + // the def's anti-dependence *and* output-dependence edges due to + // that register, and add new anti-dependence and output-dependence + // edges based on the next live range of the register. 
+ SUnits.clear(); + BuildSchedUnits(); + } + } + AvailableQueue.initNodes(SUnits); - + ListScheduleTopDown(); AvailableQueue.releaseState(); } +/// getInstrOperandRegClass - Return register class of the operand of an +/// instruction of the specified TargetInstrDesc. +static const TargetRegisterClass* +getInstrOperandRegClass(const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, const TargetInstrDesc &II, + unsigned Op) { + if (Op >= II.getNumOperands()) + return NULL; + if (II.OpInfo[Op].isLookupPtrRegClass()) + return TII->getPointerRegClass(); + return TRI->getRegClass(II.OpInfo[Op].RegClass); +} + +/// BreakAntiDependencies - Identifiy anti-dependencies along the critical path +/// of the ScheduleDAG and break them by renaming registers. +/// +bool SchedulePostRATDList::BreakAntiDependencies() { + // The code below assumes that there is at least one instruction, + // so just duck out immediately if the block is empty. + if (BB->empty()) return false; + + Topo.InitDAGTopologicalSorting(); + + // Compute a critical path for the DAG. + SUnit *Max = 0; + std::vector<SDep *> CriticalPath(SUnits.size()); + for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), + E = Topo.end(); I != E; ++I) { + SUnit *SU = &SUnits[*I]; + for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + SUnit *PredSU = P->Dep; + unsigned PredLatency = PredSU->CycleBound + PredSU->Latency; + if (SU->CycleBound < PredLatency) { + SU->CycleBound = PredLatency; + CriticalPath[*I] = &*P; + } + } + // Keep track of the node at the end of the critical path. + if (!Max || SU->CycleBound + SU->Latency > Max->CycleBound + Max->Latency) + Max = SU; + } + + DOUT << "Critical path has total latency " + << (Max ? Max->CycleBound + Max->Latency : 0) << "\n"; + + // Walk the critical path from the bottom up. Collect all anti-dependence + // edges on the critical path. Skip anti-dependencies between SUnits that + // are connected with other edges, since such units won't be able to be + // scheduled past each other anyway. + // + // The heuristic is that edges on the critical path are more important to + // break than other edges. And since there are a limited number of + // registers, we don't want to waste them breaking edges that aren't + // important. + // + // TODO: Instructions with multiple defs could have multiple + // anti-dependencies. The current code here only knows how to break one + // edge per instruction. Note that we'd have to be able to break all of + // the anti-dependencies in an instruction in order to be effective. + BitVector AllocatableSet = TRI->getAllocatableSet(*MF); + DenseMap<MachineInstr *, unsigned> CriticalAntiDeps; + for (SUnit *SU = Max; CriticalPath[SU->NodeNum]; + SU = CriticalPath[SU->NodeNum]->Dep) { + SDep *Edge = CriticalPath[SU->NodeNum]; + SUnit *NextSU = Edge->Dep; + unsigned AntiDepReg = Edge->Reg; + // Don't break anti-dependencies on non-allocatable registers. + if (!AllocatableSet.test(AntiDepReg)) + continue; + // If the SUnit has other dependencies on the SUnit that it + // anti-depends on, don't bother breaking the anti-dependency. + // Also, if there are dependencies on other SUnits with the + // same register as the anti-dependency, don't attempt to + // break it. + for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) + if (P->Dep == NextSU ? 
+ (!P->isAntiDep || P->Reg != AntiDepReg) : + (!P->isCtrl && !P->isAntiDep && P->Reg == AntiDepReg)) { + AntiDepReg = 0; + break; + } + if (AntiDepReg != 0) + CriticalAntiDeps[SU->getInstr()] = AntiDepReg; + } + + // For live regs that are only used in one register class in a live range, + // the register class. If the register is not live or is referenced in + // multiple register classes, the corresponding value is null. If the + // register is used in multiple register classes, the corresponding value + // is -1 casted to a pointer. + const TargetRegisterClass * + Classes[TargetRegisterInfo::FirstVirtualRegister] = {}; + + // Map registers to all their references within a live range. + std::multimap<unsigned, MachineOperand *> RegRefs; + + // The index of the most recent kill (proceding bottom-up), or -1 if + // the register is not live. + unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::fill(KillIndices, array_endof(KillIndices), -1); + // The index of the most recent def (proceding bottom up), or -1 if + // the register is live. + unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::fill(DefIndices, array_endof(DefIndices), BB->size()); + + // Determine the live-out physregs for this block. + if (!BB->empty() && BB->back().getDesc().isReturn()) + // In a return block, examine the function live-out regs. + for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), + E = MRI.liveout_end(); I != E; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = -1; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = -1; + } + } + else + // In a non-return block, examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = -1; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = -1; + } + } + + // Consider callee-saved registers as live-out, since we're running after + // prologue/epilogue insertion so there's no way to add additional + // saved registers. + // + // TODO: If the callee saves and restores these, then we can potentially + // use them between the save and the restore. To do that, we could scan + // the exit blocks to see which of these registers are defined. + for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = -1; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = -1; + } + } + + // Consider this pattern: + // A = ... + // ... = A + // A = ... + // ... 
= A + // A = ... + // ... = A + // A = ... + // ... = A + // There are three anti-dependencies here, and without special care, + // we'd break all of them using the same register: + // A = ... + // ... = A + // B = ... + // ... = B + // B = ... + // ... = B + // B = ... + // ... = B + // because at each anti-dependence, B is the first register that + // isn't A which is free. This re-introduces anti-dependencies + // at all but one of the original anti-dependencies that we were + // trying to break. To avoid this, keep track of the most recent + // register that each register was replaced with, avoid avoid + // using it to repair an anti-dependence on the same register. + // This lets us produce this: + // A = ... + // ... = A + // B = ... + // ... = B + // C = ... + // ... = C + // B = ... + // ... = B + // This still has an anti-dependence on B, but at least it isn't on the + // original critical path. + // + // TODO: If we tracked more than one register here, we could potentially + // fix that remaining critical edge too. This is a little more involved, + // because unlike the most recent register, less recent registers should + // still be considered, though only if no other registers are available. + unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {}; + + // A registers defined and not used in an instruction. This is used for + // liveness tracking and is declared outside the loop only to avoid + // having it be re-allocated on each iteration. + DenseSet<unsigned> Defs; + + // Attempt to break anti-dependence edges on the critical path. Walk the + // instructions from the bottom up, tracking information about liveness + // as we go to help determine which registers are available. + bool Changed = false; + unsigned Count = BB->size() - 1; + for (MachineBasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); + I != E; ++I, --Count) { + MachineInstr *MI = &*I; + + // Check if this instruction has an anti-dependence that we're + // interested in. + DenseMap<MachineInstr *, unsigned>::iterator C = CriticalAntiDeps.find(MI); + unsigned AntiDepReg = C != CriticalAntiDeps.end() ? + C->second : 0; + + // Scan the register operands for this instruction and update + // Classes and RegRefs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + const TargetRegisterClass *NewRC = + getInstrOperandRegClass(TRI, TII, MI->getDesc(), i); + + // If this instruction has a use of AntiDepReg, breaking it + // is invalid. + if (MO.isUse() && AntiDepReg == Reg) + AntiDepReg = 0; + + // For now, only allow the register to be changed if its register + // class is consistent across all uses. + if (!Classes[Reg] && NewRC) + Classes[Reg] = NewRC; + else if (!NewRC || Classes[Reg] != NewRC) + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + + // Now check for aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + // If an alias of the reg is used during the live range, give up. + // Note that this allows us to skip checking if AntiDepReg + // overlaps with any of the aliases, among other things. + unsigned AliasReg = *Alias; + if (Classes[AliasReg]) { + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + } + } + + // If we're still willing to consider this register, note the reference. 
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) + RegRefs.insert(std::make_pair(Reg, &MO)); + } + + // Determine AntiDepReg's register class, if it is live and is + // consistently used within a single class. + const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0; + assert(AntiDepReg == 0 || RC != NULL && + "Register should be live if it's causing an anti-dependence!"); + if (RC == reinterpret_cast<TargetRegisterClass *>(-1)) + AntiDepReg = 0; + + // Look for a suitable register to use to break the anti-depenence. + // + // TODO: Instead of picking the first free register, consider which might + // be the best. + if (AntiDepReg != 0) { + for (TargetRegisterClass::iterator R = RC->allocation_order_begin(*MF), + RE = RC->allocation_order_end(*MF); R != RE; ++R) { + unsigned NewReg = *R; + // Don't replace a register with itself. + if (NewReg == AntiDepReg) continue; + // Don't replace a register with one that was recently used to repair + // an anti-dependence with this AntiDepReg, because that would + // re-introduce that anti-dependence. + if (NewReg == LastNewReg[AntiDepReg]) continue; + // If NewReg is dead and NewReg's most recent def is not before + // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. + assert(((KillIndices[AntiDepReg] == -1) != (DefIndices[AntiDepReg] == -1)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + assert(((KillIndices[NewReg] == -1) != (DefIndices[NewReg] == -1)) && + "Kill and Def maps aren't consistent for NewReg!"); + if (KillIndices[NewReg] == -1 && + KillIndices[AntiDepReg] <= DefIndices[NewReg]) { + DOUT << "Breaking anti-dependence edge on reg " << AntiDepReg + << " with reg " << NewReg << "!\n"; + + // Update the references to the old register to refer to the new + // register. + std::pair<std::multimap<unsigned, MachineOperand *>::iterator, + std::multimap<unsigned, MachineOperand *>::iterator> + Range = RegRefs.equal_range(AntiDepReg); + for (std::multimap<unsigned, MachineOperand *>::iterator + Q = Range.first, QE = Range.second; Q != QE; ++Q) + Q->second->setReg(NewReg); + + // We just went back in time and modified history; the + // liveness information for the anti-depenence reg is now + // inconsistent. Set the state as if it were dead. + Classes[NewReg] = Classes[AntiDepReg]; + DefIndices[NewReg] = DefIndices[AntiDepReg]; + KillIndices[NewReg] = KillIndices[AntiDepReg]; + + Classes[AntiDepReg] = 0; + DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; + KillIndices[AntiDepReg] = -1; + + RegRefs.erase(AntiDepReg); + Changed = true; + LastNewReg[AntiDepReg] = NewReg; + break; + } + } + } + + // Update liveness. + Defs.clear(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (MO.isDef()) + Defs.insert(Reg); + else { + // Treat a use in the same instruction as a def as an extension of + // a live range. + Defs.erase(Reg); + // It wasn't previously live but now it is, this is a kill. + if (KillIndices[Reg] == -1) { + KillIndices[Reg] = Count; + DefIndices[Reg] = -1; + } + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Defs.erase(AliasReg); + if (KillIndices[AliasReg] == -1) { + KillIndices[AliasReg] = Count; + DefIndices[AliasReg] = -1; + } + } + } + } + // Proceding upwards, registers that are defed but not used in this + // instruction are now dead. 
+ for (DenseSet<unsigned>::iterator D = Defs.begin(), DE = Defs.end(); + D != DE; ++D) { + unsigned Reg = *D; + DefIndices[Reg] = Count; + KillIndices[Reg] = -1; + Classes[Reg] = 0; + RegRefs.erase(Reg); + // Repeat, for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + unsigned SubregReg = *Subreg; + DefIndices[SubregReg] = Count; + KillIndices[SubregReg] = -1; + Classes[SubregReg] = 0; + RegRefs.erase(SubregReg); + } + } + } + assert(Count == -1u && "Count mismatch!"); + + return Changed; +} + //===----------------------------------------------------------------------===// // Top-Down Scheduling //===----------------------------------------------------------------------===// @@ -202,8 +596,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { } } - // If there are no instructions available, don't try to issue anything, and - // don't advance the hazard recognizer. + // If there are no instructions available, don't try to issue anything. if (AvailableQueue.empty()) { ++CurCycle; continue; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 046d3379a6..7088313fcd 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -263,3 +263,204 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { "The number of nodes scheduled doesn't match the expected number!"); } #endif + +/// InitDAGTopologicalSorting - create the initial topological +/// ordering from the DAG to be scheduled. +/// +/// The idea of the algorithm is taken from +/// "Online algorithms for managing the topological order of +/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly +/// This is the MNR algorithm, which was first introduced by +/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in +/// "Maintaining a topological order under edge insertions". +/// +/// Short description of the algorithm: +/// +/// Topological ordering, ord, of a DAG maps each node to a topological +/// index so that for all edges X->Y it is the case that ord(X) < ord(Y). +/// +/// This means that if there is a path from the node X to the node Z, +/// then ord(X) < ord(Z). +/// +/// This property can be used to check for reachability of nodes: +/// if Z is reachable from X, then an insertion of the edge Z->X would +/// create a cycle. +/// +/// The algorithm first computes a topological ordering for the DAG by +/// initializing the Index2Node and Node2Index arrays and then tries to keep +/// the ordering up-to-date after edge insertions by reordering the DAG. +/// +/// On insertion of the edge X->Y, the algorithm first marks by calling DFS +/// the nodes reachable from Y, and then shifts them using Shift to lie +/// immediately after X in Index2Node. +void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { + unsigned DAGSize = SUnits.size(); + std::vector<SUnit*> WorkList; + WorkList.reserve(DAGSize); + + Index2Node.resize(DAGSize); + Node2Index.resize(DAGSize); + + // Initialize the data structures. + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + int NodeNum = SU->NodeNum; + unsigned Degree = SU->Succs.size(); + // Temporarily use the Node2Index array as scratch space for degree counts. + Node2Index[NodeNum] = Degree; + + // Is it a node without dependencies? + if (Degree == 0) { + assert(SU->Succs.empty() && "SUnit should have no successors"); + // Collect leaf nodes. 
+ WorkList.push_back(SU); + } + } + + int Id = DAGSize; + while (!WorkList.empty()) { + SUnit *SU = WorkList.back(); + WorkList.pop_back(); + Allocate(SU->NodeNum, --Id); + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit *SU = I->Dep; + if (!--Node2Index[SU->NodeNum]) + // If all dependencies of the node are processed already, + // then the node can be computed now. + WorkList.push_back(SU); + } + } + + Visited.resize(DAGSize); + +#ifndef NDEBUG + // Check correctness of the ordering + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + assert(Node2Index[SU->NodeNum] > Node2Index[I->Dep->NodeNum] && + "Wrong topological sorting"); + } + } +#endif +} + +/// AddPred - Updates the topological ordering to accomodate an edge +/// to be added from SUnit X to SUnit Y. +void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { + int UpperBound, LowerBound; + LowerBound = Node2Index[Y->NodeNum]; + UpperBound = Node2Index[X->NodeNum]; + bool HasLoop = false; + // Is Ord(X) < Ord(Y) ? + if (LowerBound < UpperBound) { + // Update the topological order. + Visited.reset(); + DFS(Y, UpperBound, HasLoop); + assert(!HasLoop && "Inserted edge creates a loop!"); + // Recompute topological indexes. + Shift(Visited, LowerBound, UpperBound); + } +} + +/// RemovePred - Updates the topological ordering to accomodate an +/// an edge to be removed from the specified node N from the predecessors +/// of the current node M. +void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { + // InitDAGTopologicalSorting(); +} + +/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark +/// all nodes affected by the edge insertion. These nodes will later get new +/// topological indexes by means of the Shift method. +void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, bool& HasLoop) { + std::vector<const SUnit*> WorkList; + WorkList.reserve(SUnits.size()); + + WorkList.push_back(SU); + while (!WorkList.empty()) { + SU = WorkList.back(); + WorkList.pop_back(); + Visited.set(SU->NodeNum); + for (int I = SU->Succs.size()-1; I >= 0; --I) { + int s = SU->Succs[I].Dep->NodeNum; + if (Node2Index[s] == UpperBound) { + HasLoop = true; + return; + } + // Visit successors if not already and in affected region. + if (!Visited.test(s) && Node2Index[s] < UpperBound) { + WorkList.push_back(SU->Succs[I].Dep); + } + } + } +} + +/// Shift - Renumber the nodes so that the topological ordering is +/// preserved. +void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, + int UpperBound) { + std::vector<int> L; + int shift = 0; + int i; + + for (i = LowerBound; i <= UpperBound; ++i) { + // w is node at topological index i. + int w = Index2Node[i]; + if (Visited.test(w)) { + // Unmark. + Visited.reset(w); + L.push_back(w); + shift = shift + 1; + } else { + Allocate(w, i - shift); + } + } + + for (unsigned j = 0; j < L.size(); ++j) { + Allocate(L[j], i - shift); + i = i + 1; + } +} + + +/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will +/// create a cycle. 
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) { + if (IsReachable(TargetSU, SU)) + return true; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->Cost < 0 && IsReachable(TargetSU, I->Dep)) + return true; + return false; +} + +/// IsReachable - Checks if SU is reachable from TargetSU. +bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, const SUnit *TargetSU) { + // If insertion of the edge SU->TargetSU would create a cycle + // then there is a path from TargetSU to SU. + int UpperBound, LowerBound; + LowerBound = Node2Index[TargetSU->NodeNum]; + UpperBound = Node2Index[SU->NodeNum]; + bool HasLoop = false; + // Is Ord(TargetSU) < Ord(SU) ? + if (LowerBound < UpperBound) { + Visited.reset(); + // There may be a path from TargetSU to SU. Check for it. + DFS(TargetSU, UpperBound, HasLoop); + } + return HasLoop; +} + +/// Allocate - assign the topological index to the node n. +void ScheduleDAGTopologicalSort::Allocate(int n, int index) { + Node2Index[n] = index; + Index2Node[index] = n; +} + +ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort( + std::vector<SUnit> &sunits) + : SUnits(sunits) {} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 16f9950295..5cbffc7c26 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -69,12 +69,16 @@ private: std::vector<SUnit*> LiveRegDefs; std::vector<unsigned> LiveRegCycles; + /// Topo - A topological ordering for SUnits which permits fast IsReachable + /// and similar queries. + ScheduleDAGTopologicalSort Topo; + public: ScheduleDAGRRList(SelectionDAG *dag, MachineBasicBlock *bb, const TargetMachine &tm, bool isbottomup, SchedulingPriorityQueue *availqueue) : ScheduleDAGSDNodes(dag, bb, tm), isBottomUp(isbottomup), - AvailableQueue(availqueue) { + AvailableQueue(availqueue), Topo(SUnits) { } ~ScheduleDAGRRList() { @@ -84,22 +88,32 @@ public: void Schedule(); /// IsReachable - Checks if SU is reachable from TargetSU. - bool IsReachable(const SUnit *SU, const SUnit *TargetSU); + bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { + return Topo.IsReachable(SU, TargetSU); + } /// willCreateCycle - Returns true if adding an edge from SU to TargetSU will /// create a cycle. - bool WillCreateCycle(SUnit *SU, SUnit *TargetSU); + bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { + return Topo.WillCreateCycle(SU, TargetSU); + } /// AddPred - This adds the specified node X as a predecessor of /// the current node Y if not already. /// This returns true if this is a new predecessor. /// Updates the topological ordering if required. bool AddPred(SUnit *Y, SUnit *X, bool isCtrl, bool isArtificial, - unsigned PhyReg = 0, int Cost = 1); + unsigned PhyReg = 0, int Cost = 1) { + Topo.AddPred(Y, X); + return Y->addPred(X, isCtrl, isArtificial, PhyReg, Cost); + } /// RemovePred - This removes the specified node N from the predecessors of /// the current node M. Updates the topological ordering if required. - bool RemovePred(SUnit *M, SUnit *N, bool isCtrl, bool isArtificial); + bool RemovePred(SUnit *M, SUnit *N, bool isCtrl, bool isArtificial) { + Topo.RemovePred(M, N); + return M->removePred(N, isCtrl, isArtificial, false); + } private: void ReleasePred(SUnit *SU, SUnit *PredSU, bool isChain); @@ -123,49 +137,24 @@ private: /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. /// Updates the topological ordering if required. 
SUnit *CreateNewSUnit(SDNode *N) { + unsigned NumSUnits = SUnits.size(); SUnit *NewNode = NewSUnit(N); // Update the topological ordering. - if (NewNode->NodeNum >= Node2Index.size()) - InitDAGTopologicalSorting(); + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); return NewNode; } /// CreateClone - Creates a new SUnit from an existing one. /// Updates the topological ordering if required. SUnit *CreateClone(SUnit *N) { + unsigned NumSUnits = SUnits.size(); SUnit *NewNode = Clone(N); // Update the topological ordering. - if (NewNode->NodeNum >= Node2Index.size()) - InitDAGTopologicalSorting(); + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); return NewNode; } - - /// Functions for preserving the topological ordering - /// even after dynamic insertions of new edges. - /// This allows a very fast implementation of IsReachable. - - /// InitDAGTopologicalSorting - create the initial topological - /// ordering from the DAG to be scheduled. - void InitDAGTopologicalSorting(); - - /// DFS - make a DFS traversal and mark all nodes affected by the - /// edge insertion. These nodes will later get new topological indexes - /// by means of the Shift method. - void DFS(const SUnit *SU, int UpperBound, bool& HasLoop); - - /// Shift - reassign topological indexes for the nodes in the DAG - /// to preserve the topological ordering. - void Shift(BitVector& Visited, int LowerBound, int UpperBound); - - /// Allocate - assign the topological index to the node n. - void Allocate(int n, int index); - - /// Index2Node - Maps topological index to the node number. - std::vector<int> Index2Node; - /// Node2Index - Maps the node number to its topological index. - std::vector<int> Node2Index; - /// Visited - a set of nodes visited during a DFS traversal. - BitVector Visited; }; } // end anonymous namespace @@ -185,7 +174,7 @@ void ScheduleDAGRRList::Schedule() { SUnits[su].dumpAll(this)); CalculateDepths(); CalculateHeights(); - InitDAGTopologicalSorting(); + Topo.InitDAGTopologicalSorting(); AvailableQueue->initNodes(SUnits); @@ -374,207 +363,6 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { AvailableQueue->push(SU); } -/// IsReachable - Checks if SU is reachable from TargetSU. -bool ScheduleDAGRRList::IsReachable(const SUnit *SU, const SUnit *TargetSU) { - // If insertion of the edge SU->TargetSU would create a cycle - // then there is a path from TargetSU to SU. - int UpperBound, LowerBound; - LowerBound = Node2Index[TargetSU->NodeNum]; - UpperBound = Node2Index[SU->NodeNum]; - bool HasLoop = false; - // Is Ord(TargetSU) < Ord(SU) ? - if (LowerBound < UpperBound) { - Visited.reset(); - // There may be a path from TargetSU to SU. Check for it. - DFS(TargetSU, UpperBound, HasLoop); - } - return HasLoop; -} - -/// Allocate - assign the topological index to the node n. -inline void ScheduleDAGRRList::Allocate(int n, int index) { - Node2Index[n] = index; - Index2Node[index] = n; -} - -/// InitDAGTopologicalSorting - create the initial topological -/// ordering from the DAG to be scheduled. - -/// The idea of the algorithm is taken from -/// "Online algorithms for managing the topological order of -/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly -/// This is the MNR algorithm, which was first introduced by -/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in -/// "Maintaining a topological order under edge insertions". 
-/// -/// Short description of the algorithm: -/// -/// Topological ordering, ord, of a DAG maps each node to a topological -/// index so that for all edges X->Y it is the case that ord(X) < ord(Y). -/// -/// This means that if there is a path from the node X to the node Z, -/// then ord(X) < ord(Z). -/// -/// This property can be used to check for reachability of nodes: -/// if Z is reachable from X, then an insertion of the edge Z->X would -/// create a cycle. -/// -/// The algorithm first computes a topological ordering for the DAG by -/// initializing the Index2Node and Node2Index arrays and then tries to keep -/// the ordering up-to-date after edge insertions by reordering the DAG. -/// -/// On insertion of the edge X->Y, the algorithm first marks by calling DFS -/// the nodes reachable from Y, and then shifts them using Shift to lie -/// immediately after X in Index2Node. -void ScheduleDAGRRList::InitDAGTopologicalSorting() { - unsigned DAGSize = SUnits.size(); - std::vector<SUnit*> WorkList; - WorkList.reserve(DAGSize); - - Index2Node.resize(DAGSize); - Node2Index.resize(DAGSize); - - // Initialize the data structures. - for (unsigned i = 0, e = DAGSize; i != e; ++i) { - SUnit *SU = &SUnits[i];< |