author     Evan Cheng <evan.cheng@apple.com>   2007-09-25 01:54:36 +0000
committer  Evan Cheng <evan.cheng@apple.com>   2007-09-25 01:54:36 +0000
commit     a6fb1b6743ee1411accf2d6e636f73f2ee0a7f5b (patch)
tree       05eb93ca35bcfe154a16403c81b638ae3abf777b /lib/CodeGen
parent     a3602685b34f4c9a1602fdc7fb120a7f51228736 (diff)
Added major new capabilities to the scheduler (only BURR for now) to support physical register dependencies. The BURR scheduler can now backtrack and duplicate instructions to keep "expensive / impossible to copy" values (e.g. the x86 status flags register EFLAGS) from being clobbered.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42284 91177308-0d34-0410-b5e6-96231b3b80d8
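
To make the motivation concrete: on x86 a compare produces its result in EFLAGS, a register that is impractical to copy, so the scheduler must not place another flag-writing instruction between the compare and its consuming branch. The following stand-alone sketch is illustrative only — ToyNode, LiveFlagsDefs and the rest are invented names for this example, not LLVM APIs — and models the live-physical-register check that this commit's LiveRegs/LiveRegDefs bookkeeping performs during bottom-up scheduling:

    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    // A node in a toy dependence graph (invented for this sketch). FlagsDef
    // names the node whose FLAGS result this node consumes (nullptr if none);
    // DefsFlags marks nodes that write FLAGS.
    struct ToyNode {
      std::string Name;
      bool DefsFlags;
      const char *FlagsDef;
    };

    int main() {
      // Candidate picks in the order a naive bottom-up scheduler might try.
      // "jcc" reads the FLAGS produced by "cmp"; "add" also writes FLAGS and
      // would clobber the value if placed between them.
      std::vector<ToyNode> Picks = {
          {"jcc", false, "cmp"}, {"add", true, nullptr}, {"cmp", true, nullptr}};

      std::set<std::string> LiveFlagsDefs; // cf. LiveRegs/LiveRegDefs in the patch
      std::vector<std::string> Sequence, Delayed;

      for (ToyNode &N : Picks) {
        // A FLAGS writer other than the awaited def may not be scheduled while
        // FLAGS is live (the real scheduler backtracks or clones instead).
        if (N.DefsFlags && !LiveFlagsDefs.empty() &&
            !LiveFlagsDefs.count(N.Name)) {
          Delayed.push_back(N.Name);
          continue;
        }
        Sequence.push_back(N.Name);
        if (N.FlagsDef)
          LiveFlagsDefs.insert(N.FlagsDef); // FLAGS live until its def appears
        LiveFlagsDefs.erase(N.Name);        // this node satisfies a pending def
      }
      // Once the def is scheduled, the delayed clobbering nodes become legal.
      for (const std::string &Name : Delayed)
        Sequence.push_back(Name);

      std::cout << "bottom-up pick order:";
      for (const std::string &Name : Sequence)
        std::cout << ' ' << Name; // jcc cmp add
      std::cout << '\n';
    }

Because picks run bottom-up, the sequence jcc, cmp, add is emitted as add; cmp; jcc, keeping the EFLAGS def-use pair adjacent. The actual patch goes further: when simply delaying is impossible it backtracks (BackTrackBottomUp) or clones the defining node (CopyAndMoveSuccessors).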
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--   lib/CodeGen/SelectionDAG/ScheduleDAG.cpp           182
-rw-r--r--   lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp        52
-rw-r--r--   lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp     469
-rw-r--r--   lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp       4
-rw-r--r--   lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp     2
5 files changed, 559 insertions, 150 deletions
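
Before reading the diff, it may help to see the shape of the new dependency edges. Below is a simplified, hypothetical analogue — ToySDep and ToySUnit are invented names, not the patch's actual classes — of the extended addPred(Dep, isCtrl, isSpecial, PhysReg, Cost) signature that appears throughout the diff:

    #include <vector>

    struct ToySUnit;

    // Invented analogue of the patch's scheduling edge: besides the usual
    // data/chain distinction, an edge can now carry a physical register and
    // the cost of copying it. A negative cost means "impossible / very
    // expensive to copy" (e.g. EFLAGS), which is what triggers the
    // live-register tracking.
    struct ToySDep {
      ToySUnit *Dep;   // the other end of the edge
      bool isCtrl;     // chain (control) edge rather than a value edge
      bool isSpecial;  // edge added by backtracking/cloning, not by the DAG
      unsigned Reg;    // physical register carried on the edge, 0 if none
      int Cost;        // register copy cost; < 0 means never copy
    };

    struct ToySUnit {
      std::vector<ToySDep> Preds, Succs;

      void addPred(ToySUnit *Pred, bool isCtrl, bool isSpecial,
                   unsigned PhysReg = 0, int Cost = 1) {
        Preds.push_back({Pred, isCtrl, isSpecial, PhysReg, Cost});
        Pred->Succs.push_back({this, isCtrl, isSpecial, PhysReg, Cost});
      }
    };

    int main() {
      enum { EFLAGS = 1 }; // stand-in physical register number
      ToySUnit Cmp, Jcc;
      // Value edge Jcc <- Cmp carrying EFLAGS with negative copy cost: the
      // scheduler must keep the pair together rather than emit a copy.
      Jcc.addPred(&Cmp, /*isCtrl=*/false, /*isSpecial=*/false, EFLAGS, -1);
      return Jcc.Preds[0].Cost < 0 ? 0 : 1;
    }

Edges whose Cost is negative are the ones that feed the LiveRegs/LiveRegDefs tracking added to ScheduleDAGRRList.cpp below.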
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
index 13465957b9..b77512228c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
@@ -27,6 +27,65 @@
 #include "llvm/Support/MathExtras.h"
 using namespace llvm;
 
+
+/// getPhysicalRegisterRegClass - Returns the Register Class of a physical
+/// register.
+static const TargetRegisterClass *getPhysicalRegisterRegClass(
+    const MRegisterInfo *MRI,
+    MVT::ValueType VT,
+    unsigned reg) {
+  assert(MRegisterInfo::isPhysicalRegister(reg) &&
+         "reg must be a physical register");
+  // Pick the register class of the right type that contains this physreg.
+  for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(),
+         E = MRI->regclass_end(); I != E; ++I)
+    if ((*I)->hasType(VT) && (*I)->contains(reg))
+      return *I;
+  assert(false && "Couldn't find the register class");
+  return 0;
+}
+
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *Use, unsigned Op,
+                                      const MRegisterInfo *MRI,
+                                      const TargetInstrInfo *TII,
+                                      unsigned &PhysReg, int &Cost) {
+  if (Op != 2 || Use->getOpcode() != ISD::CopyToReg)
+    return;
+
+  unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+  if (MRegisterInfo::isVirtualRegister(Reg))
+    return;
+
+  unsigned ResNo = Use->getOperand(2).ResNo;
+  if (Def->isTargetOpcode()) {
+    const TargetInstrDescriptor &II = TII->get(Def->getTargetOpcode());
+    if (ResNo >= II.numDefs &&
+        II.ImplicitDefs[ResNo - II.numDefs] == Reg) {
+      PhysReg = Reg;
+      const TargetRegisterClass *RC =
+        getPhysicalRegisterRegClass(MRI, Def->getValueType(ResNo), Reg);
+      Cost = RC->getCopyCost();
+    }
+  }
+}
+
+SUnit *ScheduleDAG::Clone(SUnit *Old) {
+  SUnit *SU = NewSUnit(Old->Node);
+  for (unsigned i = 0, e = SU->FlaggedNodes.size(); i != e; ++i)
+    SU->FlaggedNodes.push_back(SU->FlaggedNodes[i]);
+  SU->InstanceNo = SUnitMap[Old->Node].size();
+  SU->Latency = Old->Latency;
+  SU->isTwoAddress = Old->isTwoAddress;
+  SU->isCommutable = Old->isCommutable;
+  SU->hasImplicitDefs = Old->hasImplicitDefs;
+  SUnitMap[Old->Node].push_back(SU);
+  return SU;
+}
+
 /// BuildSchedUnits - Build SUnits from the selection dag that we are input.
 /// This SUnit graph is similar to the SelectionDAG, but represents flagged
 /// together nodes with a single SUnit.
@@ -44,7 +103,7 @@ void ScheduleDAG::BuildSchedUnits() {
       continue;
     
     // If this node has already been processed, stop now.
-    if (SUnitMap[NI]) continue;
+    if (SUnitMap[NI].size()) continue;
     
     SUnit *NodeSUnit = NewSUnit(NI);
@@ -59,7 +118,7 @@ void ScheduleDAG::BuildSchedUnits() {
       do {
         N = N->getOperand(N->getNumOperands()-1).Val;
         NodeSUnit->FlaggedNodes.push_back(N);
-        SUnitMap[N] = NodeSUnit;
+        SUnitMap[N].push_back(NodeSUnit);
       } while (N->getNumOperands() &&
                N->getOperand(N->getNumOperands()-1).getValueType()== MVT::Flag);
       std::reverse(NodeSUnit->FlaggedNodes.begin(),
@@ -79,7 +138,7 @@ void ScheduleDAG::BuildSchedUnits() {
           if (FlagVal.isOperand(*UI)) {
             HasFlagUse = true;
             NodeSUnit->FlaggedNodes.push_back(N);
-            SUnitMap[N] = NodeSUnit;
+            SUnitMap[N].push_back(NodeSUnit);
             N = *UI;
             break;
           }
       }
@@ -89,7 +148,7 @@ void ScheduleDAG::BuildSchedUnits() {
 
     // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node.
     // Update the SUnit
     NodeSUnit->Node = N;
-    SUnitMap[N] = NodeSUnit;
+    SUnitMap[N].push_back(NodeSUnit);
 
     // Compute the latency for the node.  We use the sum of the latencies for
     // all nodes flagged together into this SUnit.
@@ -125,13 +184,16 @@ void ScheduleDAG::BuildSchedUnits() {
 
     if (MainNode->isTargetOpcode()) {
       unsigned Opc = MainNode->getTargetOpcode();
-      for (unsigned i = 0, ee = TII->getNumOperands(Opc); i != ee; ++i) {
-        if (TII->getOperandConstraint(Opc, i, TOI::TIED_TO) != -1) {
+      const TargetInstrDescriptor &TID = TII->get(Opc);
+      if (TID.ImplicitDefs)
+        SU->hasImplicitDefs = true;
+      for (unsigned i = 0; i != TID.numOperands; ++i) {
+        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
           SU->isTwoAddress = true;
           break;
         }
       }
-      if (TII->isCommutableInstr(Opc))
+      if (TID.Flags & M_COMMUTABLE)
         SU->isCommutable = true;
     }
@@ -141,34 +203,25 @@ void ScheduleDAG::BuildSchedUnits() {
     for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) {
       SDNode *N = SU->FlaggedNodes[n];
+      if (N->isTargetOpcode() && TII->getImplicitDefs(N->getTargetOpcode()))
+        SU->hasImplicitDefs = true;
 
       for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
         SDNode *OpN = N->getOperand(i).Val;
         if (isPassiveNode(OpN)) continue;   // Not scheduled.
-        SUnit *OpSU = SUnitMap[OpN];
+        SUnit *OpSU = SUnitMap[OpN].front();
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == SU) continue;           // In the same group.
 
         MVT::ValueType OpVT = N->getOperand(i).getValueType();
         assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
         bool isChain = OpVT == MVT::Other;
-
-        if (SU->addPred(OpSU, isChain)) {
-          if (!isChain) {
-            SU->NumPreds++;
-            SU->NumPredsLeft++;
-          } else {
-            SU->NumChainPredsLeft++;
-          }
-        }
-        if (OpSU->addSucc(SU, isChain)) {
-          if (!isChain) {
-            OpSU->NumSuccs++;
-            OpSU->NumSuccsLeft++;
-          } else {
-            OpSU->NumChainSuccsLeft++;
-          }
-        }
+
+        unsigned PhysReg = 0;
+        int Cost = 1;
+        // Determine if this is a physical register dependency.
+        CheckForPhysRegDependency(OpN, N, i, MRI, TII, PhysReg, Cost);
+        SU->addPred(OpSU, isChain, false, PhysReg, Cost);
       }
     }
@@ -200,7 +253,7 @@ void ScheduleDAG::CalculateDepths() {
 void ScheduleDAG::CalculateHeights() {
   std::vector<std::pair<SUnit*, unsigned> > WorkList;
-  SUnit *Root = SUnitMap[DAG.getRoot().Val];
+  SUnit *Root = SUnitMap[DAG.getRoot().Val].front();
   WorkList.push_back(std::make_pair(Root, 0U));
 
   while (!WorkList.empty()) {
@@ -254,27 +307,14 @@ static const TargetRegisterClass *getInstrOperandRegClass(
            ? TII->getPointerRegClass() : MRI->getRegClass(toi.RegClass);
 }
 
-// Returns the Register Class of a physical register
-static const TargetRegisterClass *getPhysicalRegisterRegClass(
-    const MRegisterInfo *MRI,
-    MVT::ValueType VT,
-    unsigned reg) {
-  assert(MRegisterInfo::isPhysicalRegister(reg) &&
-         "reg must be a physical register");
-  // Pick the register class of the right type that contains this physreg.
-  for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(),
-         E = MRI->regclass_end(); I != E; ++I)
-    if ((*I)->hasType(VT) && (*I)->contains(reg))
-      return *I;
-  assert(false && "Couldn't find the register class");
-  return 0;
-}
-
-void ScheduleDAG::EmitCopyFromReg(SDNode *Node, unsigned ResNo, unsigned SrcReg,
+void ScheduleDAG::EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+                                  unsigned InstanceNo, unsigned SrcReg,
                                   DenseMap<SDOperand, unsigned> &VRBaseMap) {
   unsigned VRBase = 0;
   if (MRegisterInfo::isVirtualRegister(SrcReg)) {
     // Just use the input register directly!
+    if (InstanceNo > 0)
+      VRBaseMap.erase(SDOperand(Node, ResNo));
     bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,ResNo),SrcReg));
     assert(isNew && "Node emitted out of order - early");
     return;
@@ -282,32 +322,54 @@ void ScheduleDAG::EmitCopyFromReg(SDNode *Node, unsigned ResNo, unsigned SrcReg,
 
   // If the node is only used by a CopyToReg and the dest reg is a vreg, use
   // the CopyToReg'd destination register instead of creating a new vreg.
+  bool MatchReg = true;
   for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
        UI != E; ++UI) {
     SDNode *Use = *UI;
+    bool Match = true;
     if (Use->getOpcode() == ISD::CopyToReg && 
         Use->getOperand(2).Val == Node &&
         Use->getOperand(2).ResNo == ResNo) {
       unsigned DestReg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
       if (MRegisterInfo::isVirtualRegister(DestReg)) {
         VRBase = DestReg;
-        break;
+        Match = false;
+      } else if (DestReg != SrcReg)
+        Match = false;
+    } else {
+      for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+        SDOperand Op = Use->getOperand(i);
+        if (Op.Val != Node)
+          continue;
+        MVT::ValueType VT = Node->getValueType(Op.ResNo);
+        if (VT != MVT::Other && VT != MVT::Flag)
+          Match = false;
       }
     }
+    MatchReg &= Match;
+    if (VRBase)
+      break;
   }
 
-  // Figure out the register class to create for the destreg.
   const TargetRegisterClass *TRC = 0;
-  if (VRBase) {
+  // Figure out the register class to create for the destreg.
+  if (VRBase)
     TRC = RegMap->getRegClass(VRBase);
-  } else {
+  else
     TRC = getPhysicalRegisterRegClass(MRI, Node->getValueType(ResNo), SrcReg);
-  
+
+  // If all uses are reading from the src physical register and copying the
+  // register is either impossible or very expensive, then don't create a copy.
+  if (MatchReg && TRC->getCopyCost() < 0) {
+    VRBase = SrcReg;
+  } else {
     // Create the reg, emit the copy.
     VRBase = RegMap->createVirtualRegister(TRC);
+    MRI->copyRegToReg(*BB, BB->end(), VRBase, SrcReg, TRC);
   }
-  MRI->copyRegToReg(*BB, BB->end(), VRBase, SrcReg, TRC);
 
+  if (InstanceNo > 0)
+    VRBaseMap.erase(SDOperand(Node, ResNo));
   bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,ResNo), VRBase));
   assert(isNew && "Node emitted out of order - early");
 }
@@ -611,7 +673,7 @@ void ScheduleDAG::EmitSubregNode(SDNode *Node, 
 
 /// EmitNode - Generate machine code for an node and needed dependencies.
 ///
-void ScheduleDAG::EmitNode(SDNode *Node,
+void ScheduleDAG::EmitNode(SDNode *Node, unsigned InstanceNo,
                            DenseMap<SDOperand, unsigned> &VRBaseMap) {
   // If machine instruction
   if (Node->isTargetOpcode()) {
@@ -677,7 +739,7 @@ void ScheduleDAG::EmitNode(SDNode *Node,
       for (unsigned i = II.numDefs; i < NumResults; ++i) {
         unsigned Reg = II.ImplicitDefs[i - II.numDefs];
         if (Node->hasAnyUseOfValue(i))
-          EmitCopyFromReg(Node, i, Reg, VRBaseMap);
+          EmitCopyFromReg(Node, i, InstanceNo, Reg, VRBaseMap);
       }
     }
   } else {
@@ -713,7 +775,7 @@ void ScheduleDAG::EmitNode(SDNode *Node,
     }
     case ISD::CopyFromReg: {
       unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
-      EmitCopyFromReg(Node, 0, SrcReg, VRBaseMap);
+      EmitCopyFromReg(Node, 0, InstanceNo, SrcReg, VRBaseMap);
       break;
     }
     case ISD::INLINEASM: {
@@ -802,9 +864,9 @@ void ScheduleDAG::EmitSchedule() {
   DenseMap<SDOperand, unsigned> VRBaseMap;
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
     if (SUnit *SU = Sequence[i]) {
-      for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++)
-        EmitNode(SU->FlaggedNodes[j], VRBaseMap);
-      EmitNode(SU->Node, VRBaseMap);
+      for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; ++j)
+        EmitNode(SU->FlaggedNodes[j], SU->InstanceNo, VRBaseMap);
+      EmitNode(SU->Node, SU->InstanceNo, VRBaseMap);
     } else {
       // Null SUnit* is a noop.
       EmitNoop();
@@ -869,7 +931,10 @@ void SUnit::dumpAll(const SelectionDAG *G) const {
         cerr << " ch  #";
       else
         cerr << " val #";
-      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")\n";
+      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")";
+      if (I->isSpecial)
+        cerr << " *";
+      cerr << "\n";
     }
   }
   if (Succs.size() != 0) {
@@ -880,7 +945,10 @@ void SUnit::dumpAll(const SelectionDAG *G) const {
        cerr << " ch  #";
      else
        cerr << " val #";
-      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")\n";
+      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")";
+      if (I->isSpecial)
+        cerr << " *";
+      cerr << "\n";
     }
   }
   cerr << "\n";
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index 7b09749462..33d7910507 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -168,7 +168,7 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
 /// schedulers.
 void ScheduleDAGList::ListScheduleTopDown() {
   unsigned CurCycle = 0;
-  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
 
   // All leaves to Available queue.
   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
@@ -328,12 +328,24 @@ public:
   LatencyPriorityQueue() : Queue(latency_sort(this)) {
   }
 
-  void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+  void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                  std::vector<SUnit> &sunits) {
     SUnits = &sunits;
     // Calculate node priorities.
     CalculatePriorities();
   }
+
+  void addNode(const SUnit *SU) {
+    Latencies.resize(SUnits->size(), -1);
+    NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+    CalcLatency(*SU);
+  }
+
+  void updateNode(const SUnit *SU) {
+    Latencies[SU->NodeNum] = -1;
+    CalcLatency(*SU);
+  }
+
   void releaseState() {
     SUnits = 0;
     Latencies.clear();
@@ -349,6 +361,8 @@ public:
     return NumNodesSolelyBlocking[NodeNum];
   }
 
+  unsigned size() const { return Queue.size(); }
+
   bool empty() const { return Queue.empty(); }
 
   virtual void push(SUnit *U) {
@@ -368,22 +382,10 @@ public:
     return V;
   }
 
-  // ScheduledNode - As nodes are scheduled, we look to see if there are any
-  // successor nodes that have a single unscheduled predecessor.  If so, that
-  // single predecessor has a higher priority, since scheduling it will make
-  // the node available.
-  void ScheduledNode(SUnit *Node);
-
-private:
-  void CalculatePriorities();
-  int CalcLatency(const SUnit &SU);
-  void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
-  SUnit *getSingleUnscheduledPred(SUnit *SU);
-
-  /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
-  /// node from a priority queue.  We should roll our own heap to make this
-  /// better or something.
-  void RemoveFromPriorityQueue(SUnit *SU) {
+  /// remove - This is a really inefficient way to remove a node from a
+  /// priority queue.  We should roll our own heap to make this better or
+  /// something.
+  void remove(SUnit *SU) {
     std::vector<SUnit*> Temp;
 
     assert(!Queue.empty() && "Not in queue!");
@@ -400,6 +402,18 @@ private:
     for (unsigned i = 0, e = Temp.size(); i != e; ++i)
       Queue.push(Temp[i]);
   }
+
+  // ScheduledNode - As nodes are scheduled, we look to see if there are any
+  // successor nodes that have a single unscheduled predecessor.  If so, that
+  // single predecessor has a higher priority, since scheduling it will make
+  // the node available.
+  void ScheduledNode(SUnit *Node);
+
+private:
+  void CalculatePriorities();
+  int CalcLatency(const SUnit &SU);
+  void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
+  SUnit *getSingleUnscheduledPred(SUnit *SU);
 };
 }
 
@@ -507,7 +521,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
 
   // Okay, we found a single predecessor that is available, but not scheduled.
   // Since it is available, it must be in the priority queue.  First remove it.
-  RemoveFromPriorityQueue(OnlyAvailablePred);
+  remove(OnlyAvailablePred);
 
   // Reinsert the node into the priority queue, which recomputes its
   // NumNodesSolelyBlocking value.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index ea23c278ba..0228d8afcd 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include <climits>
 #include <queue>
@@ -52,9 +53,16 @@ private:
   bool isBottomUp;
 
   /// AvailableQueue - The priority queue to use for the available SUnits.
-  ///
+  ///a
   SchedulingPriorityQueue *AvailableQueue;
 
+  /// LiveRegs / LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other nodes that
+  /// modify the registers can be scheduled.
+  SmallSet<unsigned, 4> LiveRegs;
+  std::vector<SUnit*> LiveRegDefs;
+  std::vector<unsigned> LiveRegCycles;
+
 public:
   ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
                     const TargetMachine &tm, bool isbottomup,
@@ -72,8 +80,13 @@ public:
 private:
   void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle);
   void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle);
+  void CapturePred(SUnit *PredSU, SUnit *SU, bool isChain);
   void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle);
   void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void UnscheduleNodeBottomUp(SUnit *SU);
+  SUnit *BackTrackBottomUp(SUnit*, unsigned, unsigned&, bool&);
+  SUnit *CopyAndMoveSuccessors(SUnit *SU);
+  bool DelayForLiveRegsBottomUp(SUnit *SU, unsigned &CurCycle);
   void ListScheduleTopDown();
   void ListScheduleBottomUp();
   void CommuteNodesToReducePressure();
@@ -84,7 +97,10 @@ private:
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGRRList::Schedule() {
   DOUT << "********** List Scheduling **********\n";
-
+
+  LiveRegDefs.resize(MRI->getNumRegs(), NULL);
+  LiveRegCycles.resize(MRI->getNumRegs(), 0);
+
   // Build scheduling units.
   BuildSchedUnits();
@@ -130,7 +146,7 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() {
         continue;
 
       SDNode *OpN = SU->Node->getOperand(j).Val;
-      SUnit *OpSU = SUnitMap[OpN];
+      SUnit *OpSU = SUnitMap[OpN][SU->InstanceNo];
       if (OpSU && OperandSeen.count(OpSU) == 1) {
         // Ok, so SU is not the last use of OpSU, but SU is two-address so
         // it will clobber OpSU. Try to commute SU if no other source operands
@@ -139,7 +155,7 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() {
         for (unsigned k = 0; k < NumOps; ++k) {
           if (k != j) {
             OpN = SU->Node->getOperand(k).Val;
-            OpSU = SUnitMap[OpN];
+            OpSU = SUnitMap[OpN][SU->InstanceNo];
             if (OpSU && OperandSeen.count(OpSU) == 1) {
               DoCommute = false;
               break;
@@ -178,9 +194,9 @@ void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
   PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);
 
   if (!isChain)
-    PredSU->NumSuccsLeft--;
+    --PredSU->NumSuccsLeft;
   else
-    PredSU->NumChainSuccsLeft--;
+    --PredSU->NumChainSuccsLeft;
 
 #ifndef NDEBUG
   if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
@@ -209,19 +225,273 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
   SU->Cycle = CurCycle;
 
   AvailableQueue->ScheduledNode(SU);
-  Sequence.push_back(SU);
 
   // Bottom up: release predecessors
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I)
+       I != E; ++I) {
     ReleasePred(I->Dep, I->isCtrl, CurCycle);
+    if (I->Cost < 0) {
+      // This is a physical register dependency and it's impossible or
+      // expensive to copy the register. Make sure nothing that can
+      // clobber the register is scheduled between the predecessor and
+      // this node.
+      if (LiveRegs.insert(I->Reg)) {
+        LiveRegDefs[I->Reg] = I->Dep;
+        LiveRegCycles[I->Reg] = CurCycle;
+      }
+    }
+  }
+
+  // Release all the implicit physical register defs that are live.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->Cost < 0) {
+      if (LiveRegCycles[I->Reg] == I->Dep->Cycle) {
+        LiveRegs.erase(I->Reg);
+        assert(LiveRegDefs[I->Reg] == SU &&
+               "Physical register dependency violated?");
+        LiveRegDefs[I->Reg] = NULL;
+        LiveRegCycles[I->Reg] = 0;
+      }
+    }
+  }
+
   SU->isScheduled = true;
 }
 
-/// isReady - True if node's lower cycle bound is less or equal to the current
-/// scheduling cycle. Always true if all nodes have uniform latency 1.
-static inline bool isReady(SUnit *SU, unsigned CurCycle) {
-  return SU->CycleBound <= CurCycle;
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {
+  PredSU->CycleBound = 0;
+  for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end();
+       I != E; ++I) {
+    if (I->Dep == SU)
+      continue;
+    PredSU->CycleBound = std::max(PredSU->CycleBound,
+                                  I->Dep->Cycle + PredSU->Latency);
+  }
+
+  if (PredSU->isAvailable) {
+    PredSU->isAvailable = false;
+    if (!PredSU->isPending)
+      AvailableQueue->remove(PredSU);
+  }
+
+  if (!isChain)
+    ++PredSU->NumSuccsLeft;
+  else
+    ++PredSU->NumChainSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
+/// and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+  DOUT << "*** Unscheduling [" << SU->Cycle << "]: ";
+  DEBUG(SU->dump(&DAG));
+
+  AvailableQueue->UnscheduledNode(SU);
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    CapturePred(I->Dep, SU, I->isCtrl);
+    if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) {
+      LiveRegs.erase(I->Reg);
+      assert(LiveRegDefs[I->Reg] == I->Dep &&
+             "Physical register dependency violated?");
+      LiveRegDefs[I->Reg] = NULL;
+      LiveRegCycles[I->Reg] = 0;
+    }
+  }
+
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->Cost < 0) {
+      if (LiveRegs.insert(I->Reg)) {
+        assert(!LiveRegDefs[I->Reg] &&
+               "Physical register dependency violated?");
+        LiveRegDefs[I->Reg] = SU;
+      }
+      if (I->Dep->Cycle < LiveRegCycles[I->Reg])
+        LiveRegCycles[I->Reg] = I->Dep->Cycle;
+    }
+  }
+
+  SU->Cycle = 0;
+  SU->isScheduled = false;
+  SU->isAvailable = true;
+  AvailableQueue->push(SU);
+}
+
+/// BackTrackBottomUp - Back track scheduling to a previous cycle specified in
+/// BTCycle in order to schedule a specific node. Returns the last unscheduled
+/// SUnit. Also returns whether a successor was unscheduled in the process.
+SUnit *ScheduleDAGRRList::BackTrackBottomUp(SUnit *SU, unsigned BTCycle,
+                                            unsigned &CurCycle,
+                                            bool &SuccUnsched) {
+  SuccUnsched = false;
+  SUnit *OldSU = NULL;
+  while (CurCycle > BTCycle) {
+    OldSU = Sequence.back();
+    Sequence.pop_back();
+    if (SU->isSucc(OldSU))
+      SuccUnsched = true;
+    UnscheduleNodeBottomUp(OldSU);
+    --CurCycle;
+  }
+
+  if (SU->isSucc(OldSU)) {
+    assert(false && "Something is wrong!");
+    abort();
+  }
+
+  return OldSU;
+}
+
+/// isSafeToCopy - True if the SUnit for the given SDNode can safely be cloned,
+/// i.e. the node does not produce a flag, it does not read a flag and it does
+/// not have an incoming chain.
+static bool isSafeToCopy(SDNode *N) {
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return false;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDOperand &Op = N->getOperand(i);
+    MVT::ValueType VT = Op.Val->getValueType(Op.ResNo);
+    if (VT == MVT::Other || VT == MVT::Flag)
+      return false;
+  }
+
+  return true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+  SUnit *NewSU = Clone(SU);
+
+  // New SUnit has the exact same predecessors.
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I)
+    if (!I->isSpecial) {
+      NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost);
+      NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1);
+    }
+
+  // Only copy scheduled successors. Cut them from old node's successor
+  // list and move them over.
+  SmallVector<SDep*, 2> DelDeps;
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isSpecial)
+      continue;
+    NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1);
+    if (I->Dep->isScheduled) {
+      I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost);
+      DelDeps.push_back(I);
+    }
+  }
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+    SUnit *Succ = DelDeps[i]->Dep;
+    bool isCtrl = DelDeps[i]->isCtrl;
+    Succ->removePred(SU, isCtrl, false);
+  }
+
+  AvailableQueue->updateNode(SU);
+  AvailableQueue->addNode(NewSU);
+
+  return NewSU;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specific node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, unsigned &CurCycle) {
+  if (LiveRegs.empty())
+    return false;
+
+  // If this node would clobber any "live" register, then it's not ready.
+  // However, if this is the last "available" node, then we may have to
+  // backtrack.
+  bool MustSched = AvailableQueue->empty();
+  SmallVector<unsigned, 4> LRegs;
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->Cost < 0) {
+      unsigned Reg = I->Reg;
+      if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep)
+        LRegs.push_back(Reg);
+      for (const unsigned *Alias = MRI->getAliasSet(Reg);
+           *Alias; ++Alias)
+        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep)
+          LRegs.push_back(*Alias);
+    }
+  }
+
+  for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) {
+    SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1];
+    if (!Node->isTargetOpcode())
+      continue;
+    const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode());
+    if (!TID.ImplicitDefs)
+      continue;
+    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
+      if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU)
+        LRegs.push_back(*Reg);
+      for (const unsigned *Alias = MRI->getAliasSet(*Reg);
+           *Alias; ++Alias)
+        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU)
+          LRegs.push_back(*Alias);
+    }
+  }
+
+  if (MustSched && !LRegs.empty()) {
+    // We have made a mistake by scheduling some nodes too early. Now we must
+    // schedule the current node which will end up clobbering some live
+    // registers that are expensive / impossible to copy. Try unscheduling
+    // up to the point where it's safe to schedule the current node.
+    unsigned LiveCycle = CurCycle;
+    for (unsigned i = 0, e = LRegs.size(); i != e; ++i) {
+      unsigned Reg = LRegs[i];
+      unsigned LCycle = LiveRegCycles[Reg];
+      LiveCycle = std::min(LiveCycle, LCycle);
+    }
+
+    if (SU->CycleBound < LiveCycle) {
+      bool SuccUnsched = false;
+      SUnit *OldSU = BackTrackBottomUp(SU, LiveCycle, CurCycle, SuccUnsched);
+      // Force the current node to be scheduled before the node that
+      // requires the physical reg dep.
+      if (OldSU->isAvailable) {
+        OldSU->isAvailable = false;
+        AvailableQueue->remove(OldSU);
+      }
+      SU->addPred(OldSU, true, true);
+      // If a successor has been unscheduled, then it's not possible to
+      // schedule the current node.
+      return SuccUnsched;
+    } else {
+      // Try duplicating the nodes that produce these "expensive to copy"
+      // values to break the dependency.
+      for (unsigned i = 0, e = LRegs.size(); i != e; ++i) {
+        unsigned Reg = LRegs[i];
+        SUnit *LRDef = LiveRegDefs[Reg];
+        if (isSafeToCopy(LRDef->Node)) {
+          SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
+          LiveRegDefs[Reg] = NewDef;
+          NewDef->addPred(SU, true, true);
+          SU->isAvailable = false;
+          AvailableQueue->push(NewDef);
+        } else {
+          assert(false && "Expensive copying is required?");
+          abort();
+        }
+      }
+      return true;
+    }
+  }
+  return !LRegs.empty();
 }
 
 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
@@ -229,30 +499,49 @@ static inline bool isReady(SUnit *SU, unsigned CurCycle) {
 void ScheduleDAGRRList::ListScheduleBottomUp() {
   unsigned CurCycle = 0;
   // Add root to Available queue.
-  AvailableQueue->push(SUnitMap[DAG.getRoot().Val]);
+  SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
+  RootSU->isAvailable = true;
+  AvailableQueue->push(RootSU);
 
   // While Available queue is not empty, grab the node with the highest
   // priority. If it is not ready put it back. Schedule the node.
-  std::vector<SUnit*> NotReady;
+  SmallVector<SUnit*, 4> NotReady;
   while (!AvailableQueue->empty()) {
-    SUnit *CurNode = AvailableQueue->pop();
-    while (CurNode && !isReady(CurNode, CurCycle)) {
-      NotReady.push_back(CurNode);
-      CurNode = AvailableQueue->pop();
+    SUnit *CurSU = AvailableQueue->pop();
+    while (CurSU) {
+      if (CurSU->CycleBound <= CurCycle)
+        if (!DelayForLiveRegsBottomUp(CurSU, CurCycle))
+          break;
+
+      // Verify node is still ready. It may not be in case the
+      // scheduler has backtracked.
+      if (CurSU->isAvailable) {
+        CurSU->isPending = true;
+        NotReady.push_back(CurSU);
+      }
+      CurSU = AvailableQueue->pop();
    }
 
     // Add the nodes that aren't ready back onto the available list.
-    AvailableQueue->push_all(NotReady);
+    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+      NotReady[i]->isPending = false;
+      if (NotReady[i]->isAvailable)
+        AvailableQueue->push(NotReady[i]);
+    }
    NotReady.clear();
 
-    if (CurNode != NULL)
-      ScheduleNodeBottomUp(CurNode, CurCycle);
-    CurCycle++;
+    if (!CurSU)
+      Sequence.push_back(0);
+    else {
+      ScheduleNodeBottomUp(CurSU, CurCycle);
+      Sequence.push_back(CurSU);
+    }
+    ++CurCycle;
   }
 
   // Add entry node last
   if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
-    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
     Sequence.push_back(Entry);
   }
 
@@ -291,9 +580,9 @@ void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
   SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);
 
   if (!isChain)
-    SuccSU->NumPredsLeft--;
+    --SuccSU->NumPredsLeft;
   else
-    SuccSU->NumChainPredsLeft--;
+    --SuccSU->NumChainPredsLeft;
 
 #ifndef NDEBUG
   if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) {
@@ -320,7 +609,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
   SU->Cycle = CurCycle;
 
   AvailableQueue->ScheduledNode(SU);
-  Sequence.push_back(SU);
 
   // Top down: release successors
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -333,7 +621,7 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
 /// schedulers.
 void ScheduleDAGRRList::ListScheduleTopDown() {
   unsigned CurCycle = 0;
-  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
 
   // All leaves to Available queue.
   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
@@ -346,24 +634,29 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
 
   // Emit the entry node first.
   ScheduleNodeTopDown(Entry, CurCycle);
-  CurCycle++;
+  Sequence.push_back(Entry);
+  ++CurCycle;
 
   // While Available queue is not empty, grab the node with the highest
   // priority. If it is not ready put it back. Schedule the node.
   std::vector<SUnit*> NotReady;
   while (!AvailableQueue->empty()) {
-    SUnit *CurNode = AvailableQueue->pop();
-    while (CurNode && !isReady(CurNode, CurCycle)) {
-      NotReady.push_back(CurNode);
-      CurNode = AvailableQueue->pop();
+    SUnit *CurSU = AvailableQueue->pop();
+    while (CurSU && CurSU->CycleBound > CurCycle) {
+      NotReady.push_back(CurSU);
+      CurSU = AvailableQueue->pop();
     }
 
     // Add the nodes that aren't ready back onto the available list.
     AvailableQueue->push_all(NotReady);
     NotReady.clear();
 
-    if (CurNode != NULL)
-      ScheduleNodeTopDown(CurNode, CurCycle);
+    if (!CurSU)
+      Sequence.push_back(0);
+    else {
+      ScheduleNodeTopDown(CurSU, CurCycle);