diff options
Diffstat (limited to 'lib/CodeGen/InstrSched/InstrScheduling.cpp')
-rw-r--r-- | lib/CodeGen/InstrSched/InstrScheduling.cpp | 807 |
1 files changed, 429 insertions, 378 deletions
diff --git a/lib/CodeGen/InstrSched/InstrScheduling.cpp b/lib/CodeGen/InstrSched/InstrScheduling.cpp index 34802c9e5b..0b194207ca 100644 --- a/lib/CodeGen/InstrSched/InstrScheduling.cpp +++ b/lib/CodeGen/InstrSched/InstrScheduling.cpp @@ -9,16 +9,26 @@ // 7/23/01 - Vikram Adve - Created //**************************************************************************/ + +//************************* User Include Files *****************************/ + #include "llvm/CodeGen/InstrScheduling.h" #include "SchedPriorities.h" #include "llvm/Analysis/LiveVar/BBLiveVar.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Instruction.h" + + +//************************ System Include Files *****************************/ + #include <hash_set> #include <algorithm> #include <iterator> + +//************************* External Data Types *****************************/ + cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags, "enable instruction scheduling debugging information", clEnumValN(Sched_NoDebugInfo, "n", "disable debug output"), @@ -27,55 +37,12 @@ cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags, clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0); +//************************* Internal Data Types *****************************/ + class InstrSchedule; class SchedulingManager; class DelaySlotInfo; -static void ForwardListSchedule (SchedulingManager& S); - -static void RecordSchedule (const BasicBlock* bb, - const SchedulingManager& S); - -static unsigned ChooseOneGroup (SchedulingManager& S); - -static void MarkSuccessorsReady (SchedulingManager& S, - const SchedGraphNode* node); - -static unsigned FindSlotChoices (SchedulingManager& S, - DelaySlotInfo*& getDelaySlotInfo); - -static void AssignInstructionsToSlots(class SchedulingManager& S, - unsigned maxIssue); - -static void ScheduleInstr (class SchedulingManager& S, - const SchedGraphNode* node, - unsigned int slotNum, - cycles_t curTime); - -static bool ViolatesMinimumGap (const SchedulingManager& S, - MachineOpCode opCode, - const cycles_t inCycle); - -static bool ConflictsWithChoices (const SchedulingManager& S, - MachineOpCode opCode); - -static void ChooseInstructionsForDelaySlots(SchedulingManager& S, - const BasicBlock* bb, - SchedGraph* graph); - -static bool NodeCanFillDelaySlot (const SchedulingManager& S, - const SchedGraphNode* node, - const SchedGraphNode* brNode, - bool nodeIsPredecessor); - -static void MarkNodeForDelaySlot (SchedulingManager& S, - SchedGraph* graph, - SchedGraphNode* node, - const SchedGraphNode* brNode, - bool nodeIsPredecessor); - -//************************* Internal Data Types *****************************/ - //---------------------------------------------------------------------- // class InstrGroup: @@ -369,7 +336,7 @@ public: delayedNodeSlotNum = slotNum; } - void scheduleDelayedNode (SchedulingManager& S); + unsigned scheduleDelayedNode (SchedulingManager& S); }; @@ -539,7 +506,7 @@ public: if (createIfMissing) { dinfo = new DelaySlotInfo(bn, - getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode())); + getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode())); delaySlotInfoForBranches[bn] = dinfo; } else @@ -608,159 +575,63 @@ SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node, } } -//************************* External Functions *****************************/ +//************************* Internal Functions *****************************/ -//--------------------------------------------------------------------------- -// Function: ScheduleInstructionsWithSSA -// -// Purpose: -// Entry point for instruction scheduling on SSA form. -// Schedules the machine instructions generated by instruction selection. -// Assumes that register allocation has not been done, i.e., operands -// are still in SSA form. -//--------------------------------------------------------------------------- - -bool -ScheduleInstructionsWithSSA(Method* method, - const TargetMachine &target) +static void +AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) { - SchedGraphSet graphSet(method, target); - - if (SchedDebugLevel >= Sched_PrintSchedGraphs) - { - cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING" - << endl; - graphSet.dump(); - } - - for (SchedGraphSet::const_iterator GI=graphSet.begin(); - GI != graphSet.end(); ++GI) - { - SchedGraph* graph = (*GI).second; - const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks(); - assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks"); - const BasicBlock* bb = bbvec[0]; - - if (SchedDebugLevel >= Sched_PrintSchedTrace) - cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n"; - - SchedPriorities schedPrio(method, graph); // expensive! - SchedulingManager S(target, graph, schedPrio); - - ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph - - ForwardListSchedule(S); // computes schedule in S - - RecordSchedule((*GI).first, S); // records schedule in BB - } + // find the slot to start from, in the current cycle + unsigned int startSlot = 0; + cycles_t curTime = S.getTime(); - if (SchedDebugLevel >= Sched_PrintMachineCode) - { - cout << endl - << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl; - PrintMachineInstructions(method); - } + assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot); - return false; // no reason to fail yet -} - - -// Check minimum gap requirements relative to instructions scheduled in -// previous cycles. -// Note that we do not need to consider `nextEarliestIssueTime' here because -// that is also captured in the earliest start times for each opcode. -// -static inline bool -ViolatesMinimumGap(const SchedulingManager& S, - MachineOpCode opCode, - const cycles_t inCycle) -{ - return (inCycle < S.getEarliestStartTimeForOp(opCode)); -} - - -// Check if the instruction would conflict with instructions already -// chosen for the current cycle -// -static inline bool -ConflictsWithChoices(const SchedulingManager& S, - MachineOpCode opCode) -{ - // Check if the instruction must issue by itself, and some feasible - // choices have already been made for this cycle - if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode)) - return true; + // If only one instruction can be issued, do so. + if (maxIssue == 1) + for (unsigned s=startSlot; s < S.nslots; s++) + if (S.getChoicesForSlot(s).size() > 0) + {// found the one instruction + S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime); + return; + } - // For each class that opCode belongs to, check if there are too many - // instructions of that class. + // Otherwise, choose from the choices for each slot // - const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode); - return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc)); -} - - -// Check if any issue restrictions would prevent the instruction from -// being issued in the current cycle -// -bool -instrIsFeasible(const SchedulingManager& S, - MachineOpCode opCode) -{ - // skip the instruction if it cannot be issued due to issue restrictions - // caused by previously issued instructions - if (ViolatesMinimumGap(S, opCode, S.getTime())) - return false; - - // skip the instruction if it cannot be issued due to issue restrictions - // caused by previously chosen instructions for the current cycle - if (ConflictsWithChoices(S, opCode)) - return false; - - return true; -} - - -//************************* Internal Functions *****************************/ - - -static void -ForwardListSchedule(SchedulingManager& S) -{ - unsigned N; - const SchedGraphNode* node; - - S.schedPrio.initialize(); + InstrGroup* igroup = S.isched.getIGroup(S.getTime()); + assert(igroup != NULL && "Group creation failed?"); - while ((N = S.schedPrio.getNumReady()) > 0) + // Find a slot that has only a single choice, and take it. + // If all slots have 0 or multiple choices, pick the first slot with + // choices and use its last instruction (just to avoid shifting the vector). + unsigned numIssued; + for (numIssued = 0; numIssued < maxIssue; numIssued++) { - // Choose one group of instructions for a cycle. This will - // advance S.getTime() to the first cycle instructions can be issued. - // It may also schedule delay slot instructions in later cycles, - // but those are ignored here because they are outside the graph. - // - unsigned numIssued = ChooseOneGroup(S); - assert(numIssued > 0 && "Deadlock in list scheduling algorithm?"); - - // Notify the priority manager of scheduled instructions and mark - // any successors that may now be ready - // - const InstrGroup* igroup = S.isched.getIGroup(S.getTime()); - for (unsigned int s=0; s < S.nslots; s++) - if ((node = (*igroup)[s]) != NULL) + int chosenSlot = -1, chosenNodeIndex = -1; + for (unsigned s=startSlot; s < S.nslots; s++) + if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1) { - S.schedPrio.issuedReadyNodeAt(S.getTime(), node); - MarkSuccessorsReady(S, node); + chosenSlot = (int) s; + break; } - // Move to the next the next earliest cycle for which - // an instruction can be issued, or the next earliest in which - // one will be ready, or to the next cycle, whichever is latest. - // - S.updateTime(max(S.getTime() + 1, - max(S.getEarliestIssueTime(), - S.schedPrio.getEarliestReadyTime()))); + if (chosenSlot == -1) + for (unsigned s=startSlot; s < S.nslots; s++) + if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0) + { + chosenSlot = (int) s; + break; + } + + if (chosenSlot != -1) + { // Insert the chosen instr in the chosen slot and + // erase it from all slots. + const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin(); + S.scheduleInstr(node, chosenSlot, curTime); + } } + + assert(numIssued > 0 && "Should not happen when maxIssue > 0!"); } @@ -809,46 +680,6 @@ RecordSchedule(const BasicBlock* bb, const SchedulingManager& S) } -static unsigned -ChooseOneGroup(SchedulingManager& S) -{ - assert(S.schedPrio.getNumReady() > 0 - && "Don't get here without ready instructions."); - - DelaySlotInfo* getDelaySlotInfo = NULL; - - // Choose up to `nslots' feasible instructions and their possible slots. - unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo); - - while (numIssued == 0) - { - S.updateTime(S.getTime()+1); - numIssued = FindSlotChoices(S, getDelaySlotInfo); - } - - AssignInstructionsToSlots(S, numIssued); - - if (getDelaySlotInfo != NULL) - getDelaySlotInfo->scheduleDelayedNode(S); - - // Print trace of scheduled instructions before newly ready ones - if (SchedDebugLevel >= Sched_PrintSchedTrace) - { - cout << " Cycle " << S.getTime() << " : Scheduled instructions:\n"; - const InstrGroup* igroup = S.isched.getIGroup(S.getTime()); - for (unsigned int s=0; s < S.nslots; s++) - { - cout << " "; - if ((*igroup)[s] != NULL) - cout << * ((*igroup)[s])->getMachineInstr() << endl; - else - cout << "<none>" << endl; - } - } - - return numIssued; -} - static void MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node) @@ -1025,7 +856,7 @@ FindSlotChoices(SchedulingManager& S, { // Try to assign every other instruction to a lower numbered // slot than delayedNodeSlot. - MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode(); + MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode(); bool noSlotFound = true; unsigned int s; for (s=startSlot; s < delayedNodeSlot; s++) @@ -1093,7 +924,7 @@ FindSlotChoices(SchedulingManager& S, for (unsigned i=0; i < S.getNumChoices() && i < indexForBreakingNode; i++) { - MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode(); + MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode(); // If a higher priority instruction cannot be assigned to // any earlier slots, don't schedule the breaking instruction. @@ -1144,63 +975,95 @@ FindSlotChoices(SchedulingManager& S, } -static void -AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) +static unsigned +ChooseOneGroup(SchedulingManager& S) { - // find the slot to start from, in the current cycle - unsigned int startSlot = 0; - cycles_t curTime = S.getTime(); + assert(S.schedPrio.getNumReady() > 0 + && "Don't get here without ready instructions."); - assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot); + cycles_t firstCycle = S.getTime(); + DelaySlotInfo* getDelaySlotInfo = NULL; - // If only one instruction can be issued, do so. - if (maxIssue == 1) - for (unsigned s=startSlot; s < S.nslots; s++) - if (S.getChoicesForSlot(s).size() > 0) - {// found the one instruction - S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime); - return; - } + // Choose up to `nslots' feasible instructions and their possible slots. + unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo); - // Otherwise, choose from the choices for each slot - // - InstrGroup* igroup = S.isched.getIGroup(S.getTime()); - assert(igroup != NULL && "Group creation failed?"); + while (numIssued == 0) + { + S.updateTime(S.getTime()+1); + numIssued = FindSlotChoices(S, getDelaySlotInfo); + } - // Find a slot that has only a single choice, and take it. - // If all slots have 0 or multiple choices, pick the first slot with - // choices and use its last instruction (just to avoid shifting the vector). - unsigned numIssued; - for (numIssued = 0; numIssued < maxIssue; numIssued++) + AssignInstructionsToSlots(S, numIssued); + + if (getDelaySlotInfo != NULL) + numIssued += getDelaySlotInfo->scheduleDelayedNode(S); + + // Print trace of scheduled instructions before newly ready ones + if (SchedDebugLevel >= Sched_PrintSchedTrace) { - int chosenSlot = -1, chosenNodeIndex = -1; - for (unsigned s=startSlot; s < S.nslots; s++) - if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1) - { - chosenSlot = (int) s; - break; - } - - if (chosenSlot == -1) - for (unsigned s=startSlot; s < S.nslots; s++) - if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0) - { - chosenSlot = (int) s; - break; - } - - if (chosenSlot != -1) - { // Insert the chosen instr in the chosen slot and - // erase it from all slots. - const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin(); - S.scheduleInstr(node, chosenSlot, curTime); - } + for (cycles_t c = firstCycle; c <= S.getTime(); c++) + { + cout << " Cycle " << c << " : Scheduled instructions:\n"; + const InstrGroup* igroup = S.isched.getIGroup(c); + for (unsigned int s=0; s < S.nslots; s++) + { + cout << " "; + if ((*igroup)[s] != NULL) + cout << * ((*igroup)[s])->getMachineInstr() << endl; + else + cout << "<none>" << endl; + } + } } - assert(numIssued > 0 && "Should not happen when maxIssue > 0!"); + return numIssued; } +static void +ForwardListSchedule(SchedulingManager& S) +{ + unsigned N; + const SchedGraphNode* node; + + S.schedPrio.initialize(); + + while ((N = S.schedPrio.getNumReady()) > 0) + { + cycles_t nextCycle = S.getTime(); + + // Choose one group of instructions for a cycle, plus any delay slot + // instructions (which may overflow into successive cycles). + // This will advance S.getTime() to the last cycle in which + // instructions are actually issued. + // + unsigned numIssued = ChooseOneGroup(S); + assert(numIssued > 0 && "Deadlock in list scheduling algorithm?"); + + // Notify the priority manager of scheduled instructions and mark + // any successors that may now be ready + // + for (cycles_t c = nextCycle; c <= S.getTime(); c++) + { + const InstrGroup* igroup = S.isched.getIGroup(c); + for (unsigned int s=0; s < S.nslots; s++) + if ((node = (*igroup)[s]) != NULL) + { + S.schedPrio.issuedReadyNodeAt(S.getTime(), node); + MarkSuccessorsReady(S, node); + } + } + + // Move to the next the next earliest cycle for which + // an instruction can be issued, or the next earliest in which + // one will be ready, or to the next cycle, whichever is latest. + // + S.updateTime(max(S.getTime() + 1, + max(S.getEarliestIssueTime(), + S.schedPrio.getEarliestReadyTime()))); + } +} + //--------------------------------------------------------------------- // Code for filling delay slots for delayed terminator instructions @@ -1211,107 +1074,7 @@ AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) // when we cannot find single-cycle instructions that can be reordered. //---------------------------------------------------------------------- -static void -ChooseInstructionsForDelaySlots(SchedulingManager& S, - const BasicBlock* bb, - SchedGraph* graph) -{ - // Look for instructions that can be used for delay slots. - // Remove them from the graph, and mark them to be used for delay slots. - const MachineInstrInfo& mii = S.getInstrInfo(); - const TerminatorInst* term = bb->getTerminator(); - MachineCodeForVMInstr& termMvec = term->getMachineInstrVec(); - - // Find the first branch instr in the sequence of machine instrs for term - // - unsigned first = 0; - while (! mii.isBranch(termMvec[first]->getOpCode())) - ++first; - assert(first < termMvec.size() && - "No branch instructions for BR? Ok, but weird! Delete assertion."); - if (first == termMvec.size()) - return; - - SchedGraphNode* brNode = graph->getGraphNodeForInstr(termMvec[first]); - assert(! mii.isCall(brNode->getMachineInstr()->getOpCode()) && "Call used as terminator?"); - - unsigned ndelays = mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode()); - if (ndelays == 0) - return; - - // Use vectors to remember the nodes chosen for delay slots, and the - // NOPs that will be unused. We cannot remove them from the graph while - // walking through the preds and succs of the brNode here, so we - // remember the nodes in the vectors and remove them later. - // We use separate vectors for the single-cycle and multi-cycle nodes, - // so that we can give preference to single-cycle nodes. - // - vector<SchedGraphNode*> sdelayNodeVec; - vector<SchedGraphNode*> mdelayNodeVec; - vector<SchedGraphNode*> nopNodeVec; - unsigned numUseful = 0; - - sdelayNodeVec.reserve(ndelays); - - for (sg_pred_iterator P = pred_begin(brNode); - P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P) - if (! (*P)->isDummyNode() && - ! mii.isNop((*P)->getMachineInstr()->getOpCode()) && - NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true)) - { - ++numUseful; - if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1) - mdelayNodeVec.push_back(*P); - else - sdelayNodeVec.push_back(*P); - } - - // If not enough single-cycle instructions were found, select the - // lowest-latency multi-cycle instructions and use them. - // Note that this is the most efficient code when only 1 (or even 2) - // values need to be selected. - // - while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) - { - unsigned latency; - unsigned minLatency = mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode()); - unsigned minIndex = 0; - for (unsigned i=1; i < mdelayNodeVec.size(); i++) - if (minLatency >= - (latency = mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode()))) - { - minLatency = latency; - minIndex = i; - } - sdelayNodeVec.push_back(mdelayNodeVec[minIndex]); - if (sdelayNodeVec.size() < ndelays) // avoid the last erase! - mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex); - } - - // Now, remove the NOPs currently in delay slots from the graph. - // If not enough useful instructions were found, use the NOPs to - // fill delay slots, otherwise, just discard them. - for (sg_succ_iterator I=succ_begin(brNode); I != succ_end(brNode); ++I) - if (! (*I)->isDummyNode() - && mii.isNop((*I)->getMachineInstr()->getOpCode())) - { - if (sdelayNodeVec.size() < ndelays) - sdelayNodeVec.push_back(*I); - else - nopNodeVec.push_back(*I); - } - - // Mark the nodes chosen for delay slots. This removes them from the graph. - for (unsigned i=0; i < sdelayNodeVec.size(); i++) - MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], brNode, true); - - // And remove the unused NOPs from the graph. - for (unsigned i=0; i < nopNodeVec.size(); i++) - graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true); -} - - -bool +static bool NodeCanFillDelaySlot(const SchedulingManager& S, const SchedGraphNode* node, const SchedGraphNode* brNode, @@ -1367,7 +1130,7 @@ NodeCanFillDelaySlot(const SchedulingManager& S, } -void +static void MarkNodeForDelaySlot(SchedulingManager& S, SchedGraph* graph, SchedGraphNode* node, @@ -1391,10 +1154,173 @@ MarkNodeForDelaySlot(SchedulingManager& S, } +void +FindUsefulInstructionsForDelaySlots(SchedulingManager& S, + SchedGraphNode* brNode, + vector<SchedGraphNode*>& sdelayNodeVec) +{ + const MachineInstrInfo& mii = S.getInstrInfo(); + unsigned ndelays = + mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode()); + + if (ndelays == 0) + return; + + sdelayNodeVec.reserve(ndelays); + + // Use a separate vector to hold the feasible multi-cycle nodes. + // These will be used if not enough single-cycle nodes are found. + // + vector<SchedGraphNode*> mdelayNodeVec; + + for (sg_pred_iterator P = pred_begin(brNode); + P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P) + if (! (*P)->isDummyNode() && + ! mii.isNop((*P)->getMachineInstr()->getOpCode()) && + NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true)) + { + if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1) + mdelayNodeVec.push_back(*P); + else + sdelayNodeVec.push_back(*P); + } + + // If not enough single-cycle instructions were found, select the + // lowest-latency multi-cycle instructions and use them. + // Note that this is the most efficient code when only 1 (or even 2) + // values need to be selected. + // + while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) + { + unsigned lmin = + mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode()); + unsigned minIndex = 0; + for (unsigned i=1; i < mdelayNodeVec.size(); i++) + { + unsigned li = + mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode()); + if (lmin >= li) + { + lmin = li; + minIndex = i; + } + } + sdelayNodeVec.push_back(mdelayNodeVec[minIndex]); + if (sdelayNodeVec.size() < ndelays) // avoid the last erase! + mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex); + } +} + + +// Remove the NOPs currently in delay slots from the graph. +// Mark instructions specified in sdelayNodeVec to replace them. +// If not enough useful instructions were found, mark the NOPs to be used +// for filling delay slots, otherwise, otherwise just discard them. +// +void +ReplaceNopsWithUsefulInstr(SchedulingManager& S, + SchedGraphNode* node, + vector<SchedGraphNode*> sdelayNodeVec, + SchedGraph* graph) +{ + vector<SchedGraphNode*> nopNodeVec; + const MachineInstrInfo& mii = S.getInstrInfo(); + unsigned ndelays= mii.getNumDelaySlots(node->getMachineInstr()->getOpCode()); + assert(ndelays > 0 && "Unnecessary call to replace NOPs"); + + // Remove the NOPs currently in delay slots from the graph. + // If not enough useful instructions were found, use the NOPs to + // fill delay slots, otherwise, just discard them. + for (sg_succ_iterator I=succ_begin(node); I != succ_end(node); ++I) + if (! (*I)->isDummyNode() + && mii.isNop((*I)->getMachineInstr()->getOpCode())) + { + if (sdelayNodeVec.size() < ndelays) + sdelayNodeVec.push_back(*I); + else + nopNodeVec.push_back(*I); + } + assert(sdelayNodeVec.size() == ndelays); + + // Mark the nodes chosen for delay slots. This removes them from the graph. + for (unsigned i=0; i < sdelayNodeVec.size(); i++) + MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true); + + // And remove the unused NOPs from the graph. + for (unsigned i=0; i < nopNodeVec.size(); i++) + graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true); +} + + +// For all delayed instructions, choose instructions to put in the delay +// slots and pull those out of the graph. Mark them for the delay slots +// in the DelaySlotInfo object for that graph node. If no useful work +// is found for a delay slot, use the NOP that is currently in that slot. +// +// We try to fill the delay slots with useful work for all instructions +// except CALLs. For CALLs, it is nearly always possible to use one of the +// call sequence instrs and putting anything else in the delay slot could be +// suboptimal. +// +static void +ChooseInstructionsForDelaySlots(SchedulingManager& S, + const BasicBlock* bb, + SchedGraph* graph) +{ + const MachineInstrInfo& mii = S.getInstrInfo(); + const TerminatorInst* termInstr = bb->getTerminator(); + MachineCodeForVMInstr& termMvec = termInstr->getMachineInstrVec(); + vector<SchedGraphNode*> delayNodeVec; + const MachineInstr* brInstr; + + assert(termInstr->getOpcode() != Instruction::Call + && "Call used as terminator?"); + + // To find instructions that need delay slots without searching the entire + // machine code, we assume the only delayed instructions are CALLs or + // instructions generated for the terminator inst. + // Find the first branch instr in the sequence of machine instrs for term + // + unsigned first = 0; + while (first < termMvec.size() && + ! mii.isBranch(termMvec[first]->getOpCode())) + { + ++first; + } + assert(first < termMvec.size() && + "No branch instructions for BR? Ok, but weird! Delete assertion."); + + brInstr = (first < termMvec.size())? termMvec[first] : NULL; + + // Compute a vector of the nodes chosen for delay slots and then + // mark delay slots to replace NOPs with these useful instructions. + // + if (brInstr != NULL) + { + SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr); + FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec); + ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph); + } + + // Also mark delay slots for other delayed instructions to hold NOPs. + // Simply passing in an empty delayNodeVec will have this effect. + // + delayNodeVec.clear(); + const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec(); + for (unsigned i=0; i < bbMvec.size(); i++) + if (bbMvec[i] != brInstr && + mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0) + { + SchedGraphNode* node = graph->getGraphNodeForInstr(bbMvec[i]); + ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph); + } +} + + // // Schedule the delayed branch and its delay slots // -void +unsigned DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S) { assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch"); @@ -1461,5 +1387,130 @@ DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S) S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime); break; } + + return 1 + ndelays; +} + + +// Check if the instruction would conflict with instructions already +// chosen for the current cycle +// +static inline bool +ConflictsWithChoices(const SchedulingManager& S, + MachineOpCode opCode) +{ + // Check if the instruction must issue by itself, and some feasible + // choices have already been made for this cycle + if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode)) + return true; + + // For each class that opCode belongs to, check if there are too many + // instructions of that class. + // + const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode); + return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc)); +} + + +//************************* External Functions *****************************/ + + +//--------------------------------------------------------------------------- +// Function: ViolatesMinimumGap +// +// Purpose: +// Check minimum gap requirements relative to instructions scheduled in +// previous cycles. +// Note that we do not need to consider `nextEarliestIssueTime' here because +// that is also captured in the earliest start times for each opcode. +//--------------------------------------------------------------------------- + +static inline bool +ViolatesMinimumGap(const SchedulingManager& S, + MachineOpCode opCode, + const cycles_t inCycle) +{ + return (inCycle < S.getEarliestStartTimeForOp(opCode)); +} + + +//--------------------------------------------------------------------------- +// Function: instrIsFeasible +// +// Purpose: +// Check if any issue restrictions would prevent the instruction from +// being issued in the current cycle +//--------------------------------------------------------------------------- + +bool +instrIsFeasible(const SchedulingManager& S, + MachineOpCode opCode) +{ + // skip the instruction if it cannot be issued due to issue restrictions + // caused by previously issued instructions + if (ViolatesMinimumGap(S, opCode, S.getTime())) + return false; + + // skip the instruction if it cannot be issued due to issue restrictions + // caused by previously chosen instructions for the current cycle + if (ConflictsWithChoices(S, opCode)) + return false; + + return true; +} + +//--------------------------------------------------------------------------- +// Function: ScheduleInstructionsWithSSA +// +// Purpose: +// Entry point for instruction scheduling on SSA form. +// Schedules the machine instructions generated by instruction selection. +// Assumes that register allocation has not been done, i.e., operands +// are still in SSA form. +//--------------------------------------------------------------------------- + +bool +ScheduleInstructionsWithSSA(Method* method, + const TargetMachine &target) +{ + SchedGraphSet graphSet(method, target); + + if (SchedDebugLevel >= Sched_PrintSchedGraphs) + { + cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING" + << endl; + graphSet.dump(); + } + + for (SchedGraphSet::const_iterator GI=graphSet.begin(); + GI != graphSet.end(); ++GI) + { + SchedGraph* graph = (*GI).second; + const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks(); + assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks"); + const BasicBlock* bb = bbvec[0]; + + if (SchedDebugLevel >= Sched_PrintSchedTrace) + cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n"; + + SchedPriorities schedPrio(method, graph); // expensive! + SchedulingManager S(target, graph, schedPrio); + + ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph + + ForwardListSchedule(S); // computes schedule in S + + RecordSchedule((*GI).first, S); // records schedule in BB + } + + if (SchedDebugLevel >= Sched_PrintMachineCode) + { + cout << endl + << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl; + PrintMachineInstructions(method); + } + + return false; // no reason to fail yet } + |