diff options
-rw-r--r-- | lib/CodeGen/ModuloScheduling/MSSchedule.cpp | 6 | ||||
-rw-r--r-- | lib/CodeGen/ModuloScheduling/MSchedGraph.cpp | 32 | ||||
-rw-r--r-- | lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp | 986 | ||||
-rw-r--r-- | lib/CodeGen/ModuloScheduling/ModuloScheduling.h | 11 | ||||
-rw-r--r-- | lib/Target/SparcV9/ModuloScheduling/MSSchedule.cpp | 6 | ||||
-rw-r--r-- | lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp | 32 | ||||
-rw-r--r-- | lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp | 986 | ||||
-rw-r--r-- | lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h | 11 |
8 files changed, 1482 insertions, 588 deletions
diff --git a/lib/CodeGen/ModuloScheduling/MSSchedule.cpp b/lib/CodeGen/ModuloScheduling/MSSchedule.cpp index dfee1d15be..8ec19dad72 100644 --- a/lib/CodeGen/ModuloScheduling/MSSchedule.cpp +++ b/lib/CodeGen/ModuloScheduling/MSSchedule.cpp @@ -49,12 +49,12 @@ bool MSSchedule::insert(MSchedGraphNode *node, int cycle) { bool MSSchedule::resourcesFree(MSchedGraphNode *node, int cycle) { //Get Resource usage for this instruction - const TargetSchedInfo & msi = node->getParent()->getTarget()->getSchedInfo(); + const TargetSchedInfo *msi = node->getParent()->getTarget()->getSchedInfo(); int currentCycle = cycle; bool success = true; //Get resource usage for this instruction - InstrRUsage rUsage = msi.getInstrRUsage(node->getInst()->getOpcode()); + InstrRUsage rUsage = msi->getInstrRUsage(node->getInst()->getOpcode()); std::vector<std::vector<resourceId_t> > resources = rUsage.resourcesByCycle; //Loop over resources in each cycle and increments their usage count @@ -101,7 +101,7 @@ bool MSSchedule::resourcesFree(MSchedGraphNode *node, int cycle) { int oldCycle = cycle; DEBUG(std::cerr << "Backtrack\n"); //Get resource usage for this instruction - InstrRUsage rUsage = msi.getInstrRUsage(node->getInst()->getOpcode()); + InstrRUsage rUsage = msi->getInstrRUsage(node->getInst()->getOpcode()); std::vector<std::vector<resourceId_t> > resources = rUsage.resourcesByCycle; //Loop over resources in each cycle and increments their usage count diff --git a/lib/CodeGen/ModuloScheduling/MSchedGraph.cpp b/lib/CodeGen/ModuloScheduling/MSchedGraph.cpp index b441bea051..9dd955d744 100644 --- a/lib/CodeGen/ModuloScheduling/MSchedGraph.cpp +++ b/lib/CodeGen/ModuloScheduling/MSchedGraph.cpp @@ -103,7 +103,7 @@ MSchedGraph::~MSchedGraph () { void MSchedGraph::buildNodesAndEdges() { //Get Machine target information for calculating latency - const TargetInstrInfo &MTI = Target.getInstrInfo(); + const TargetInstrInfo *MTI = Target.getInstrInfo(); std::vector<MSchedGraphNode*> memInstructions; std::map<int, std::vector<OpIndexNodePair> > regNumtoNodeMap; @@ -124,16 +124,16 @@ void MSchedGraph::buildNodesAndEdges() { #if 0 // FIXME: LOOK INTO THIS //Check if subsequent instructions can be issued before //the result is ready, if so use min delay. - if(MTI.hasResultInterlock(MIopCode)) - delay = MTI.minLatency(MIopCode); + if(MTI->hasResultInterlock(MIopCode)) + delay = MTI->minLatency(MIopCode); else #endif //Get delay - delay = MTI.maxLatency(opCode); + delay = MTI->maxLatency(opCode); //Create new node for this machine instruction and add to the graph. //Create only if not a nop - if(MTI.isNop(opCode)) + if(MTI->isNop(opCode)) continue; //Add PHI to phi instruction list to be processed later @@ -143,7 +143,7 @@ void MSchedGraph::buildNodesAndEdges() { bool isBranch = false; //We want to flag the branch node to treat it special - if(MTI.isBranch(opCode)) + if(MTI->isBranch(opCode)) isBranch = true; //Node is created and added to the graph automatically @@ -152,7 +152,7 @@ void MSchedGraph::buildNodesAndEdges() { DEBUG(std::cerr << "Created Node: " << *node << "\n"); //Check OpCode to keep track of memory operations to add memory dependencies later. - if(MTI.isLoad(opCode) || MTI.isStore(opCode)) + if(MTI->isLoad(opCode) || MTI->isStore(opCode)) memInstructions.push_back(node); //Loop over all operands, and put them into the register number to @@ -370,7 +370,7 @@ void MSchedGraph::addMachRegEdges(std::map<int, std::vector<OpIndexNodePair> >& void MSchedGraph::addMemEdges(const std::vector<MSchedGraphNode*>& memInst) { //Get Target machine instruction info - const TargetInstrInfo& TMI = Target.getInstrInfo(); + const TargetInstrInfo *TMI = Target.getInstrInfo(); //Loop over all memory instructions in the vector //Knowing that they are in execution, add true, anti, and output dependencies @@ -383,15 +383,15 @@ void MSchedGraph::addMemEdges(const std::vector<MSchedGraphNode*>& memInst) { for(unsigned destIndex = srcIndex + 1; destIndex < memInst.size(); ++destIndex) { //source is a Load, so add anti-dependencies (store after load) - if(TMI.isLoad(srcNodeOpCode)) - if(TMI.isStore(memInst[destIndex]->getInst()->getOpcode())) + if(TMI->isLoad(srcNodeOpCode)) + if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) memInst[srcIndex]->addOutEdge(memInst[destIndex], MSchedGraphEdge::MemoryDep, MSchedGraphEdge::AntiDep); //If source is a store, add output and true dependencies - if(TMI.isStore(srcNodeOpCode)) { - if(TMI.isStore(memInst[destIndex]->getInst()->getOpcode())) + if(TMI->isStore(srcNodeOpCode)) { + if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) memInst[srcIndex]->addOutEdge(memInst[destIndex], MSchedGraphEdge::MemoryDep, MSchedGraphEdge::OutputDep); @@ -405,13 +405,13 @@ void MSchedGraph::addMemEdges(const std::vector<MSchedGraphNode*>& memInst) { //All instructions before the src in execution order have an iteration delay of 1 for(unsigned destIndex = 0; destIndex < srcIndex; ++destIndex) { //source is a Load, so add anti-dependencies (store after load) - if(TMI.isLoad(srcNodeOpCode)) - if(TMI.isStore(memInst[destIndex]->getInst()->getOpcode())) + if(TMI->isLoad(srcNodeOpCode)) + if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) memInst[srcIndex]->addOutEdge(memInst[destIndex], MSchedGraphEdge::MemoryDep, MSchedGraphEdge::AntiDep, 1); - if(TMI.isStore(srcNodeOpCode)) { - if(TMI.isStore(memInst[destIndex]->getInst()->getOpcode())) + if(TMI->isStore(srcNodeOpCode)) { + if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) memInst[srcIndex]->addOutEdge(memInst[destIndex], MSchedGraphEdge::MemoryDep, MSchedGraphEdge::OutputDep, 1); diff --git a/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp b/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp index 62ca490c98..7cec3f1f13 100644 --- a/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp +++ b/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp @@ -6,17 +6,22 @@ // the University of Illinois Open Source License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// // -// +// This ModuloScheduling pass is based on the Swing Modulo Scheduling +// algorithm. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ModuloSched" #include "ModuloScheduling.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/InstrSelection.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineCodeForInstruction.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CFG.h" +#include "Support/Casting.h" #include "llvm/Target/TargetSchedInfo.h" #include "Support/Debug.h" #include "Support/GraphWriter.h" @@ -25,7 +30,8 @@ #include <utility> #include <fstream> #include <sstream> - +#include "../../Target/SparcV9/SparcV9Internals.h" +#include "../../Target/SparcV9/SparcV9RegisterInfo.h" using namespace llvm; @@ -36,6 +42,8 @@ FunctionPass *llvm::createModuloSchedulingPass(TargetMachine & targ) { return new ModuloSchedulingPass(targ); } + +//Graph Traits for printing out the dependence graph template<typename GraphType> static void WriteGraphToFile(std::ostream &O, const std::string &GraphName, const GraphType >) { @@ -50,6 +58,7 @@ static void WriteGraphToFile(std::ostream &O, const std::string &GraphName, O << "\n"; }; +//Graph Traits for printing out the dependence graph namespace llvm { template<> @@ -100,93 +109,132 @@ namespace llvm { return edgelabel; } - - - }; } /// ModuloScheduling::runOnFunction - main transformation entry point +/// The Swing Modulo Schedule algorithm has three basic steps: +/// 1) Computation and Analysis of the dependence graph +/// 2) Ordering of the nodes +/// 3) Scheduling +/// bool ModuloSchedulingPass::runOnFunction(Function &F) { + bool Changed = false; - - DEBUG(std::cerr << "Creating ModuloSchedGraph for each BasicBlock in" + F.getName() + "\n"); + + DEBUG(std::cerr << "Creating ModuloSchedGraph for each valid BasicBlock in" + F.getName() + "\n"); //Get MachineFunction MachineFunction &MF = MachineFunction::get(&F); - - //Iterate over BasicBlocks and do ModuloScheduling if they are valid - for (MachineFunction::const_iterator BI = MF.begin(); BI != MF.end(); ++BI) { - if(MachineBBisValid(BI)) { - MSchedGraph *MSG = new MSchedGraph(BI, target); - - //Write Graph out to file - DEBUG(WriteGraphToFile(std::cerr, F.getName(), MSG)); - - //Print out BB for debugging - DEBUG(BI->print(std::cerr)); - - //Calculate Resource II - int ResMII = calculateResMII(BI); - //Calculate Recurrence II - int RecMII = calculateRecMII(MSG, ResMII); - - II = std::max(RecMII, ResMII); - - - DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << "and ResMII=" << ResMII << "\n"); + //Print out machine function + DEBUG(MF.print(std::cerr)); - //Calculate Node Properties - calculateNodeAttributes(MSG, ResMII); + //Worklist + std::vector<MachineBasicBlock*> Worklist; + + //Iterate over BasicBlocks and put them into our worklist if they are valid + for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); ++BI) + if(MachineBBisValid(BI)) + Worklist.push_back(&*BI); + - //Dump node properties if in debug mode - for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I !=E; ++I) { - DEBUG(std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth << " Height: " << I->second.height << "\n"); - } + //Iterate over the worklist and perform scheduling + for(std::vector<MachineBasicBlock*>::iterator BI = Worklist.begin(), + BE = Worklist.end(); BI != BE; ++BI) { + + MSchedGraph *MSG = new MSchedGraph(*BI, target); + + //Write Graph out to file + DEBUG(WriteGraphToFile(std::cerr, F.getName(), MSG)); + + //Print out BB for debugging + DEBUG((*BI)->print(std::cerr)); + + //Calculate Resource II + int ResMII = calculateResMII(*BI); + + //Calculate Recurrence II + int RecMII = calculateRecMII(MSG, ResMII); + + //Our starting initiation interval is the maximum of RecMII and ResMII + II = std::max(RecMII, ResMII); + + //Print out II, RecMII, and ResMII + DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << "and ResMII=" << ResMII << "\n"); + + //Calculate Node Properties + calculateNodeAttributes(MSG, ResMII); + + //Dump node properties if in debug mode + DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), + E = nodeToAttributesMap.end(); I !=E; ++I) { + std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " + << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth + << " Height: " << I->second.height << "\n"; + }); + + //Put nodes in order to schedule them + computePartialOrder(); + + //Dump out partial order + DEBUG(for(std::vector<std::vector<MSchedGraphNode*> >::iterator I = partialOrder.begin(), + E = partialOrder.end(); I !=E; ++I) { + std::cerr << "Start set in PO\n"; + for(std::vector<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J) + std::cerr << "PO:" << **J << "\n"; + }); + + //Place nodes in final order + orderNodes(); + + //Dump out order of nodes + DEBUG(for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(), E = FinalNodeOrder.end(); I != E; ++I) { + std::cerr << "FO:" << **I << "\n"; + }); + + //Finally schedule nodes + computeSchedule(); + + //Print out final schedule + DEBUG(schedule.print(std::cerr)); - //Put nodes in order to schedule them - computePartialOrder(); - - //Dump out partial order - for(std::vector<std::vector<MSchedGraphNode*> >::iterator I = partialOrder.begin(), E = partialOrder.end(); I !=E; ++I) { - DEBUG(std::cerr << "Start set in PO\n"); - for(std::vector<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J) - DEBUG(std::cerr << "PO:" << **J << "\n"); - } - - orderNodes(); - - //Dump out order of nodes - for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(), E = FinalNodeOrder.end(); I != E; ++I) - DEBUG(std::cerr << "FO:" << **I << "\n"); - - - //Finally schedule nodes - computeSchedule(); - - DEBUG(schedule.print(std::cerr)); - - reconstructLoop(BI); - - nodeToAttributesMap.clear(); - partialOrder.clear(); - recurrenceList.clear(); - FinalNodeOrder.clear(); - schedule.clear(); - } + //Final scheduling step is to reconstruct the loop + reconstructLoop(*BI); + + //Print out new loop + + //Clear out our maps for the next basic block that is processed + nodeToAttributesMap.clear(); + partialOrder.clear(); + recurrenceList.clear(); + FinalNodeOrder.clear(); + schedule.clear(); + + //Clean up. Nuke old MachineBB and llvmBB + //BasicBlock *llvmBB = (BasicBlock*) (*BI)->getBasicBlock(); + //Function *parent = (Function*) llvmBB->getParent(); + //Should't std::find work?? + //parent->getBasicBlockList().erase(std::find(parent->getBasicBlockList().begin(), parent->getBasicBlockList().end(), *llvmBB)); + //parent->getBasicBlockList().erase(llvmBB); + + //delete(llvmBB); + //delete(*BI); } - - + + return Changed; } +/// This function checks if a Machine Basic Block is valid for modulo +/// scheduling. This means that it has no control flow (if/else or +/// calls) in the block. Currently ModuloScheduling only works on +/// single basic block loops. bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) { - //Valid basic blocks must be loops and can not have if/else statements or calls. bool isLoop = false; //Check first if its a valid loop @@ -196,25 +244,20 @@ bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) { isLoop = true; } - if(!isLoop) { - DEBUG(std::cerr << "Basic Block is not a loop\n"); + if(!isLoop) return false; - } - else - DEBUG(std::cerr << "Basic Block is a loop\n"); - + //Get Target machine instruction info - /*const TargetInstrInfo& TMI = targ.getInstrInfo(); + const TargetInstrInfo *TMI = target.getInstrInfo(); - //Check each instruction and look for calls or if/else statements - unsigned count = 0; + //Check each instruction and look for calls for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { - //Get opcode to check instruction type - MachineOpCode OC = I->getOpcode(); - if(TMI.isControlFlow(OC) && (count+1 < BI->size())) - return false; - count++; - }*/ + //Get opcode to check instruction type + MachineOpCode OC = I->getOpcode(); + if(TMI->isCall(OC)) + return false; + + } return true; } @@ -225,8 +268,8 @@ bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) { //for each instruction int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock *BI) { - const TargetInstrInfo & mii = target.getInstrInfo(); - const TargetSchedInfo & msi = target.getSchedInfo(); + const TargetInstrInfo *mii = target.getInstrInfo(); + const TargetSchedInfo *msi = target.getSchedInfo(); int ResMII = 0; @@ -236,7 +279,7 @@ int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock *BI) { for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { //Get resource usage for this instruction - InstrRUsage rUsage = msi.getInstrRUsage(I->getOpcode()); + InstrRUsage rUsage = msi->getInstrRUsage(I->getOpcode()); std::vector<std::vector<resourceId_t> > resources = rUsage.resourcesByCycle; //Loop over resources in each cycle and increments their usage count @@ -254,7 +297,7 @@ int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock *BI) { //Find maximum usage count //Get max number of instructions that can be issued at once. (FIXME) - int issueSlots = msi.maxNumIssueTotal; + int issueSlots = msi->maxNumIssueTotal; for(std::map<unsigned,unsigned>::iterator RB = resourceUsageCount.begin(), RE = resourceUsageCount.end(); RB != RE; ++RB) { @@ -273,19 +316,17 @@ int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock *BI) { finalUsageCount = ceil(1.0 * usageCount / issueSlots); - DEBUG(std::cerr << "Resource ID: " << RB->first << " (usage=" << usageCount << ", resourceNum=X" << ", issueSlots=" << issueSlots << ", finalUsage=" << finalUsageCount << ")\n"); - //Only keep track of the max ResMII = std::max( (int) finalUsageCount, ResMII); } - DEBUG(std::cerr << "Final Resource MII: " << ResMII << "\n"); - return ResMII; } +/// calculateRecMII - Calculates the value of the highest recurrence +/// By value we mean the total latency int ModuloSchedulingPass::calculateRecMII(MSchedGraph *graph, int MII) { std::vector<MSchedGraphNode*> vNodes; //Loop over all nodes in the graph @@ -297,18 +338,18 @@ int ModuloSchedulingPass::calculateRecMII(MSchedGraph *graph, int MII) { int RecMII = 0; for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::iterator I = recurrenceList.begin(), E=recurrenceList.end(); I !=E; ++I) { - std::cerr << "Recurrence: \n"; - for(std::vector<MSchedGraphNode*>::const_iterator N = I->second.begin(), NE = I->second.end(); N != NE; ++N) { + DEBUG(for(std::vector<MSchedGraphNode*>::const_iterator N = I->second.begin(), NE = I->second.end(); N != NE; ++N) { std::cerr << **N << "\n"; - } + }); RecMII = std::max(RecMII, I->first); - std::cerr << "End Recurrence with RecMII: " << I->first << "\n"; - } - DEBUG(std::cerr << "RecMII: " << RecMII << "\n"); - + } + return MII; } +/// calculateNodeAttributes - The following properties are calculated for +/// each node in the dependence graph: ASAP, ALAP, Depth, Height, and +/// MOB. void ModuloSchedulingPass::calculateNodeAttributes(MSchedGraph *graph, int MII) { //Loop over the nodes and add them to the map @@ -348,15 +389,18 @@ void ModuloSchedulingPass::calculateNodeAttributes(MSchedGraph *graph, int MII) } +/// ignoreEdge - Checks to see if this edge of a recurrence should be ignored or not bool ModuloSchedulingPass::ignoreEdge(MSchedGraphNode *srcNode, MSchedGraphNode *destNode) { if(destNode == 0 || srcNode ==0) return false; - + bool findEdge = edgesToIgnore.count(std::make_pair(srcNode, destNode->getInEdgeNum(srcNode))); return findEdge; } + +/// calculateASAP - Calculates the int ModuloSchedulingPass::calculateASAP(MSchedGraphNode *node, int MII, MSchedGraphNode *destNode) { DEBUG(std::cerr << "Calculating ASAP for " << *node << "\n"); @@ -789,7 +833,8 @@ void ModuloSchedulingPass::orderNodes() { for(std::vector<std::vector<MSchedGraphNode*> >::iterator CurrentSet = partialOrder.begin(), E= partialOrder.end(); CurrentSet != E; ++CurrentSet) { DEBUG(std::cerr << "Processing set in S\n"); - dumpIntersection(*CurrentSet); + DEBUG(dumpIntersection(*CurrentSet)); + //Result of intersection std::vector<MSchedGraphNode*> IntersectCurrent; @@ -989,7 +1034,7 @@ void ModuloSchedulingPass::computeSchedule() { bool success = false; while(!success) { - + //Loop over the final node order and process each node for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(), E = FinalNodeOrder.end(); I != E; ++I) { @@ -1013,7 +1058,7 @@ void ModuloSchedulingPass::computeSchedule() { if(!ignoreEdge(*schedNode, *I)) { int diff = (*I)->getInEdge(*schedNode).getIteDiff(); int ES_Temp = nodesByCycle->first + (*schedNode)->getLatency() - diff * II; - DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n"); + DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n"); DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n"); EarlyStart = std::max(EarlyStart, ES_Temp); hasPred = true; @@ -1124,76 +1169,85 @@ bool ModuloSchedulingPass::scheduleNode(MSchedGraphNode *node, return success; } -/*void ModuloSchedulingPass::saveValue(const MachineInstr *inst, std::set<const Value*> &valuestoSave, std::vector<Value*> *valuesForNode) { - int numFound = 0; - Instruction *tmp; - - //For each value* in this inst that is a def, we want to save a copy - //Target info - const TargetInstrInfo & mii = target.getInstrInfo(); - for(unsigned i=0; i < inst->getNumOperands(); ++i) { - //get machine operand - const MachineOperand &mOp = inst->getOperand(i); - if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { - //Save copy in tmpInstruction - numFound++; - tmp = TmpInstruction(mii.getMachineCodeFor(mOp.getVRegValue()), - mOp.getVRegValue()); - valuesForNode->push_back(tmp); - } - } +void ModuloSchedulingPass::writePrologues(std::vector<MachineBasicBlock *> &prologues, MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_prologues, std::map<const Value*, std::pair<const MSchedGraphNode*, int> > &valuesToSave, std::map<Value*, std::map<int, std::vector<Value*> > > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation) { - assert(numFound == 1 && "We should have only found one def to this virtual register!"); -}*/ - -void ModuloSchedulingPass::writePrologues(std::vector<MachineBasicBlock *> &prologues, const MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_prologues) { - + //Keep a map to easily know whats in the kernel std::map<int, std::set<const MachineInstr*> > inKernel; int maxStageCount = 0; + MSchedGraphNode *branch = 0; + for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { maxStageCount = std::max(maxStageCount, I->second); //Ignore the branch, we will handle this separately - if(I->first->isBranch()) + if(I->first->isBranch()) { + branch = I->first; continue; + } //Put int the map so we know what instructions in each stage are in the kernel - if(I->second > 0) { - DEBUG(std::cerr << "Inserting instruction " << *(I->first->getInst()) << " into map at stage " << I->second << "\n"); - inKernel[I->second].insert(I->first->getInst()); - } + DEBUG(std::cerr << "Inserting instruction " << *(I->first->getInst()) << " into map at stage " << I->second << "\n"); + inKernel[I->second].insert(I->first->getInst()); } - //Now write the prologues - for(int i = 1; i <= maxStageCount; ++i) { - BasicBlock *llvmBB = new BasicBlock(); + //Get target information to look at machine operands + const TargetInstrInfo *mii = target.getInstrInfo(); + + //Now write the prologues + for(int i = 0; i < maxStageCount; ++i) { + BasicBlock *llvmBB = new BasicBlock("PROLOGUE", (Function*) (origBB->getBasicBlock()->getParent())); MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB); - //Loop over original machine basic block. If we see an instruction from this - //stage that is NOT in the kernel, then it needs to be added into the prologue - //We go in order to preserve dependencies - for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) { - if(inKernel[i].count(&*MI)) { - inKernel[i].erase(&*MI); - if(inKernel[i].size() <= 0) - break; - else - continue; - } - else { - DEBUG(std::cerr << "Writing instruction to prologue\n"); - machineBB->push_back(MI->clone()); + DEBUG(std::cerr << "i=" << i << "\n"); + for(int j = 0; j <= i; ++j) { + for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) { + if(inKernel[j].count(&*MI)) { + machineBB->push_back(MI->clone()); + + Instruction *tmp; + + //After cloning, we may need to save the value that this instruction defines + for(unsigned opNum=0; opNum < MI->getNumOperands(); ++opNum) { + //get machine operand + const MachineOperand &mOp = MI->getOperand(opNum); + if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { + + + //Check if this is a value we should save + if(valuesToSave.count(mOp.getVRegValue())) { + //Save copy in tmpInstruction + tmp = new TmpInstruction(mOp.getVRegValue()); + + DEBUG(std::cerr << "Value: " << mOp.getVRegValue() << " New Value: " << tmp << " Stage: " << i << "\n"); + newValues[mOp.getVRegValue()][i].push_back(tmp); + newValLocation[tmp] = machineBB; + + DEBUG(std::cerr << "Machine Instr Operands: " << mOp.getVRegValue() << ", 0, " << tmp << "\n"); + + //Create machine instruction and put int machineBB + MachineInstr *saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); + + DEBUG(std::cerr << "Created new machine instr: " << *saveValue << "\n"); + } + } + } + } } } - (((MachineBasicBlock*)origBB)->getParent())->getBasicBlockList().push_back(machineBB); + + //Stick in branch at the end + machineBB->push_back(branch->getInst()->clone()); + + (((MachineBasicBlock*)origBB)->getParent())->getBasicBlockList().push_back(machineBB); prologues.push_back(machineBB); llvm_prologues.push_back(llvmBB); } } -void ModuloSchedulingPass::writeEpilogues(std::vector<MachineBasicBlock *> &epilogues, const MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_epilogues) { +void ModuloSchedulingPass::writeEpilogues(std::vector<MachineBasicBlock *> &epilogues, const MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_epilogues, std::map<const Value*, std::pair<const MSchedGraphNode*, int> > &valuesToSave, std::map<Value*, std::map<int, std::vector<Value*> > > &newValues,std::map<Value*, MachineBasicBlock*> &newValLocation ) { + std::map<int, std::set<const MachineInstr*> > inKernel; int maxStageCount = 0; for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { @@ -1204,197 +1258,585 @@ void ModuloSchedulingPass::writeEpilogues(std::vector<MachineBasicBlock *> &epil continue; //Put int the map so we know what instructions in each stage are in the kernel - if(I->second > 0) { - DEBUG(std::cerr << "Inserting instruction " << *(I->first->getInst()) << " into map at stage " << I->second << "\n"); - inKernel[I->second].insert(I->first->getInst()); - } + inKernel[I->second].insert(I->first->getInst()); } + std::map<Value*, Value*> valPHIs; + //Now write the epilogues - for(int i = 1; i <= maxStageCount; ++i) { - BasicBlock *llvmBB = new BasicBlock(); + for(int i = maxStageCount-1; i >= 0; --i) { + BasicBlock *llvmBB = new BasicBlock("EPILOGUE", (Function*) (origBB->getBasicBlock()->getParent())); MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB); - - bool last = false; - for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) { - - if(!last) { - if(inKernel[i].count(&*MI)) { - machineBB->push_back(MI->clone()); - inKernel[i].erase(&*MI); - if(inKernel[i].size() <= 0) - last = true; - } + + DEBUG(std::cerr << " i: " << i << "\n"); + + //Spit out phi nodes + for(std::map<Value*, std::map<int, std::vector<Value*> > >::iterator V = newValues.begin(), E = newValues.end(); + V != E; ++V) { + + DEBUG(std::cerr << "Writing phi for" << *(V->first)); + for(std::map<int, std::vector<Value*> >::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I) { + if(I->first == i) { + DEBUG(std::cerr << "BLAH " << i << "\n"); + + //Vector must have two elements in it: + assert(I->second.size() == 2 && "Vector size should be two\n"); + + Instruction *tmp = new TmpInstruction(I->second[0]); + MachineInstr *saveValue = BuildMI(machineBB, V9::PHI, 3).addReg(I->second[0]).addReg(I->second[1]).addRegDef(tmp); + valPHIs[V->first] = tmp; + } } - else - machineBB->push_back(MI->clone()); - + } + for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) { + for(int j=maxStageCount; j > i; --j) { + if(inKernel[j].count(&*MI)) { + DEBUG(std::cerr << "Cloning instruction " << *MI << "\n"); + MachineInstr *clone = MI->clone(); + + //Update operands that need to use the result from the phi + for(unsigned i=0; i < clone->getNumOperands(); ++i) { + //get machine operand + const MachineOperand &mOp = clone->getOperand(i); + if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse())) { + if(valPHIs.count(mOp.getVRegValue())) { + //Update the operand in the cloned instruction + clone->getOperand(i).setValueReg(valPHIs[mOp.getVRegValue()]); + } + } + } + machineBB->push_back(clone); + } + } } + (((MachineBasicBlock*)origBB)->getParent())->getBasicBlockList().push_back(machineBB); epilogues.push_back(machineBB); llvm_epilogues.push_back(llvmBB); } - } +void ModuloSchedulingPass::writeKernel(BasicBlock *llvmBB, MachineBasicBlock *machineBB, std::map<const Value*, std::pair<const MSchedGraphNode*, int> > &valuesToSave, std::map<Value*, std::map<int, std::vector<Value*> > > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation) { + + //Keep track of operands that are read and saved from a previous iteration. The new clone + //instruction will use the result of the phi instead. + std::map<Value*, Value*> finalPHIValue; + std::map<Value*, Value*> kernelValue; + //Create TmpInstructions for the final phis + for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { -void ModuloSchedulingPass::reconstructLoop(const MachineBasicBlock *BB) { - - //The new loop will consist of an prologue, the kernel, and one or more epilogues. + //Clone instruction + const MachineInstr *inst = I->first->getInst(); + MachineInstr *instClone = inst->clone(); + + //If this instruction is from a previous iteration, update its operands + if(I->second > 0) { + //Loop over Machine Operands + const MachineInstr *inst = I->first->getInst(); + for(unsigned i=0; i < inst->getNumOperands(); ++i) { + //get machine operand + const MachineOperand &mOp = inst->getOperand(i); + + if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) { + //If its in the value saved, we need to create a temp instruction and use that instead + if(valuesToSave.count(mOp.getVRegValue())) { + TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue()); + + //Update the operand in the cloned instruction + instClone->getOperand(i).setValueReg(tmp); + + //save this as our final phi + finalPHIValue[mOp.getVRegValue()] = tmp; + newValLocation[tmp] = machineBB; + } + } - std::vector<MachineBasicBlock*> prologues; - std::vector<BasicBlock*> llvm_prologues; + } + //Insert into machine basic block + machineBB->push_back(instClone); + + } + //Otherwise we just check if we need to save a value or not + else { + //Insert into machine basic block + machineBB->push_back(instClone); + + //Loop over Machine Operands + const MachineInstr *inst = I->first->getInst(); + for(unsigned i=0; i < inst->getNumOperands(); ++i) { + //get machine operand + const MachineOperand &mOp = inst->getOperand(i); + + if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { + if(valuesToSave.count(mOp.getVRegValue())) { + + TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue()); + + //Create new machine instr and put in MBB + MachineInstr *saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); + + //Save for future cleanup + kernelValue[mOp.getVRegValue()] = tmp; + newValLocation[tmp] = machineBB; + } + } + } + } + } - //Write prologue - writePrologues(prologues, BB, llvm_prologues); + //Clean up by writing phis + for(std::map<Value*, std::map<int, std::vector<Value*> > >::iterator V = newValues.begin(), E = newValues.end(); + V != E; ++V) { - //Print out prologue - for(std::vector<MachineBasicBlock*>::iterator I = prologues.begin(), E = prologues.end(); - I != E; ++I) { - std::cerr << "PROLOGUE\n"; - (*I)->print(std::cerr); - } + DEBUG(std::cerr << "Writing phi for" << *(V->first)); + + //FIXME + int maxStage = 1; + //Last phi + Instruction *lastPHI = 0; - std::vector<MachineBasicBlock*> epilogues; - std::vector<BasicBlock*> llvm_epilogues; + for(std::map<int, std::vector<Value*> >::iterator I = V->second.begin(), IE = V->second.end(); + I != IE; ++I) { + + int stage = I->first; - //Write epilogues - writeEpilogues(epilogues, BB, llvm_epilogues); + DEBUG(std::cerr << "Stage: " << I->first << " vector size: " << I->second.size() << "\n"); - //Print out prologue - for(std::vector<MachineBasicBlock*>::iterator I = epilogues.begin(), E = epilogues.end(); - I != E; ++I) { - std::cerr << "EPILOGUE\n"; - (*I)->print(std::cerr); - } + //Assert if this vector is ever greater then 1. This should not happen + //FIXME: Get rid of vector if we convince ourselves this won't happn + assert(I->second.size() == 1 && "Vector of values should be of size \n"); - //create a vector of epilogues corresponding to each stage - /*std::vector<MachineBasicBlock*> epilogues; + //We must handle the first and last phi specially + if(stage == maxStage) { + //The resulting value must be the Value* we created earlier + assert(lastPHI != 0 && "Last phi is NULL!\n"); + MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(lastPHI).addReg(I->second[0]).addRegDef(finalPHIValue[V->first]); + I->second.push_back(finalPHIValue[V->first]); + |