diff options
Diffstat (limited to 'lib')
50 files changed, 1636 insertions, 518 deletions
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 64d6186d91..22535fe5b4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -307,47 +307,51 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); - DISubprogram SPDecl = SP.getFunctionDeclaration(); - if (!SPDecl.isSubprogram()) { - // There is not any need to generate specification DIE for a function - // defined at compile unit level. If a function is defined inside another - // function then gdb prefers the definition at top level and but does not - // expect specification DIE in parent function. So avoid creating - // specification DIE for a function defined inside a function. - if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { - SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); - - // Add arguments. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(Args.getElement(i)); - SPCU->addType(Arg, ATy); - if (ATy.isArtificial()) - SPCU->addFlag(Arg, dwarf::DW_AT_artificial); - if (ATy.isObjectPointer()) - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, - dwarf::DW_FORM_ref4, Arg); - SPDie->addChild(Arg); - } - DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPDeclDie); - SPCU->addDie(SPDie); - } - } - // Pick up abstract subprogram DIE. + // If we're updating an abstract DIE, then we will be adding the children and + // object pointer later on. But what we don't want to do is process the + // concrete DIE twice. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { + // Pick up abstract subprogram DIE. SPDie = new DIE(dwarf::DW_TAG_subprogram); SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, AbsSPDIE); SPCU->addDie(SPDie); + } else { + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (!SPDecl.isSubprogram()) { + // There is not any need to generate specification DIE for a function + // defined at compile unit level. If a function is defined inside another + // function then gdb prefers the definition at top level and but does not + // expect specification DIE in parent function. So avoid creating + // specification DIE for a function defined inside a function. + if (SP.isDefinition() && !SP.getContext().isCompileUnit() && + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext())) { + SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); + + // Add arguments. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(Args.getElement(i)); + SPCU->addType(Arg, ATy); + if (ATy.isArtificial()) + SPCU->addFlag(Arg, dwarf::DW_AT_artificial); + if (ATy.isObjectPointer()) + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, + dwarf::DW_FORM_ref4, Arg); + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + SPCU->addDie(SPDie); + } + } } SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, @@ -831,7 +835,7 @@ void DwarfDebug::endModule() { LexicalScope *Scope = new LexicalScope(NULL, DIDescriptor(SP), NULL, false); DeadFnScopeMap[SP] = Scope; - + // Construct subprogram DIE and add variables DIEs. CompileUnit *SPCU = CUMap.lookup(TheCU); assert(SPCU && "Unable to find Compile Unit!"); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index c55e8b7898..de16932c06 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -49,6 +49,15 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG +// Threshold to very roughly model an out-of-order processor's instruction +// buffers. If the actual value of this threshold matters much in practice, then +// it can be specified by the machine model. For now, it's an experimental +// tuning knob to determine when and if it matters. +static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden, + cl::desc("Allow expected latency to exceed the critical path by N cycles " + "before attempting to balance ILP"), + cl::init(10U)); + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// @@ -220,7 +229,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // The Scheduler may insert instructions during either schedule() or // exitRegion(), even for empty regions. So the local iterators 'I' and // 'RegionEnd' are invalid across these calls. - unsigned RemainingCount = MBB->size(); + unsigned RemainingInstrs = MBB->size(); for(MachineBasicBlock::iterator RegionEnd = MBB->end(); RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { @@ -229,19 +238,19 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { --RegionEnd; // Count the boundary instruction. - --RemainingCount; + --RemainingInstrs; } // The next region starts above the previous region. Look backward in the // instruction stream until we find the nearest boundary. MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingCount) { + for(;I != MBB->begin(); --I, --RemainingInstrs) { if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) break; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount); + Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). if (I == RegionEnd || I == llvm::prior(RegionEnd)) { @@ -255,7 +264,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; - dbgs() << " Remaining: " << RemainingCount << "\n"); + dbgs() << " Remaining: " << RemainingInstrs << "\n"); // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. @@ -268,7 +277,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // scheduler for the top of it's scheduled region. RegionEnd = Scheduler->begin(); } - assert(RemainingCount == 0 && "Instruction count mismatch!"); + assert(RemainingInstrs == 0 && "Instruction count mismatch!"); Scheduler->finishBlock(); } Scheduler->finalizeSchedule(); @@ -487,6 +496,13 @@ void ScheduleDAGMI::schedule() { assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); placeDebugValues(); + + DEBUG({ + unsigned BBNum = top()->getParent()->getNumber(); + dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } /// Build the DAG and setup three register pressure trackers. @@ -627,6 +643,17 @@ void ScheduleDAGMI::placeDebugValues() { FirstDbgValue = NULL; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGMI::dumpSchedule() const { + for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { + if (SUnit *SU = getSUnit(&(*MI))) + SU->dump(this); + else + dbgs() << "Missing SUnit\n"; + } +} +#endif + //===----------------------------------------------------------------------===// // ConvergingScheduler - Implementation of the standard MachineSchedStrategy. //===----------------------------------------------------------------------===// @@ -635,33 +662,127 @@ namespace { /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. class ConvergingScheduler : public MachineSchedStrategy { +public: + /// Represent the type of SchedCandidate found within a single queue. + /// pickNodeBidirectional depends on these listed by decreasing priority. + enum CandReason { + NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand, + BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, + SingleMax, MultiPressure, NextDefUse, NodeOrder}; + +#ifndef NDEBUG + static const char *getReasonStr(ConvergingScheduler::CandReason Reason); +#endif + + /// Policy for scheduling the next instruction in the candidate's zone. + struct CandPolicy { + bool ReduceLatency; + unsigned ReduceResIdx; + unsigned DemandResIdx; + + CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} + }; + + /// Status of an instruction's critical resource consumption. + struct SchedResourceDelta { + // Count critical resources in the scheduled region required by SU. + unsigned CritResources; + + // Count critical resources from another region consumed by SU. + unsigned DemandedResources; + + SchedResourceDelta(): CritResources(0), DemandedResources(0) {} + + bool operator==(const SchedResourceDelta &RHS) const { + return CritResources == RHS.CritResources + && DemandedResources == RHS.DemandedResources; + } + bool operator!=(const SchedResourceDelta &RHS) const { + return !operator==(RHS); + } + }; /// Store the state used by ConvergingScheduler heuristics, required for the /// lifetime of one invocation of pickNode(). struct SchedCandidate { + CandPolicy Policy; + // The best SUnit candidate. SUnit *SU; + // The reason for this candidate. + CandReason Reason; + // Register pressure values for the best candidate. RegPressureDelta RPDelta; - SchedCandidate(): SU(NULL) {} + // Critical resource consumption of the best candidate. + SchedResourceDelta ResDelta; + + SchedCandidate(const CandPolicy &policy) + : Policy(policy), SU(NULL), Reason(NoCand) {} + + bool isValid() const { return SU; } + + // Copy the status of another candidate without changing policy. + void setBest(SchedCandidate &Best) { + assert(Best.Reason != NoCand && "uninitialized Sched candidate"); + SU = Best.SU; + Reason = Best.Reason; + RPDelta = Best.RPDelta; + ResDelta = Best.ResDelta; + } + + void initResourceDelta(const ScheduleDAGMI *DAG, + const TargetSchedModel *SchedModel); + }; + + /// Summarize the unscheduled region. + struct SchedRemainder { + // Critical path through the DAG in expected latency. + unsigned CriticalPath; + + // Unscheduled resources + SmallVector<unsigned, 16> RemainingCounts; + // Critical resource for the unscheduled zone. + unsigned CritResIdx; + // Number of micro-ops left to schedule. + unsigned RemainingMicroOps; + // Is the unscheduled zone resource limited. + bool IsResourceLimited; + + unsigned MaxRemainingCount; + + void reset() { + CriticalPath = 0; + RemainingCounts.clear(); + CritResIdx = 0; + RemainingMicroOps = 0; + IsResourceLimited = false; + MaxRemainingCount = 0; + } + + SchedRemainder() { reset(); } + + void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); }; - /// Represent the type of SchedCandidate found within a single queue. - enum CandResult { - NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure }; /// Each Scheduling boundary is associated with ready queues. It tracks the - /// current cycle in whichever direction at has moved, and maintains the state + /// current cycle in the direction of movement, and maintains the state /// of "hazards" and other interlocks at the current cycle. struct SchedBoundary { ScheduleDAGMI *DAG; const TargetSchedModel *SchedModel; + SchedRemainder *Rem; ReadyQueue Available; ReadyQueue Pending; bool CheckPending; + // For heuristics, keep a list of the nodes that immediately depend on the + // most recently scheduled node. + SmallPtrSet<const SUnit*, 8> NextSUs; + ScheduleHazardRecognizer *HazardRec; unsigned CurrCycle; @@ -670,34 +791,88 @@ class ConvergingScheduler : public MachineSchedStrategy { /// MinReadyCycle - Cycle of the soonest available instruction. unsigned MinReadyCycle; + // The expected latency of the critical path in this scheduled zone. + unsigned ExpectedLatency; + + // Resources used in the scheduled zone beyond this boundary. + SmallVector<unsigned, 16> ResourceCounts; + + // Cache the critical resources ID in this scheduled zone. + unsigned CritResIdx; + + // Is the scheduled region resource limited vs. latency limited. + bool IsResourceLimited; + + unsigned ExpectedCount; + + // Policy flag: attempt to find ILP until expected latency is covered. + bool ShouldIncreaseILP; + +#ifndef NDEBUG // Remember the greatest min operand latency. unsigned MaxMinLatency; +#endif + + void reset() { + Available.clear(); + Pending.clear(); + CheckPending = false; + NextSUs.clear(); + HazardRec = 0; + CurrCycle = 0; + IssueCount = 0; + MinReadyCycle = UINT_MAX; + ExpectedLatency = 0; + ResourceCounts.resize(1); + assert(!ResourceCounts[0] && "nonzero count for bad resource"); + CritResIdx = 0; + IsResourceLimited = false; + ExpectedCount = 0; + ShouldIncreaseILP = false; +#ifndef NDEBUG + MaxMinLatency = 0; +#endif + // Reserve a zero-count for invalid CritResIdx. + ResourceCounts.resize(1); + } /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. SchedBoundary(unsigned ID, const Twine &Name): - DAG(0), SchedModel(0), Available(ID, Name+".A"), - Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), - CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0), - MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} + DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), + Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") { + reset(); + } ~SchedBoundary() { delete HazardRec; } - void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel) { - DAG = dag; - SchedModel = smodel; - } + void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, + SchedRemainder *rem); bool isTop() const { return Available.getID() == ConvergingScheduler::TopQID; } + unsigned getUnscheduledLatency(SUnit *SU) const { + if (isTop()) + return SU->getHeight(); + return SU->getDepth(); + } + + unsigned getCriticalCount() const { + return ResourceCounts[CritResIdx]; + } + bool checkHazard(SUnit *SU); + void checkILPPolicy(); + void releaseNode(SUnit *SU, unsigned ReadyCycle); void bumpCycle(); + void countResource(unsigned PIdx, unsigned Cycles); + void bumpNode(SUnit *SU); void releasePending(); @@ -707,11 +882,13 @@ class ConvergingScheduler : public MachineSchedStrategy { SUnit *pickOnlyChoice(); }; +private: ScheduleDAGMI *DAG; const TargetSchedModel *SchedModel; const TargetRegisterInfo *TRI; // State of the top and bottom scheduled instruction boundaries. + SchedRemainder Rem; SchedBoundary Top; SchedBoundary Bot; @@ -736,25 +913,75 @@ public: virtual void releaseBottomNode(SUnit *SU); + virtual void registerRoots(); + protected: - SUnit *pickNodeBidrectional(bool &IsTopNode); + void balanceZones( + ConvergingScheduler::SchedBoundary &CriticalZone, + ConvergingScheduler::SchedCandidate &CriticalCand, + ConvergingScheduler::SchedBoundary &OppositeZone, + ConvergingScheduler::SchedCandidate &OppositeCand); + + void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, + ConvergingScheduler::SchedCandidate &BotCand); + + void tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker); + + SUnit *pickNodeBidirectional(bool &IsTopNode); + + void pickNodeFromQueue(SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); - CandResult pickNodeFromQueue(ReadyQueue &Q, - const RegPressureTracker &RPTracker, - SchedCandidate &Candidate); #ifndef NDEBUG - void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, - PressureElement P = PressureElement()); + void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone); #endif }; } // namespace +void ConvergingScheduler::SchedRemainder:: +init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { + reset(); + if (!SchedModel->hasInstrSchedModel()) + return; + RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); + for (std::vector<SUnit>::iterator + I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { + const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); + RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + unsigned Factor = SchedModel->getResourceFactor(PIdx); + RemainingCounts[PIdx] += (Factor * PI->Cycles); + } + } +} + +void ConvergingScheduler::SchedBoundary:: +init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { + reset(); + DAG = dag; + SchedModel = smodel; + Rem = rem; + if (SchedModel->hasInstrSchedModel()) + ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); +} + void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; - Top.init(DAG, SchedModel); - Bot.init(DAG, SchedModel); + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + Bot.init(DAG, SchedModel, &Rem); + + // Initialize resource counts. // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or // are disabled, then these HazardRecs will be disabled. @@ -803,6 +1030,17 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { Bot.releaseNode(SU, SU->BotReadyCycle); } +void ConvergingScheduler::registerRoots() { + Rem.CriticalPath = DAG->ExitSU.getDepth(); + // Some roots may not feed into ExitSU. Check all of them in case. + for (std::vector<SUnit*>::const_iterator + I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { + if ((*I)->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = (*I)->getDepth(); + } + DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); +} + /// Does this SU have a hazard within the current instruction group. /// /// The scheduler supports two modes of hazard recognition. The first is the @@ -821,14 +1059,26 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); - if (IssueCount + uops > SchedModel->getIssueWidth()) + if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) { + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" + << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; - + } return false; } +/// If expected latency is covered, disable ILP policy. +void ConvergingScheduler::SchedBoundary::checkILPPolicy() { + if (ShouldIncreaseILP + && (IsResourceLimited || ExpectedLatency <= CurrCycle)) { + ShouldIncreaseILP = false; + DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n'); + } +} + void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; @@ -838,6 +1088,18 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, Pending.push(SU); else Available.push(SU); + + // Record this node as an immediate dependent of the scheduled node. + NextSUs.insert(SU); |