From ae692f2baedf53504af2715993b166950e185a55 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Mon, 12 Nov 2012 19:28:57 +0000
Subject: misched: Infrastructure for weak DAG edges.

This adds support for weak DAG edges to the general scheduling
infrastructure in preparation for MachineScheduler support for
heuristics based on weak edges.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167738 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index a4817d09c0..71cc072d47 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -310,6 +310,10 @@ void ReadyQueue::dump() {
 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
   SUnit *SuccSU = SuccEdge->getSUnit();
 
+  if (SuccEdge->isWeak()) {
+    --SuccSU->WeakPredsLeft;
+    return;
+  }
 #ifndef NDEBUG
   if (SuccSU->NumPredsLeft == 0) {
     dbgs() << "*** Scheduling failed! ***\n";
@@ -338,6 +342,10 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
   SUnit *PredSU = PredEdge->getSUnit();
 
+  if (PredEdge->isWeak()) {
+    --PredSU->WeakSuccsLeft;
+    return;
+  }
 #ifndef NDEBUG
   if (PredSU->NumSuccsLeft == 0) {
     dbgs() << "*** Scheduling failed! ***\n";
@@ -530,17 +538,20 @@ void ScheduleDAGMI::postprocessDAG() {
 }
 
 // Release all DAG roots for scheduling.
+//
+// Nodes with unreleased weak edges can still be roots.
 void ScheduleDAGMI::releaseRoots() {
   SmallVector<SUnit*, 8> BotRoots;
 
   for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+    SUnit *SU = &(*I);
     // A SUnit is ready to top schedule if it has no predecessors.
-    if (I->Preds.empty())
-      SchedImpl->releaseTopNode(&(*I));
+    if (!I->NumPredsLeft && SU != &EntrySU)
+      SchedImpl->releaseTopNode(SU);
     // A SUnit is ready to bottom schedule if it has no successors.
-    if (I->Succs.empty())
-      BotRoots.push_back(&(*I));
+    if (!I->NumSuccsLeft && SU != &ExitSU)
+      BotRoots.push_back(SU);
   }
   // Release bottom roots in reverse order so the higher priority nodes appear
   // first. This is more natural and slightly more efficient.
@@ -555,13 +566,12 @@ void ScheduleDAGMI::initQueues() {
   // Initialize the strategy before modifying the DAG.
   SchedImpl->initialize(this);
 
-  // Release edges from the special Entry node or to the special Exit node.
+  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
+  releaseRoots();
+
   releaseSuccessors(&EntrySU);
   releasePredecessors(&ExitSU);
 
-  // Release all DAG roots for scheduling.
-  releaseRoots();
-
   SchedImpl->registerRoots();
 
   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
--
cgit v1.2.3-70-g09d2


From 9b5caaa9c452f262a52dd5ac7ebbc722da5a63de Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Mon, 12 Nov 2012 19:40:10 +0000
Subject: misched: Target-independent support for load/store clustering.

This infrastructure is generally useful for any target that wants to
strongly prefer two instructions to be adjacent after scheduling.

A following checkin will add target-specific hooks with unit tests.
Then this feature will be enabled by default with misched.
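To illustrate the shape those hooks can take, here is a minimal sketch for a
hypothetical target (the class name, the [base + imm] operand layout, and the
cluster-length cap are assumptions for illustration only; the real hooks land
in the later checkin):

  // Hypothetical target hook: report a memory op's base register and offset,
  // assuming a simple [base + imm] addressing mode in operands 1 and 2.
  bool MyTargetInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt,
                                               unsigned &BaseReg,
                                               unsigned &Offset,
                                               const TargetRegisterInfo *TRI)
      const {
    if (LdSt->getNumOperands() < 3 || !LdSt->getOperand(1).isReg() ||
        !LdSt->getOperand(2).isImm())
      return false;
    BaseReg = LdSt->getOperand(1).getReg();
    Offset = LdSt->getOperand(2).getImm();
    return true;
  }

  // Hypothetical target hook: cap the cluster length so a long run of loads
  // does not over-constrain the DAG with weak edges.
  bool MyTargetInstrInfo::shouldScheduleLoadsNear(MachineInstr *FirstLdSt,
                                                  MachineInstr *SecondLdSt,
                                                  unsigned NumLoads) const {
    return NumLoads < 4;
  }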
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167742 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/MachineScheduler.h |  27 ++++-
 include/llvm/Target/TargetInstrInfo.h   |  13 +++
 lib/CodeGen/MachineScheduler.cpp        | 188 ++++++++++++++++++++++++++++++--
 lib/Target/ARM/ARMBaseInstrInfo.cpp     |   6 +
 4 files changed, 220 insertions(+), 14 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 31bd606f93..08f9182805 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -202,6 +202,10 @@ protected:
   RegisterClassInfo *RegClassInfo;
   MachineSchedStrategy *SchedImpl;
 
+  /// Topo - A topological ordering for SUnits which permits fast IsReachable
+  /// and similar queries.
+  ScheduleDAGTopologicalSort Topo;
+
   /// Ordered list of DAG postprocessing steps.
   std::vector<ScheduleDAGMutation*> Mutations;
 
@@ -226,6 +230,10 @@ protected:
   IntervalPressure BotPressure;
   RegPressureTracker BotRPTracker;
 
+  /// Record the next node in a scheduled cluster.
+  const SUnit *NextClusterPred;
+  const SUnit *NextClusterSucc;
+
 #ifndef NDEBUG
   /// The number of instructions scheduled so far. Used to cut off the
   /// scheduler at the point determined by misched-cutoff.
@@ -236,24 +244,35 @@ public:
   ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
     ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
     AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
-    RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
-    CurrentBottom(), BotRPTracker(BotPressure) {
+    Topo(SUnits, &ExitSU), RPTracker(RegPressure), CurrentTop(),
+    TopRPTracker(TopPressure), CurrentBottom(), BotRPTracker(BotPressure),
+    NextClusterPred(NULL), NextClusterSucc(NULL) {
 #ifndef NDEBUG
     NumInstrsScheduled = 0;
 #endif
   }
 
   virtual ~ScheduleDAGMI() {
+    DeleteContainerPointers(Mutations);
     delete SchedImpl;
   }
 
   /// Add a postprocessing step to the DAG builder.
   /// Mutations are applied in the order that they are added after normal DAG
   /// building and before MachineSchedStrategy initialization.
+  ///
+  /// ScheduleDAGMI takes ownership of the Mutation object.
   void addMutation(ScheduleDAGMutation *Mutation) {
     Mutations.push_back(Mutation);
   }
 
+  /// \brief Add a DAG edge to the given SU with the given predecessor
+  /// dependence data.
+  ///
+  /// \returns true if the edge may be added without creating a cycle OR if an
+  /// equivalent edge already existed (false indicates failure).
+  bool addEdge(SUnit *SuccSU, const SDep &PredDep);
+
   MachineBasicBlock::iterator top() const { return CurrentTop; }
   MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
 
@@ -285,6 +304,10 @@ public:
     return RegionCriticalPSets;
   }
 
+  const SUnit *getNextClusterPred() const { return NextClusterPred; }
+
+  const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
+
 protected:
 
   // Top-Level entry points for the schedule() driver...
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 4570813ba6..4f8ae01291 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -621,6 +621,19 @@ public:
     return false;
   }
 
+  /// \brief Get the base register and byte offset of a load/store instr.
+  virtual bool getLdStBaseRegImmOfs(MachineInstr *LdSt,
+                                    unsigned &BaseReg, unsigned &Offset,
+                                    const TargetRegisterInfo *TRI) const {
+    return false;
+  }
+
+  virtual bool shouldScheduleLoadsNear(MachineInstr *FirstLdSt,
+                                       MachineInstr *SecondLdSt,
+                                       unsigned NumLoads) const {
+    return false;
+  }
+
   /// ReverseBranchCondition - Reverses the branch condition of the specified
   /// condition list, returning false on success and true if it cannot be
   /// reversed.
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 71cc072d47..dbab6bae28 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -58,6 +58,10 @@ static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
            "before attempting to balance ILP"),
   cl::init(10U));
 
+// Experimental heuristics
+static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
+  cl::desc("Enable load clustering."));
+
 //===----------------------------------------------------------------------===//
 // Machine Instruction Scheduling Pass and Registry
 //===----------------------------------------------------------------------===//
@@ -303,6 +307,17 @@ void ReadyQueue::dump() {
 // preservation.
 //===----------------------------------------------------------------------===//
 
+bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+  // Do not use WillCreateCycle, it assumes SD scheduling.
+  // If Pred is reachable from Succ, then the edge creates a cycle.
+  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+    return false;
+  Topo.AddPred(SuccSU, PredDep.getSUnit());
+  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+  // Return true regardless of whether a new edge needed to be inserted.
+  return true;
+}
+
 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 /// NumPredsLeft reaches zero, release the successor node.
 ///
@@ -312,6 +327,8 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
 
   if (SuccEdge->isWeak()) {
     --SuccSU->WeakPredsLeft;
+    if (SuccEdge->isCluster())
+      NextClusterSucc = SuccSU;
     return;
   }
 #ifndef NDEBUG
@@ -344,6 +361,8 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
 
   if (PredEdge->isWeak()) {
     --PredSU->WeakSuccsLeft;
+    if (PredEdge->isCluster())
+      NextClusterPred = PredSU;
     return;
   }
 #ifndef NDEBUG
@@ -482,6 +501,8 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
 void ScheduleDAGMI::schedule() {
   buildDAGWithRegPressure();
 
+  Topo.InitDAGTopologicalSorting();
+
   postprocessDAG();
 
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
@@ -562,6 +583,8 @@ void ScheduleDAGMI::releaseRoots() {
 
 /// Identify DAG roots and setup scheduler queues.
 void ScheduleDAGMI::initQueues() {
+  NextClusterSucc = NULL;
+  NextClusterPred = NULL;
 
   // Initialize the strategy before modifying the DAG.
   SchedImpl->initialize(this);
@@ -664,6 +687,119 @@ void ScheduleDAGMI::dumpSchedule() const {
 }
 #endif
 
+namespace {
+/// \brief Post-process the DAG to create cluster edges between neighboring
+/// loads.
+class LoadClusterMutation : public ScheduleDAGMutation {
+  struct LoadInfo {
+    SUnit *SU;
+    unsigned BaseReg;
+    unsigned Offset;
+    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
+      : SU(su), BaseReg(reg), Offset(ofs) {}
+  };
+  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
+                           const LoadClusterMutation::LoadInfo &RHS);
+
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+public:
+  LoadClusterMutation(const TargetInstrInfo *tii,
+                      const TargetRegisterInfo *tri)
+    : TII(tii), TRI(tri) {}
+
+  virtual void apply(ScheduleDAGMI *DAG);
+protected:
+  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+bool LoadClusterMutation::LoadInfoLess(
+    const LoadClusterMutation::LoadInfo &LHS,
+    const LoadClusterMutation::LoadInfo &RHS) {
+  if (LHS.BaseReg != RHS.BaseReg)
+    return LHS.BaseReg < RHS.BaseReg;
+  return LHS.Offset < RHS.Offset;
+}
+
+void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
+                                                  ScheduleDAGMI *DAG) {
+  SmallVector<LoadInfo, 32> LoadRecords;
+  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
+    SUnit *SU = Loads[Idx];
+    unsigned BaseReg;
+    unsigned Offset;
+    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
+      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+  }
+  if (LoadRecords.size() < 2)
+    return;
+  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
+  unsigned ClusterLength = 1;
+  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
+    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+      ClusterLength = 1;
+      continue;
+    }
+
+    SUnit *SUa = LoadRecords[Idx].SU;
+    SUnit *SUb = LoadRecords[Idx+1].SU;
+    if (TII->shouldScheduleLoadsNear(SUa->getInstr(), SUb->getInstr(),
+                                     ClusterLength)
+        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+
+      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+            << SUb->NodeNum << ")\n");
+      // Copy successor edges from SUa to SUb. Interleaving computation
+      // dependent on SUa can prevent load combining due to register reuse.
+      // Predecessor edges do not need to be copied from SUb to SUa since
+      // nearby loads should have effectively the same inputs.
+      for (SUnit::const_succ_iterator
+             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
+        if (SI->getSUnit() == SUb)
+          continue;
+        DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
+        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+      }
+      ++ClusterLength;
+    }
+    else
+      ClusterLength = 1;
+  }
+}
+
+/// \brief Callback from DAG postProcessing to create cluster edges for loads.
+void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
+  // Map DAG NodeNum to store chain ID.
+  DenseMap<unsigned, unsigned> StoreChainIDs;
+  // Map each store chain to a set of dependent loads.
+  SmallVector<SmallVector<SUnit*, 4>, 32> StoreChainDependents;
+  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+    SUnit *SU = &DAG->SUnits[Idx];
+    if (!SU->getInstr()->mayLoad())
+      continue;
+    unsigned ChainPredID = DAG->SUnits.size();
+    for (SUnit::const_pred_iterator
+           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+      if (PI->isCtrl()) {
+        ChainPredID = PI->getSUnit()->NodeNum;
+        break;
+      }
+    }
+    // Check if this chain-like pred has been seen
+    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
+    unsigned NumChains = StoreChainDependents.size();
+    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
+      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
+    if (Result.second)
+      StoreChainDependents.resize(NumChains + 1);
+    StoreChainDependents[Result.first->second].push_back(SU);
+  }
+  // Iterate over the store chains.
+  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
+    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+}
+
 //===----------------------------------------------------------------------===//
 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
@@ -676,9 +812,10 @@ public:
   /// Represent the type of SchedCandidate found within a single queue.
   /// pickNodeBidirectional depends on these listed by decreasing priority.
   enum CandReason {
-    NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand,
-    BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce,
-    SingleMax, MultiPressure, NextDefUse, NodeOrder};
+    NoCand, SingleExcess, SingleCritical, Cluster,
+    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
+    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
+    NodeOrder};
 
 #ifndef NDEBUG
   static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
@@ -1029,6 +1166,8 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
 
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
+    if (I->isWeak())
+      continue;
     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
     unsigned MinLatency = I->getMinLatency();
 #ifndef NDEBUG
@@ -1424,6 +1563,7 @@ static bool tryLess(unsigned TryVal, unsigned CandVal,
   }
   return false;
 }
+
 static bool tryGreater(unsigned TryVal, unsigned CandVal,
                        ConvergingScheduler::SchedCandidate &TryCand,
                        ConvergingScheduler::SchedCandidate &Cand,
@@ -1440,6 +1580,10 @@ static bool tryGreater(unsigned TryVal, unsigned CandVal,
   return false;
 }
 
+static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
 /// Apply a set of heuristics to a new candidate. Heuristics are currently
 /// hierarchical. This may be more efficient than a graduated cost model because
 /// we don't need to evaluate all aspects of the model for each node in the
@@ -1482,6 +1626,26 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
     if (Cand.Reason == SingleCritical)
       Cand.Reason = MultiPressure;
 
+  // Keep clustered nodes together to encourage downstream peephole
+  // optimizations which may reduce resource requirements.
+  //
+  // This is a best effort to set things up for a post-RA pass. Optimizations
+  // like generating loads of multiple registers should ideally be done within
+  // the scheduler pass by combining the loads during DAG postprocessing.
+  const SUnit *NextClusterSU =
+    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+                 TryCand, Cand, Cluster))
+    return;
+  // Currently, weak edges are for clustering, so we hard-code that reason.
+  // However, deferring the current TryCand will not change Cand's reason.
+  CandReason OrigReason = Cand.Reason;
+  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
+              getWeakLeft(Cand.SU, Zone.isTop()),
+              TryCand, Cand, Cluster)) {
+    Cand.Reason = OrigReason;
+    return;
+  }
   // Avoid critical resource consumption and balance the schedule.
   TryCand.initResourceDelta(DAG, SchedModel);
   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
@@ -1528,15 +1692,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
   // Prefer immediate defs/users of the last scheduled instruction. This is a
   // nice pressure avoidance strategy that also conserves the processor's
   // register renaming resources and keeps the machine code readable.
-  if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) {
-    TryCand.Reason = NextDefUse;
-    return;
-  }
-  if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) {
-    if (Cand.Reason > NextDefUse)
-      Cand.Reason = NextDefUse;
+  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
+                 TryCand, Cand, NextDefUse))
     return;
-  }
+
   // Fall through to original instruction order.
   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
@@ -1582,6 +1741,7 @@ const char *ConvergingScheduler::getReasonStr(
   case NoCand:         return "NOCAND    ";
   case SingleExcess:   return "REG-EXCESS";
   case SingleCritical: return "REG-CRIT  ";
+  case Cluster:        return "CLUSTER   ";
   case SingleMax:      return "REG-MAX   ";
   case MultiPressure:  return "REG-MULTI ";
   case ResourceReduce: return "RES-REDUCE";
@@ -1822,7 +1982,11 @@ void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
   assert((!ForceTopDown || !ForceBottomUp) &&
          "-misched-topdown incompatible with -misched-bottomup");
-  return new ScheduleDAGMI(C, new ConvergingScheduler());
+  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
+  // Register DAG post-processors.
+  if (EnableLoadCluster)
+    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
+  return DAG;
 }
 static MachineSchedRegistry
 ConvergingSchedRegistry("converge", "Standard converging scheduler.",
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 3c7bb24f42..3288a71171 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1373,6 +1373,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
 /// only return true if the base pointers are the same and the only differences
 /// between the two addresses is the offset. It also returns the offsets by
 /// reference.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                                int64_t &Offset1,
                                                int64_t &Offset2) const {
@@ -1447,6 +1450,9 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
 /// from the common base address. It returns true if it decides it's desirable
 /// to schedule the two loads together. "NumLoads" is the number of loads that
 /// have already been scheduled after Load1.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                                int64_t Offset1, int64_t Offset2,
                                                unsigned NumLoads) const {
--
cgit v1.2.3-70-g09d2


From 6996fd0b543cf8bd4a0d4e09e80a168f0ae052c5 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Mon, 12 Nov 2012 19:52:20 +0000
Subject: misched: Target-independent support for MacroFusion.

Uses the infrastructure from r167742 to support clustering instructions
that the target processor can "fuse", e.g. cmp+jmp.
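For illustration, a target's side of this could look roughly like the
following sketch (the target class and opcode names are hypothetical; this
checkin adds only the target-independent support):

  bool MyTargetInstrInfo::shouldScheduleAdjacent(MachineInstr *First,
                                                 MachineInstr *Second) const {
    // Fuse a flag-setting compare with the conditional branch that reads
    // the flags, mirroring the hardware's cmp+jmp macro-op fusion.
    return First->getOpcode() == MyTarget::CMPri &&
           Second->getOpcode() == MyTarget::Bcc;
  }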
Next step: target hook implementations with test cases, and enable.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167744 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetInstrInfo.h |  7 ++++
 lib/CodeGen/MachineScheduler.cpp      | 66 ++++++++++++++++++++++++++++++++---
 2 files changed, 68 insertions(+), 5 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 4f8ae01291..97fddeeca1 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -634,6 +634,13 @@ public:
     return false;
   }
 
+  /// \brief Can this target fuse the given instructions if they are scheduled
+  /// adjacent.
+  virtual bool shouldScheduleAdjacent(MachineInstr* First,
+                                      MachineInstr *Second) const {
+    return false;
+  }
+
   /// ReverseBranchCondition - Reverses the branch condition of the specified
   /// condition list, returning false on success and true if it cannot be
   /// reversed.
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index dbab6bae28..b05d7263cd 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -62,6 +62,10 @@ static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
   cl::desc("Enable load clustering."));
 
+// Experimental heuristics
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+  cl::desc("Enable scheduling for macro fusion."));
+
 //===----------------------------------------------------------------------===//
 // Machine Instruction Scheduling Pass and Registry
 //===----------------------------------------------------------------------===//
@@ -308,11 +312,13 @@ void ReadyQueue::dump() {
 //===----------------------------------------------------------------------===//
 
 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
-  // Do not use WillCreateCycle, it assumes SD scheduling.
-  // If Pred is reachable from Succ, then the edge creates a cycle.
-  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
-    return false;
-  Topo.AddPred(SuccSU, PredDep.getSUnit());
+  if (SuccSU != &ExitSU) {
+    // Do not use WillCreateCycle, it assumes SD scheduling.
+    // If Pred is reachable from Succ, then the edge creates a cycle.
+    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+      return false;
+    Topo.AddPred(SuccSU, PredDep.getSUnit());
+  }
   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
   // Return true regardless of whether a new edge needed to be inserted.
   return true;
@@ -687,6 +693,10 @@ void ScheduleDAGMI::dumpSchedule() const {
 }
 #endif
 
+//===----------------------------------------------------------------------===//
+// LoadClusterMutation - DAG post-processing to cluster loads.
+//===----------------------------------------------------------------------===//
+
 namespace {
 /// \brief Post-process the DAG to create cluster edges between neighboring
 /// loads.
@@ -800,6 +810,50 @@ void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
     clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
 }
 
+//===----------------------------------------------------------------------===//
+// MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between instructions
+/// that may be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+  const TargetInstrInfo *TII;
+public:
+  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
+
+  virtual void apply(ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// \brief Callback from DAG postProcessing to create cluster edges to encourage
+/// fused operations.
+void MacroFusion::apply(ScheduleDAGMI *DAG) {
+  // For now, assume targets can only fuse with the branch.
+  MachineInstr *Branch = DAG->ExitSU.getInstr();
+  if (!Branch)
+    return;
+
+  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
+    SUnit *SU = &DAG->SUnits[--Idx];
+    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+      continue;
+
+    // Create a single weak edge from SU to ExitSU. The only effect is to cause
+    // bottom-up scheduling to heavily prioritize the clustered SU. There is no
+    // need to copy predecessor edges from ExitSU to SU, since top-down
+    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
+    // of SU, we could create an artificial edge from the deepest root, but it
+    // hasn't been needed yet.
+    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+    (void)Success;
+    assert(Success && "No DAG nodes should be reachable from ExitSU");
+
+    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+    break;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
@@ -1986,6 +2040,8 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
   // Register DAG post-processors.
   if (EnableLoadCluster)
     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
+  if (EnableMacroFusion)
+    DAG->addMutation(new MacroFusion(DAG->TII));
   return DAG;
 }
 static MachineSchedRegistry
--
cgit v1.2.3-70-g09d2

From ad1cc1d1bfc0accd3f1af5c02ac367ff46a4bfdf Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Mon, 12 Nov 2012 21:28:10 +0000
Subject: misched: rename interface to avoid gcc warnings

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167753 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetInstrInfo.h | 6 +++---
 lib/CodeGen/MachineScheduler.cpp      | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 97fddeeca1..95d2b916aa 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -628,9 +628,9 @@ public:
     return false;
   }
 
-  virtual bool shouldScheduleLoadsNear(MachineInstr *FirstLdSt,
-                                       MachineInstr *SecondLdSt,
-                                       unsigned NumLoads) const {
+  virtual bool shouldClusterLoads(MachineInstr *FirstLdSt,
+                                  MachineInstr *SecondLdSt,
+                                  unsigned NumLoads) const {
     return false;
   }
 
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index b05d7263cd..ee8138c0c4 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -754,8 +754,7 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
 
     SUnit *SUa = LoadRecords[Idx].SU;
     SUnit *SUb = LoadRecords[Idx+1].SU;
-    if (TII->shouldScheduleLoadsNear(SUa->getInstr(), SUb->getInstr(),
-                                     ClusterLength)
+    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
         && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
--
cgit v1.2.3-70-g09d2


From ad1cc1d1bfc0accd3f1af5c02ac367ff46a4bfdf Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Tue, 13 Nov 2012 08:47:29 +0000
Subject: misched: Allow subtargets to enable misched and dependent options.

This allows me to begin enabling (or backing out) misched by default
for one subtarget at a time.

To run misched we typically want to:
- Disable SelectionDAG scheduling (use the source order scheduler)
- Enable more aggressive coalescing (until we decide to always run the
  coalescer this way)
- Enable the MachineScheduler pass itself.

Disabling PostRA sched may follow for some subtargets.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167826 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetSubtargetInfo.h     |  7 +++++
 lib/CodeGen/MachineScheduler.cpp              |  4 +--
 lib/CodeGen/Passes.cpp                        |  5 ++-
 lib/CodeGen/RegisterCoalescer.cpp             | 44 +++++++++++++++++++++------
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp |  4 ++-
 lib/Target/TargetSubtargetInfo.cpp            |  4 +++
 6 files changed, 54 insertions(+), 14 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h
index 6db96d980b..3f22f47a0d 100644
--- a/include/llvm/Target/TargetSubtargetInfo.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -54,6 +54,13 @@ public:
     return 0;
   }
 
+  /// \brief True if the subtarget should run MachineScheduler after aggressive
+  /// coalescing.
+  ///
+  /// This currently replaces the SelectionDAG scheduler with the "source" order
+  /// scheduler. It does not yet disable the postRA scheduler.
+  virtual bool enableMachineScheduler() const;
+
   // enablePostRAScheduler - If the target can benefit from post-regalloc
   // scheduling and the specified optimization level meets the requirement
   // return true to enable post-register-allocation scheduling. In
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index ee8138c0c4..8d43360e67 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -60,11 +60,11 @@ static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
 
 // Experimental heuristics
 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
-  cl::desc("Enable load clustering."));
+  cl::desc("Enable load clustering."), cl::init(true));
 
 // Experimental heuristics
 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
-  cl::desc("Enable scheduling for macro fusion."));
+  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
 
 //===----------------------------------------------------------------------===//
 // Machine Instruction Scheduling Pass and Registry
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 4ea21d4ff7..ee6e2838c2 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/Support/CommandLine.h"
@@ -241,7 +242,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
     disablePass(&EarlyIfConverterID);
 
   // Temporarily disable experimental passes.
-  substitutePass(&MachineSchedulerID, 0);
+  const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+  if (!ST.enableMachineScheduler())
+    disablePass(&MachineSchedulerID);
 }
 
 /// Insert InsertedPassID pass after TargetPassID.
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index e4c2044b25..8e6533c747 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cmath>
 using namespace llvm;
@@ -64,16 +65,16 @@ EnableJoining("join-liveintervals",
               cl::init(true));
 
 // Temporary flag to test critical edge unsplitting.
-static cl::opt<bool>
+static cl::opt<cl::boolOrDefault>
 EnableJoinSplits("join-splitedges",
-  cl::desc("Coalesce copies on split edges (default=false)"),
-  cl::init(false), cl::Hidden);
+  cl::desc("Coalesce copies on split edges (default=subtarget)"),
+  cl::init(cl::BOU_UNSET), cl::Hidden);
 
 // Temporary flag to test global copy optimization.
-static cl::opt<bool>
+static cl::opt<cl::boolOrDefault>
 EnableGlobalCopies("join-globalcopies",
-  cl::desc("Coalesce copies that don't locally define an lrg"),
-  cl::init(false));
+  cl::desc("Coalesce copies that span blocks (default=subtarget)"),
+  cl::init(cl::BOU_UNSET), cl::Hidden);
 
 static cl::opt<bool>
 VerifyCoalescing("verify-coalescing",
@@ -94,6 +95,14 @@ namespace {
     AliasAnalysis *AA;
     RegisterClassInfo RegClassInfo;
 
+    /// \brief True if the coalescer should aggressively coalesce global copies
+    /// in favor of keeping local copies.
+    bool JoinGlobalCopies;
+
+    /// \brief True if the coalescer should aggressively coalesce fall-thru
+    /// blocks exclusively containing copies.
+    bool JoinSplitEdges;
+
     /// WorkList - Copy instructions yet to be coalesced.
     SmallVector<MachineInstr*, 8> WorkList;
     SmallVector<MachineInstr*, 8> LocalWorkList;
@@ -1943,6 +1952,10 @@ namespace {
   //
   // EnableGlobalCopies assumes that the primary sort key is loop depth.
   struct MBBPriorityCompare {
+    bool JoinSplitEdges;
+
+    MBBPriorityCompare(bool joinsplits): JoinSplitEdges(joinsplits) {}
+
     bool operator()(const MBBPriorityInfo &LHS,
                     const MBBPriorityInfo &RHS) const {
       // Deeper loops first
@@ -1950,7 +1963,7 @@ namespace {
         return LHS.Depth > RHS.Depth;
 
       // Try to unsplit critical edges next.
-      if (EnableJoinSplits && LHS.IsSplit != RHS.IsSplit)
+      if (JoinSplitEdges && LHS.IsSplit != RHS.IsSplit)
         return LHS.IsSplit;
 
       // Prefer blocks that are more connected in the CFG. This takes care of
@@ -2011,7 +2024,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
   // Collect all copy-like instructions in MBB. Don't start coalescing anything
   // yet, it might invalidate the iterator.
   const unsigned PrevSize = WorkList.size();
-  if (EnableGlobalCopies) {
+  if (JoinGlobalCopies) {
    // Coalesce copies bottom-up to coalesce local defs before local uses. They
    // are not inherently easier to resolve, but slightly preferable until we
    // have local live range splitting. In particular this is required by
@@ -2061,13 +2074,13 @@ void RegisterCoalescer::joinAllIntervals() {
     MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
                                    isSplitEdge(MBB)));
   }
-  std::sort(MBBs.begin(), MBBs.end(), MBBPriorityCompare());
+  std::sort(MBBs.begin(), MBBs.end(), MBBPriorityCompare(JoinSplitEdges));
 
   // Coalesce intervals in MBB priority order.
   unsigned CurrDepth = UINT_MAX;
   for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
     // Try coalescing the collected local copies for deeper loops.
- if (EnableGlobalCopies && MBBs[i].Depth < CurrDepth) + if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) coalesceLocals(); copyCoalesceInMBB(MBBs[i].MBB); } @@ -2097,6 +2110,17 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { AA = &getAnalysis(); Loops = &getAnalysis(); + const TargetSubtargetInfo &ST = TM->getSubtarget(); + if (EnableGlobalCopies == cl::BOU_UNSET) + JoinGlobalCopies = ST.enableMachineScheduler(); + else + JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); + + if (EnableJoinSplits == cl::BOU_UNSET) + JoinSplitEdges = ST.enableMachineScheduler(); + else + JoinSplitEdges = (EnableJoinSplits == cl::BOU_TRUE); + DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" << "********** Function: " << MF->getName() << '\n'); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c314fa5b51..d63a5b7a3b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -45,6 +45,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -216,8 +217,9 @@ namespace llvm { ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetLowering &TLI = IS->getTargetLowering(); + const TargetSubtargetInfo &ST = IS->TM.getSubtarget(); - if (OptLevel == CodeGenOpt::None || + if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() || TLI.getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp index 59ffdea00e..af0cef62d5 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -22,6 +22,10 @@ TargetSubtargetInfo::TargetSubtargetInfo() {} TargetSubtargetInfo::~TargetSubtargetInfo() {} +bool TargetSubtargetInfo::enableMachineScheduler() const { + return false; +} + bool TargetSubtargetInfo::enablePostRAScheduler( CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, -- cgit v1.2.3-70-g09d2