From 071966f6bf2a535b318995cfa5d0f5b641fb4e14 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Tue, 18 Dec 2012 20:52:49 +0000 Subject: MISched: minor improvement, initialize remaining resources before the first scheduling decision. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170449 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib/CodeGen/MachineScheduler.cpp') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index c7afa08fcd..d49eda24a7 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1166,6 +1166,16 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { RemainingCounts[PIdx] += (Factor * PI->Cycles); } } + for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); + PIdx != PEnd; ++PIdx) { + if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) + >= (int)SchedModel->getLatencyFactor()) { + CritResIdx = PIdx; + } + } + MaxRemainingCount = std::max( + RemainingMicroOps * SchedModel->getMicroOpFactor(), + RemainingCounts[CritResIdx]); } void ConvergingScheduler::SchedBoundary:: -- cgit v1.2.3-70-g09d2 From d453960f92993600c56e5b4c004cadca9d4f7dc8 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Tue, 18 Dec 2012 20:52:52 +0000 Subject: MISched: cleanup, use the proper iterator type. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170450 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/CodeGen/MachineScheduler.cpp') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index d49eda24a7..fbbae3877a 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1213,7 +1213,7 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; - for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; unsigned MinLatency = I->getMinLatency(); -- cgit v1.2.3-70-g09d2 From 9c676c2941343aa5c3f933eb0dd97b22cca3d6f4 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Tue, 18 Dec 2012 20:52:54 +0000 Subject: MISched: Remove SchedRemainder::IsResourceLimited. I don't know how to compute it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170451 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib/CodeGen/MachineScheduler.cpp') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index fbbae3877a..df3bec12f9 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -953,8 +953,6 @@ public: unsigned CritResIdx; // Number of micro-ops left to schedule. unsigned RemainingMicroOps; - // Is the unscheduled zone resource limited. 
- bool IsResourceLimited; unsigned MaxRemainingCount; @@ -963,7 +961,6 @@ public: RemainingCounts.clear(); CritResIdx = 0; RemainingMicroOps = 0; - IsResourceLimited = false; MaxRemainingCount = 0; } -- cgit v1.2.3-70-g09d2 From 44fd0bcc40e04cf9aaaf0f33744ae4decbb656c3 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Tue, 18 Dec 2012 20:52:56 +0000 Subject: MISched: Heuristics, compare latency more precisely. It matters more for some targets. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170452 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 81 +++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 43 deletions(-) (limited to 'lib/CodeGen/MachineScheduler.cpp') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index df3bec12f9..4536059799 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -954,19 +954,25 @@ public: // Number of micro-ops left to schedule. unsigned RemainingMicroOps; - unsigned MaxRemainingCount; - void reset() { CriticalPath = 0; RemainingCounts.clear(); CritResIdx = 0; RemainingMicroOps = 0; - MaxRemainingCount = 0; } SchedRemainder() { reset(); } void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); + + unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { + if (!SchedModel->hasInstrSchedModel()) + return 0; + + return std::max( + RemainingMicroOps * SchedModel->getMicroOpFactor(), + RemainingCounts[CritResIdx]); + } }; /// Each Scheduling boundary is associated with ready queues. It tracks the @@ -1007,9 +1013,6 @@ public: unsigned ExpectedCount; - // Policy flag: attempt to find ILP until expected latency is covered. - bool ShouldIncreaseILP; - #ifndef NDEBUG // Remember the greatest min operand latency. 
unsigned MaxMinLatency; @@ -1030,7 +1033,6 @@ public: CritResIdx = 0; IsResourceLimited = false; ExpectedCount = 0; - ShouldIncreaseILP = false; #ifndef NDEBUG MaxMinLatency = 0; #endif @@ -1058,7 +1060,7 @@ public: unsigned getUnscheduledLatency(SUnit *SU) const { if (isTop()) return SU->getHeight(); - return SU->getDepth(); + return SU->getDepth() + SU->Latency; } unsigned getCriticalCount() const { @@ -1067,7 +1069,7 @@ public: bool checkHazard(SUnit *SU); - void checkILPPolicy(); + void setLatencyPolicy(CandPolicy &Policy); void releaseNode(SUnit *SU, unsigned ReadyCycle); @@ -1170,9 +1172,6 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { CritResIdx = PIdx; } } - MaxRemainingCount = std::max( - RemainingMicroOps * SchedModel->getMicroOpFactor(), - RemainingCounts[CritResIdx]); } void ConvergingScheduler::SchedBoundary:: @@ -1281,12 +1280,27 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return false; } -/// If expected latency is covered, disable ILP policy. -void ConvergingScheduler::SchedBoundary::checkILPPolicy() { - if (ShouldIncreaseILP - && (IsResourceLimited || ExpectedLatency <= CurrCycle)) { - ShouldIncreaseILP = false; - DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n'); +/// Compute the remaining latency to determine whether ILP should be increased. +void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { + // FIXME: compile time. In all, we visit four queues here when we should only + // need to visit the one that was last popped if we cache the result. 
+ unsigned RemLatency = 0; + for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); + I != E; ++I) { + unsigned L = getUnscheduledLatency(*I); + if (L > RemLatency) + RemLatency = L; + } + for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end(); + I != E; ++I) { + unsigned L = getUnscheduledLatency(*I); + if (L > RemLatency) + RemLatency = L; + } + if (RemLatency + ExpectedLatency >= Rem->CriticalPath + ILPWindow + && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { + Policy.ReduceLatency = true; + DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); } } @@ -1305,15 +1319,6 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, // Record this node as an immediate dependent of the scheduled node. NextSUs.insert(SU); - - // If CriticalPath has been computed, then check if the unscheduled nodes - // exceed the ILP window. Before registerRoots, CriticalPath==0. - if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU) - > Rem->CriticalPath + ILPWindow)) { - ShouldIncreaseILP = true; - DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " " - << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n'); - } } /// Move the boundary of scheduled code by one cycle. @@ -1361,9 +1366,6 @@ void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); Rem->RemainingCounts[PIdx] -= Count; - // Reset MaxRemainingCount for sanity. - Rem->MaxRemainingCount = 0; - // Check if this resource exceeds the current critical resource by a full // cycle. If so, it becomes the critical resource. if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) @@ -1495,9 +1497,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { /// resources. /// /// If the CriticalZone is latency limited, don't force a policy for the -/// candidates here. 
Instead, When releasing each candidate, releaseNode -/// compares the region's critical path to the candidate's height or depth and -/// the scheduled zone's expected latency then sets ShouldIncreaseILP. +/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed. void ConvergingScheduler::balanceZones( ConvergingScheduler::SchedBoundary &CriticalZone, ConvergingScheduler::SchedCandidate &CriticalCand, @@ -1506,6 +1506,7 @@ void ConvergingScheduler::balanceZones( if (!CriticalZone.IsResourceLimited) return; + assert(SchedModel->hasInstrSchedModel() && "required schedmodel"); SchedRemainder *Rem = CriticalZone.Rem; @@ -1513,7 +1514,7 @@ void ConvergingScheduler::balanceZones( // remainder, try to reduce it. unsigned RemainingCritCount = Rem->RemainingCounts[CriticalZone.CritResIdx]; - if ((int)(Rem->MaxRemainingCount - RemainingCritCount) + if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) > (int)SchedModel->getLatencyFactor()) { CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce " @@ -1539,12 +1540,9 @@ void ConvergingScheduler::checkResourceLimits( ConvergingScheduler::SchedCandidate &TopCand, ConvergingScheduler::SchedCandidate &BotCand) { - Bot.checkILPPolicy(); - Top.checkILPPolicy(); - if (Bot.ShouldIncreaseILP) - BotCand.Policy.ReduceLatency = true; - if (Top.ShouldIncreaseILP) - TopCand.Policy.ReduceLatency = true; + // Set ReduceLatency to true if needed. + Bot.setLatencyPolicy(TopCand.Policy); + Top.setLatencyPolicy(BotCand.Policy); // Handle resource-limited regions. if (Top.IsResourceLimited && Bot.IsResourceLimited @@ -1579,9 +1577,6 @@ void ConvergingScheduler::checkResourceLimits( // The critical resource is different in each zone, so request balancing. // Compute the cost of each zone. 
- Rem.MaxRemainingCount = std::max( - Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(), - Rem.RemainingCounts[Rem.CritResIdx]); Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); Top.ExpectedCount = std::max( Top.getCriticalCount(), -- cgit v1.2.3-70-g09d2 From e3eddaec4a173c7b9c56aff455d74e4fb2f95daf Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Tue, 18 Dec 2012 20:52:58 +0000 Subject: MISched: Cleanup, redundant statement. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170453 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/CodeGen/MachineScheduler.cpp') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 4536059799..117b2bdccf 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1898,7 +1898,6 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, Cand.setBest(TryCand); DEBUG(traceCandidate(Cand, Zone)); } - TryCand.SU = *I; } } -- cgit v1.2.3-70-g09d2 From 47579cf390c42e0577519e0a2b6044baece9df00 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 9 Jan 2013 03:36:49 +0000 Subject: MIsched: add an ILP window property to machine model. This was an experimental option, but needs to be defined per-target. e.g. PPC A2 needs to aggressively hide latency. I converted some in-order scheduling tests to A2. Hal is working on more test cases. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171946 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/TargetSchedule.h | 3 ++ include/llvm/MC/MCSchedule.h | 19 +++++++-- include/llvm/Target/TargetSchedule.td | 1 + lib/CodeGen/MachineScheduler.cpp | 12 +----- lib/Target/ARM/ARMScheduleA9.td | 3 ++ lib/Target/X86/X86Schedule.td | 5 +++ lib/Target/X86/X86ScheduleAtom.td | 1 + test/CodeGen/ARM/misched-inorder-latency.ll | 48 --------------------- test/CodeGen/PowerPC/misched-inorder-latency.ll | 55 +++++++++++++++++++++++++ utils/TableGen/SubtargetEmitter.cpp | 1 + 10 files changed, 86 insertions(+), 62 deletions(-) delete mode 100644 test/CodeGen/ARM/misched-inorder-latency.ll create mode 100644 test/CodeGen/PowerPC/misched-inorder-latency.ll (limited to 'lib/CodeGen/MachineScheduler.cpp') diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index 4c4a2a8b95..484d7e200a 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -84,6 +84,9 @@ public: /// \brief Maximum number of micro-ops that may be scheduled per cycle. unsigned getIssueWidth() const { return SchedModel.IssueWidth; } + /// \brief Number of cycles the OOO processor is expected to hide. + unsigned getILPWindow() const { return SchedModel.ILPWindow; } + /// \brief Return the number of issue slots required for this MI. unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC = 0) const; diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h index 0c71ee5135..9e9474952a 100644 --- a/include/llvm/MC/MCSchedule.h +++ b/include/llvm/MC/MCSchedule.h @@ -155,7 +155,7 @@ public: // Optional InstrItinerary OperandCycles provides expected latency. // TODO: can't yet specify both min and expected latency per operand. 
int MinLatency; - static const unsigned DefaultMinLatency = -1; + static const int DefaultMinLatency = -1; // LoadLatency is the expected latency of load instructions. // @@ -172,6 +172,16 @@ public: unsigned HighLatency; static const unsigned DefaultHighLatency = 10; + // ILPWindow is the number of cycles that the scheduler effectively ignores + // before attempting to hide latency. This should be zero for in-order cpus to + // always hide expected latency. For out-of-order cpus, it may be tweaked as + // desired to roughly approximate instruction buffers. The actual threshold is + // not very important for an OOO processor, as long as it isn't too high. A + // nonzero value helps avoid rescheduling to hide latency when it is fairly + // obviously useless and makes register pressure heuristics more effective. + unsigned ILPWindow; + static const unsigned DefaultILPWindow = 0; + // MispredictPenalty is the typical number of extra cycles the processor // takes to recover from a branch misprediction. unsigned MispredictPenalty; @@ -196,6 +206,7 @@ public: MinLatency(DefaultMinLatency), LoadLatency(DefaultLoadLatency), HighLatency(DefaultHighLatency), + ILPWindow(DefaultILPWindow), MispredictPenalty(DefaultMispredictPenalty), ProcID(0), ProcResourceTable(0), SchedClassTable(0), NumProcResourceKinds(0), NumSchedClasses(0), @@ -205,12 +216,12 @@ public: } // Table-gen driven ctor. 
- MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned mp, - unsigned pi, const MCProcResourceDesc *pr, + MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned ilp, + unsigned mp, unsigned pi, const MCProcResourceDesc *pr, const MCSchedClassDesc *sc, unsigned npr, unsigned nsc, const InstrItinerary *ii): IssueWidth(iw), MinLatency(ml), LoadLatency(ll), HighLatency(hl), - MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr), + ILPWindow(ilp), MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr), SchedClassTable(sc), NumProcResourceKinds(npr), NumSchedClasses(nsc), InstrItineraries(ii) {} diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index 0da82fdd89..b7920bae8a 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -76,6 +76,7 @@ class SchedMachineModel { int IssueWidth = -1; // Max micro-ops that may be scheduled per cycle. int MinLatency = -1; // Determines which instrucions are allowed in a group. // (-1) inorder (0) ooo, (1): inorder +var latencies. + int ILPWindow = -1; // Cycles of latency likely hidden by hardware buffers. int LoadLatency = -1; // Cycles for loads to access the cache. int HighLatency = -1; // Approximation of cycles for "high latency" ops. int MispredictPenalty = -1; // Extra cycles for a mispredicted branch. diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 117b2bdccf..a32df7805b 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -48,15 +48,6 @@ static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// Threshold to very roughly model an out-of-order processor's instruction -// buffers. If the actual value of this threshold matters much in practice, then -// it can be specified by the machine model. For now, it's an experimental -// tuning knob to determine when and if it matters. 
-static cl::opt ILPWindow("ilp-window", cl::Hidden, - cl::desc("Allow expected latency to exceed the critical path by N cycles " - "before attempting to balance ILP"), - cl::init(10U)); - // Experimental heuristics static cl::opt EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -1297,7 +1288,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { if (L > RemLatency) RemLatency = L; } - if (RemLatency + ExpectedLatency >= Rem->CriticalPath + ILPWindow + unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); + if (RemLatency + ExpectedLatency >= CriticalPathLimit && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { Policy.ReduceLatency = true; DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 404634fee9..4191931a5a 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1887,6 +1887,9 @@ def CortexA9Model : SchedMachineModel { let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. + let ILPWindow = 10; // Don't reschedule small blocks to hide + // latency. Minimum latency requirements are already + // modeled strictly by reserving resources. let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index c14407f9ac..d99d085298 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -470,12 +470,17 @@ def IIC_NOP : InstrItinClass; // latencies. Since these latencies are not used for pipeline hazards, // they do not need to be exact. // +// ILPWindow=10 is an arbitrary threshold that approximates cycles of +// latency hidden by instruction buffers. 
The actual value is not very +// important but should be zero for inorder and nonzero for OOO processors. +// // The GenericModel contains no instruciton itineraries. def GenericModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; let LoadLatency = 4; let HighLatency = 10; + let ILPWindow = 10; } include "X86ScheduleAtom.td" diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 87102614cc..1e5f2d6c9a 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -525,6 +525,7 @@ def AtomModel : SchedMachineModel { // OperandCycles may be used for expected latency. let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. let HighLatency = 30;// Expected, may be overriden by OperandCycles. + let ILPWindow = 0; // Always try to hide expected latency. let Itineraries = AtomItineraries; } diff --git a/test/CodeGen/ARM/misched-inorder-latency.ll b/test/CodeGen/ARM/misched-inorder-latency.ll deleted file mode 100644 index 8c06b4ce6e..0000000000 --- a/test/CodeGen/ARM/misched-inorder-latency.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: llc < %s -enable-misched -march=thumb -mcpu=swift \ -; RUN: -pre-RA-sched=source -scheditins=false -ilp-window=0 \ -; RUN: -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s -; -; For these tests, we set -ilp-window=0 to simulate in order processor. - -; %val1 is a 3-cycle load live out of %entry. It should be hoisted -; above the add. 
-; CHECK: @testload -; CHECK: %entry -; CHECK: ldr -; CHECK: adds -; CHECK: bne -; CHECK: %true -define i32 @testload(i32 *%ptr, i32 %sumin) { -entry: - %sum1 = add i32 %sumin, 1 - %val1 = load i32* %ptr - %p = icmp eq i32 %sumin, 0 - br i1 %p, label %true, label %end -true: - %sum2 = add i32 %sum1, 1 - %ptr2 = getelementptr i32* %ptr, i32 1 - %val = load i32* %ptr2 - %val2 = add i32 %val1, %val - br label %end -end: - %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ] - %summerge = phi i32 [ %sum1, %entry], [ %sum2, %true ] - %sumout = add i32 %valmerge, %summerge - ret i32 %sumout -} - -; The prefetch gets a default latency of 3 cycles and should be hoisted -; above the add. -; -; CHECK: @testprefetch -; CHECK: %entry -; CHECK: pld -; CHECK: adds -; CHECK: bx -define i32 @testprefetch(i8 *%ptr, i32 %i) { -entry: - %tmp = add i32 %i, 1 - tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) - ret i32 %tmp -} -declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind diff --git a/test/CodeGen/PowerPC/misched-inorder-latency.ll b/test/CodeGen/PowerPC/misched-inorder-latency.ll new file mode 100644 index 0000000000..8fae7ad4d1 --- /dev/null +++ b/test/CodeGen/PowerPC/misched-inorder-latency.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -enable-misched -pre-RA-sched=source -scheditins=false \ +; RUN: -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s +; +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +; %val1 is a load live out of %entry. It should be hoisted +; above the add. 
+; CHECK: testload: +; CHECK: %entry +; CHECK: lwz +; CHECK: addi +; CHECK: bne +; CHECK: %true +define i32 @testload(i32 *%ptr, i32 %sumin) { +entry: + %sum1 = add i32 %sumin, 1 + %val1 = load i32* %ptr + %p = icmp eq i32 %sumin, 0 + br i1 %p, label %true, label %end +true: + %sum2 = add i32 %sum1, 1 + %ptr2 = getelementptr i32* %ptr, i32 1 + %val = load i32* %ptr2 + %val2 = add i32 %val1, %val + br label %end +end: + %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ] + %summerge = phi i32 [ %sum1, %entry], [ %sum2, %true ] + %sumout = add i32 %valmerge, %summerge + ret i32 %sumout +} + +; The prefetch gets a default latency of 3 cycles and should be hoisted +; above the add. +; +; CHECK: testprefetch: +; CHECK: %entry +; CHECK: dcbt +; CHECK: addi +; CHECK: blr +define i32 @testprefetch(i8 *%ptr, i32 %i) { +entry: + %val1 = add i32 %i, 1 + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) + %p = icmp eq i32 %i, 0 + br i1 %p, label %true, label %end +true: + %val2 = add i32 %val1, 1 + br label %end +end: + %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ] + ret i32 %valmerge +} +declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index 3b7d006fd1..fc8d00dd83 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -1108,6 +1108,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) { EmitProcessorProp(OS, PI->ModelDef, "MinLatency", ','); EmitProcessorProp(OS, PI->ModelDef, "LoadLatency", ','); EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ','); + EmitProcessorProp(OS, PI->ModelDef, "ILPWindow", ','); EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ','); OS << " " << PI->Index << ", // Processor ID\n"; if (PI->hasInstrSchedModel()) -- cgit v1.2.3-70-g09d2 From e9ccacd376b2a271434e739f0b7d468cc691723b Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 11 Jan 2013 17:46:50 +0000 Subject: 
Fix typo from r170452. Affects -enable-misched heuristics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172223 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/CodeGen/MachineScheduler.cpp') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a32df7805b..b965c7f997 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1533,8 +1533,8 @@ void ConvergingScheduler::checkResourceLimits( ConvergingScheduler::SchedCandidate &BotCand) { // Set ReduceLatency to true if needed. - Bot.setLatencyPolicy(TopCand.Policy); - Top.setLatencyPolicy(BotCand.Policy); + Bot.setLatencyPolicy(BopCand.Policy); + Top.setLatencyPolicy(TotCand.Policy); // Handle resource-limited regions. if (Top.IsResourceLimited && Bot.IsResourceLimited -- cgit v1.2.3-70-g09d2 From eed4e0193ff04ba27bfb6d0d4201505f03d99a7c Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 11 Jan 2013 17:51:16 +0000 Subject: Follow-up typo correction from building the wrong branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172224 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/CodeGen/MachineScheduler.cpp') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index b965c7f997..c949266b8b 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1533,8 +1533,8 @@ void ConvergingScheduler::checkResourceLimits( ConvergingScheduler::SchedCandidate &BotCand) { // Set ReduceLatency to true if needed. - Bot.setLatencyPolicy(BopCand.Policy); - Top.setLatencyPolicy(TotCand.Policy); + Bot.setLatencyPolicy(BotCand.Policy); + Top.setLatencyPolicy(TopCand.Policy); // Handle resource-limited regions. if (Top.IsResourceLimited && Bot.IsResourceLimited -- cgit v1.2.3-70-g09d2