diff options
author | Evan Cheng <evan.cheng@apple.com> | 2010-11-03 00:45:17 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2010-11-03 00:45:17 +0000 |
commit | 8239daf7c83a65a189c352cce3191cdc3bbfe151 (patch) | |
tree | 84b82c2cf503208d1f67007255f2f56fdb383c55 /lib/CodeGen | |
parent | 41957f6eb2271e5f1981b32a873d1b58217c6411 (diff) |
Two sets of changes. Sorry they are intermingled.
1. Fix pre-ra scheduler so it doesn't try to push instructions above calls to
"optimize for latency". Call instructions don't have the right latency and
this is more likely to use introduce spills.
2. Fix if-converter cost function. For ARM, it should use instruction latencies,
not # of micro-ops since multi-latency instructions is completely executed
even when the predicate is false. Also, some instruction will be "slower"
when they are predicated due to the register def becoming implicit input.
rdar://8598427
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118135 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- | lib/CodeGen/IfConversion.cpp | 59 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAGInstrs.cpp | 8 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 8 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 11 |
4 files changed, 56 insertions, 30 deletions
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index e90985bf89..c05aa40b67 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -93,7 +93,8 @@ namespace { /// ClobbersPred - True if BB could modify predicates (e.g. has /// cmp, call, etc.) /// NonPredSize - Number of non-predicated instructions. - /// ExtraCost - Extra cost for microcoded instructions. + /// ExtraCost - Extra cost for multi-cycle instructions. + /// ExtraCost2 - Some instructions are slower when predicated /// BB - Corresponding MachineBasicBlock. /// TrueBB / FalseBB- See AnalyzeBranch(). /// BrCond - Conditions for end of block conditional branches. @@ -110,6 +111,7 @@ namespace { bool ClobbersPred : 1; unsigned NonPredSize; unsigned ExtraCost; + unsigned ExtraCost2; MachineBasicBlock *BB; MachineBasicBlock *TrueBB; MachineBasicBlock *FalseBB; @@ -119,7 +121,7 @@ namespace { IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), HasFallThrough(false), IsUnpredicable(false), CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - ExtraCost(0), BB(0), TrueBB(0), FalseBB(0) {} + ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {} }; /// IfcvtToken - Record information about pending if-conversions to attempt: @@ -203,17 +205,20 @@ namespace { bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); - bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size, + bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, + unsigned Cycle, unsigned Extra, float Prediction, float Confidence) const { - return Size > 0 && TII->isProfitableToIfCvt(BB, Size, - Prediction, Confidence); + return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, + Prediction, Confidence); } - bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize, - MachineBasicBlock &FBB, unsigned FSize, + bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, + unsigned TCycle, unsigned TExtra, + MachineBasicBlock &FBB, + unsigned FCycle, unsigned FExtra, float Prediction, float Confidence) const { - return TSize > 0 && FSize > 0 && - TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize, + return TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, Prediction, Confidence); } @@ -649,6 +654,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { // Then scan all the instructions. BBI.NonPredSize = 0; BBI.ExtraCost = 0; + BBI.ExtraCost2 = 0; BBI.ClobbersPred = false; for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); I != E; ++I) { @@ -665,9 +671,12 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (!isCondBr) { if (!isPredicated) { BBI.NonPredSize++; - unsigned NumOps = TII->getNumMicroOps(&*I, InstrItins); - if (NumOps > 1) - BBI.ExtraCost += NumOps-1; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, + &ExtraPredCost); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; } else if (!AlreadyPredicated) { // FIXME: This instruction is already predicated before the // if-conversion pass. It's probably something like a conditional move. @@ -815,9 +824,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + - TrueBBI.ExtraCost), + TrueBBI.ExtraCost), TrueBBI.ExtraCost2, *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + - FalseBBI.ExtraCost), + FalseBBI.ExtraCost),FalseBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { @@ -836,7 +845,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -851,7 +860,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; @@ -859,7 +868,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB @@ -878,7 +887,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, 1.0-Prediction, Confidence) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; @@ -888,7 +897,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, 1.0-Prediction, Confidence) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; @@ -897,7 +906,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -1427,9 +1436,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - unsigned NumOps = TII->getNumMicroOps(MI, InstrItins); - if (NumOps > 1) - ToBBI.ExtraCost += NumOps-1; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost); + if (NumCycles > 1) + ToBBI.ExtraCost += NumCycles-1; + ToBBI.ExtraCost2 += ExtraPredCost; if (!TII->isPredicated(I) && !MI->isDebugValue()) { if (!TII->PredicateInstruction(MI, Cond)) { @@ -1504,8 +1515,10 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { ToBBI.NonPredSize += FromBBI.NonPredSize; ToBBI.ExtraCost += FromBBI.ExtraCost; + ToBBI.ExtraCost2 += FromBBI.ExtraCost2; FromBBI.NonPredSize = 0; FromBBI.ExtraCost = 0; + FromBBI.ExtraCost2 = 0; ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.HasFallThrough = FromBBI.HasFallThrough; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index abd68caf12..e86a78c691 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -238,6 +238,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { "Cannot schedule terminators or labels!"); // Create the SUnit for this MI. SUnit *SU = NewSUnit(MI); + SU->isCall = TID.isCall(); + SU->isCommutable = TID.isCommutable(); // Assign the Latency field of SU using target-provided information. if (UnitLatencies) @@ -564,9 +566,9 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { // extra time. if (SU->getInstr()->getDesc().mayLoad()) SU->Latency += 2; - } else - SU->Latency = - InstrItins->getStageLatency(SU->getInstr()->getDesc().getSchedClass()); + } else { + SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); + } } void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index ea1aaa1e05..9978d00f20 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1589,6 +1589,10 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { } bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + bool LHigh = SPQ->HighRegPressure(left); bool RHigh = SPQ->HighRegPressure(right); // Avoid causing spills. If register pressure is high, schedule for @@ -1648,6 +1652,10 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ bool ilp_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + bool LHigh = SPQ->HighRegPressure(left); bool RHigh = SPQ->HighRegPressure(right); // Avoid causing spills. If register pressure is high, schedule for diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 7d01bd31b9..429b1152b0 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; + SU->isCall = Old->isCall; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; @@ -300,6 +301,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { N = N->getOperand(N->getNumOperands()-1).getNode(); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; } // Scan down to find any flagged succs. @@ -316,6 +319,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); N = *UI; + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; break; } if (!HasFlagUse) break; @@ -438,10 +443,8 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { // all nodes flagged together into this SUnit. SU->Latency = 0; for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) - if (N->isMachineOpcode()) { - SU->Latency += InstrItins-> - getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); - } + if (N->isMachineOpcode()) + SU->Latency += TII->getInstrLatency(InstrItins, N); } void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, |