aboutsummaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
author Evan Cheng <evan.cheng@apple.com> 2010-11-03 00:45:17 +0000
committer Evan Cheng <evan.cheng@apple.com> 2010-11-03 00:45:17 +0000
commit 8239daf7c83a65a189c352cce3191cdc3bbfe151 (patch)
tree 84b82c2cf503208d1f67007255f2f56fdb383c55 /lib/Target
parent 41957f6eb2271e5f1981b32a873d1b58217c6411 (diff)
Two sets of changes. Sorry they are intermingled.
1. Fix pre-ra scheduler so it doesn't try to push instructions above calls to "optimize for latency". Call instructions don't have the right latency and this is more likely to introduce spills. 2. Fix if-converter cost function. For ARM, it should use instruction latencies, not the number of micro-ops, since multi-latency instructions are completely executed even when the predicate is false. Also, some instructions will be "slower" when they are predicated due to the register def becoming an implicit input. rdar://8598427 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118135 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.cpp 100
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.h 22
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.cpp 27
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.h 6
-rw-r--r-- lib/Target/TargetInstrInfo.cpp 24
5 files changed, 99 insertions, 80 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 3257065008..b023379e7b 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -40,10 +40,6 @@ static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
-static cl::opt<bool>
-OldARMIfCvt("old-arm-ifcvt", cl::Hidden,
- cl::desc("Use old-style ARM if-conversion heuristics"));
-
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
: TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
Subtarget(STI) {
@@ -1205,53 +1201,36 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
}
bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumInstrs,
+ unsigned NumCyles,
+ unsigned ExtraPredCycles,
float Probability,
float Confidence) const {
- if (!NumInstrs)
+ if (!NumCyles)
return false;
- // Use old-style heuristics
- if (OldARMIfCvt) {
- if (Subtarget.getCPUString() == "generic")
- // Generic (and overly aggressive) if-conversion limits for testing.
- return NumInstrs <= 10;
- if (Subtarget.hasV7Ops())
- return NumInstrs <= 3;
- return NumInstrs <= 2;
- }
-
// Attempt to estimate the relative costs of predication versus branching.
- float UnpredCost = Probability * NumInstrs;
+ float UnpredCost = Probability * NumCyles;
UnpredCost += 1.0; // The branch itself
UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
- float PredCost = NumInstrs;
-
- return PredCost < UnpredCost;
-
+ return (float)(NumCyles + ExtraPredCycles) < UnpredCost;
}
bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
- MachineBasicBlock &FMBB, unsigned NumF,
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned TCycles, unsigned TExtra,
+ MachineBasicBlock &FMBB,
+ unsigned FCycles, unsigned FExtra,
float Probability, float Confidence) const {
- // Use old-style if-conversion heuristics
- if (OldARMIfCvt) {
- return NumT && NumF && NumT <= 2 && NumF <= 2;
- }
-
- if (!NumT || !NumF)
+ if (!TCycles || !FCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
- float UnpredCost = Probability * NumT + (1.0 - Probability) * NumF;
+ float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
UnpredCost += 1.0; // The branch itself
UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
- float PredCost = NumT + NumF;
-
- return PredCost < UnpredCost;
+ return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -1591,8 +1570,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
}
unsigned
-ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
- const InstrItineraryData *ItinData) const {
+ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
if (!ItinData || ItinData->isEmpty())
return 1;
@@ -1649,9 +1628,14 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
case ARM::t2STM_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
if (Subtarget.isCortexA8()) {
- // 4 registers would be issued: 1, 2, 1.
- // 5 registers would be issued: 1, 2, 2.
- return 1 + (NumRegs / 2);
+ if (NumRegs < 4)
+ return 2;
+ // 4 registers would be issued: 2, 2.
+ // 5 registers would be issued: 2, 2, 1.
+ UOps = (NumRegs / 2);
+ if (NumRegs % 2)
+ ++UOps;
+ return UOps;
} else if (Subtarget.isCortexA9()) {
UOps = (NumRegs / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
@@ -2025,6 +2009,46 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned Class = TID.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ // When predicated, CPSR is an additional source operand for CPSR updating
+ // instructions, this apparently increases their latencies.
+ *PredCost = 1;
+ if (UOps)
+ return ItinData->getStageLatency(Class);
+ return getNumMicroOps(ItinData, MI);
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const {
+ if (!Node->isMachineOpcode())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Opcode = Node->getMachineOpcode();
+ switch (Opcode) {
+ default:
+ return ItinData->getStageLatency(get(Opcode).getSchedClass());
+ case ARM::VLDMQ:
+ case ARM::VSTMQ:
+ return 2;
+ }
+}
+
bool ARMBaseInstrInfo::
hasHighOperandLatency(const InstrItineraryData *ItinData,
const MachineRegisterInfo *MRI,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index e19bd9c566..c11f02ccb1 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -318,18 +318,20 @@ public:
const MachineFunction &MF) const;
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumInstrs,
+ unsigned NumCyles, unsigned ExtraPredCycles,
float Prob, float Confidence) const;
- virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT,
- MachineBasicBlock &FMBB,unsigned NumF,
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
float Probability, float Confidence) const;
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumInstrs,
+ unsigned NumCyles,
float Probability,
float Confidence) const {
- return NumInstrs == 1;
+ return NumCyles == 1;
}
/// AnalyzeCompare - For a comparison instruction, return the source register
@@ -345,8 +347,8 @@ public:
const MachineRegisterInfo *MRI,
MachineBasicBlock::iterator &MII) const;
- virtual unsigned getNumMicroOps(const MachineInstr *MI,
- const InstrItineraryData *ItinData) const;
+ virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const;
virtual
int getOperandLatency(const InstrItineraryData *ItinData,
@@ -379,6 +381,12 @@ private:
const TargetInstrDesc &UseTID,
unsigned UseIdx, unsigned UseAlign) const;
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost = 0) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const;
+
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 0a0f3146ef..719b140ce9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -42,33 +42,6 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumInstrs,
- float Prediction,
- float Confidence) const {
- if (!OldT2IfCvt)
- return ARMBaseInstrInfo::isProfitableToIfCvt(MBB, NumInstrs,
- Prediction, Confidence);
- return NumInstrs && NumInstrs <= 3;
-}
-
-bool Thumb2InstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
- MachineBasicBlock &FMBB, unsigned NumF,
- float Prediction, float Confidence) const {
- if (!OldT2IfCvt)
- return ARMBaseInstrInfo::isProfitableToIfCvt(TMBB, NumT,
- FMBB, NumF,
- Prediction, Confidence);
-
- // FIXME: Catch optimization such as:
- // r0 = movne
- // r0 = moveq
- return NumT && NumF &&
- NumT <= 3 && NumF <= 3;
-}
-
-
void
Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
MachineBasicBlock *NewDest) const {
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index b348ad0191..9ed7eea7e2 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -38,12 +38,6 @@ public:
bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
- bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs,
- float Prediction, float Confidence) const;
- bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
- MachineBasicBlock &FMBB, unsigned NumFInstrs,
- float Prediction, float Confidence) const;
-
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index 62818d0cae..eca97ab096 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -50,8 +50,8 @@ TargetInstrInfo::~TargetInstrInfo() {
}
unsigned
-TargetInstrInfo::getNumMicroOps(const MachineInstr *MI,
- const InstrItineraryData *ItinData) const {
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
if (!ItinData || ItinData->isEmpty())
return 1;
@@ -94,6 +94,26 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI,
unsigned DefIdx) const {