aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorDavid Goodwin <david_goodwin@apple.com>2009-08-11 22:38:43 +0000
committerDavid Goodwin <david_goodwin@apple.com>2009-08-11 22:38:43 +0000
commit546952fd600ddba3f1eb6d4f93ff4eb42821a962 (patch)
tree7b06ab7ec67d14a1bab1d9a63a36c71cb7d6cae7 /lib
parente28a2e8b70e926324575ddec0a1565c6dba7d404 (diff)
Allow a zero cycle stage to reserve/require a FU without advancing the cycle counter.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78736 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/CodeGen/ExactHazardRecognizer.cpp21
-rw-r--r--lib/Target/ARM/ARM.td2
-rw-r--r--lib/Target/ARM/ARMSchedule.td15
-rw-r--r--lib/Target/ARM/ARMScheduleV6.td16
-rw-r--r--lib/Target/ARM/ARMScheduleV7.td65
5 files changed, 75 insertions, 44 deletions
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
index 5a89d22359..48043f286c 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -39,7 +39,7 @@ ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData
unsigned ItinDepth = 0;
for (; IS != E; ++IS)
- ItinDepth += IS->Cycles;
+ ItinDepth += std::max(1U, IS->Cycles);
ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
}
@@ -89,9 +89,13 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
IS != E; ++IS) {
+ // If the stage's cycles are 0, then we must have the FU free in
+ // the current cycle, but we don't advance the cycle time.
+ unsigned StageCycles = std::max(1U, IS->Cycles);
+
// We must find one of the stage's units free for every cycle the
// stage is occupied.
- for (unsigned int i = 0; i < IS->Cycles; ++i) {
+ for (unsigned int i = 0; i < StageCycles; ++i) {
assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
unsigned index = getFutureIndex(cycle);
@@ -103,7 +107,8 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
return Hazard;
}
- ++cycle;
+ if (IS->Cycles > 0)
+ ++cycle;
}
}
@@ -118,9 +123,13 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
IS != E; ++IS) {
+ // If the stage's cycles are 0, then we must reserve the FU in the
+ // current cycle, but we don't advance the cycle time.
+ unsigned StageCycles = std::max(1U, IS->Cycles);
+
// We must reserve one of the stage's units for every cycle the
// stage is occupied.
- for (unsigned int i = 0; i < IS->Cycles; ++i) {
+ for (unsigned int i = 0; i < StageCycles; ++i) {
assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
unsigned index = getFutureIndex(cycle);
@@ -135,7 +144,9 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
assert(freeUnit && "No function unit available!");
Scoreboard[index] |= freeUnit;
- ++cycle;
+
+ if (IS->Cycles > 0)
+ ++cycle;
}
}
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index ce28149b54..eb6304c448 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -114,7 +114,7 @@ def : Processor<"arm1156t2f-s", V6Itineraries,
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureNEONFP]>;
-def : Processor<"cortex-a9", V7Itineraries,
+def : Processor<"cortex-a9", CortexA9Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index a5ca773ef1..11a7b2a717 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -10,8 +10,9 @@
//===----------------------------------------------------------------------===//
// Functional units across ARM processors
//
-def FU_Pipe0 : FuncUnit; // pipeline 0 issue
-def FU_Pipe1 : FuncUnit; // pipeline 1 issue
+def FU_Issue : FuncUnit; // issue
+def FU_Pipe0 : FuncUnit; // pipeline 0
+def FU_Pipe1 : FuncUnit; // pipeline 1
def FU_LdSt0 : FuncUnit; // pipeline 0 load/store
def FU_LdSt1 : FuncUnit; // pipeline 1 load/store
@@ -19,9 +20,11 @@ def FU_LdSt1 : FuncUnit; // pipeline 1 load/store
// Instruction Itinerary classes used for ARM
//
def IIC_iALU : InstrItinClass;
+def IIC_iMPY : InstrItinClass;
def IIC_iLoad : InstrItinClass;
def IIC_iStore : InstrItinClass;
def IIC_fpALU : InstrItinClass;
+def IIC_fpMPY : InstrItinClass;
def IIC_fpLoad : InstrItinClass;
def IIC_fpStore : InstrItinClass;
def IIC_Br : InstrItinClass;
@@ -31,12 +34,14 @@ def IIC_Br : InstrItinClass;
def GenericItineraries : ProcessorItineraries<[
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
- InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
- InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
- InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>
+ InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
+ InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
]>;
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index f0b8116a03..755547a678 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -11,18 +11,16 @@
//
//===----------------------------------------------------------------------===//
+// TODO: this should model an ARM11
// Single issue pipeline so every itinerary starts with FU_pipe0
def V6Itineraries : ProcessorItineraries<[
- // single-cycle integer ALU
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
- // loads have an extra cycle of latency, but are fully pipelined
+ InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
- InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
- // fully-pipelined stores
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
- InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>,
- // fp ALU is not pipelined
- InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0]>]>,
- // no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>
+ InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
+ InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
]>;
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td
index 30360bc9c4..8a7b42eb72 100644
--- a/lib/Target/ARM/ARMScheduleV7.td
+++ b/lib/Target/ARM/ARMScheduleV7.td
@@ -11,34 +11,51 @@
//
//===----------------------------------------------------------------------===//
-// Single issue pipeline so every itinerary starts with FU_Pipe0
-def V7Itineraries : ProcessorItineraries<[
- // single-cycle integer ALU
- InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
- // loads have an extra cycle of latency, but are fully pipelined
- InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
- InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
- // fully-pipelined stores
- InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
- InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>,
- // fp ALU is not pipelined
- InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0]>]>,
- // no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>
-]>;
-
// Dual issue pipeline so every itinerary starts with FU_Pipe0 | FU_Pipe1
def CortexA8Itineraries : ProcessorItineraries<[
- // single-cycle integer ALU
+ // two fully-pipelined integer ALU pipelines
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
+ // one fully-pipelined integer Multiply pipeline
+ // function units are used in alpha order, so use FU_Pipe1
+ // for the Multiply pipeline
+ InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe1]>]>,
// loads have an extra cycle of latency, but are fully pipelined
- InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>,
- InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>,
+ // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>]>,
// fully-pipelined stores
- InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
- InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
- // fp ALU is not pipelined
- InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0, FU_Pipe1]>]>,
+ // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_iStore , [InstrStage<0, [FU_Issue]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
+ InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
+
+ // VFP ALU is not pipelined so stall all issues
+ // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
+ InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
+ // VFP MPY is not pipelined so stall all issues
+ // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
+ InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
+ // loads have an extra cycle of latency, but are fully pipelined
+ // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoad , [InstrStage<0, [FU_Issue]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>]>,
+ // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore , [InstrStage<0, [FU_Issue]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
+]>;
+
+// FIXME
+def CortexA9Itineraries : ProcessorItineraries<[
+ InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
+ InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
+ InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
]>;