diff options
author | David Goodwin <david_goodwin@apple.com> | 2009-08-11 22:38:43 +0000 |
---|---|---|
committer | David Goodwin <david_goodwin@apple.com> | 2009-08-11 22:38:43 +0000 |
commit | 546952fd600ddba3f1eb6d4f93ff4eb42821a962 (patch) | |
tree | 7b06ab7ec67d14a1bab1d9a63a36c71cb7d6cae7 /lib | |
parent | e28a2e8b70e926324575ddec0a1565c6dba7d404 (diff) |
Allow a zero-cycle stage to reserve/require a functional unit (FU) without advancing the cycle counter.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78736 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/ExactHazardRecognizer.cpp | 21 | ||||
-rw-r--r-- | lib/Target/ARM/ARM.td | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 15 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV6.td | 16 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV7.td | 65 |
5 files changed, 75 insertions, 44 deletions
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 5a89d22359..48043f286c 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -39,7 +39,7 @@ ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData unsigned ItinDepth = 0; for (; IS != E; ++IS) - ItinDepth += IS->Cycles; + ItinDepth += std::max(1U, IS->Cycles); ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); } @@ -89,9 +89,13 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU unsigned idx = SU->getInstr()->getDesc().getSchedClass(); for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); IS != E; ++IS) { + // If the stages cycles are 0, then we must have the FU free in + // the current cycle, but we don't advance the cycle time . + unsigned StageCycles = std::max(1U, IS->Cycles); + // We must find one of the stage's units free for every cycle the // stage is occupied. - for (unsigned int i = 0; i < IS->Cycles; ++i) { + for (unsigned int i = 0; i < StageCycles; ++i) { assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!"); unsigned index = getFutureIndex(cycle); @@ -103,7 +107,8 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU return Hazard; } - ++cycle; + if (IS->Cycles > 0) + ++cycle; } } @@ -118,9 +123,13 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { unsigned idx = SU->getInstr()->getDesc().getSchedClass(); for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); IS != E; ++IS) { + // If the stages cycles are 0, then we must reserve the FU in the + // current cycle, but we don't advance the cycle time . + unsigned StageCycles = std::max(1U, IS->Cycles); + // We must reserve one of the stage's units for every cycle the // stage is occupied. 
- for (unsigned int i = 0; i < IS->Cycles; ++i) { + for (unsigned int i = 0; i < StageCycles; ++i) { assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!"); unsigned index = getFutureIndex(cycle); @@ -135,7 +144,9 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { assert(freeUnit && "No function unit available!"); Scoreboard[index] |= freeUnit; - ++cycle; + + if (IS->Cycles > 0) + ++cycle; } } diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index ce28149b54..eb6304c448 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -114,7 +114,7 @@ def : Processor<"arm1156t2f-s", V6Itineraries, // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureNEONFP]>; -def : Processor<"cortex-a9", V7Itineraries, +def : Processor<"cortex-a9", CortexA9Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index a5ca773ef1..11a7b2a717 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -10,8 +10,9 @@ //===----------------------------------------------------------------------===// // Functional units across ARM processors // -def FU_Pipe0 : FuncUnit; // pipeline 0 issue -def FU_Pipe1 : FuncUnit; // pipeline 1 issue +def FU_Issue : FuncUnit; // issue +def FU_Pipe0 : FuncUnit; // pipeline 0 +def FU_Pipe1 : FuncUnit; // pipeline 1 def FU_LdSt0 : FuncUnit; // pipeline 0 load/store def FU_LdSt1 : FuncUnit; // pipeline 1 load/store @@ -19,9 +20,11 @@ def FU_LdSt1 : FuncUnit; // pipeline 1 load/store // Instruction Itinerary classes used for ARM // def IIC_iALU : InstrItinClass; +def IIC_iMPY : InstrItinClass; def IIC_iLoad : InstrItinClass; def IIC_iStore : InstrItinClass; def IIC_fpALU : InstrItinClass; +def IIC_fpMPY : InstrItinClass; def IIC_fpLoad : InstrItinClass; def IIC_fpStore : InstrItinClass; def IIC_Br : 
InstrItinClass; @@ -31,12 +34,14 @@ def IIC_Br : InstrItinClass; def GenericItineraries : ProcessorItineraries<[ InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>, InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>, - InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>, InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>, - InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>, InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>, - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]> + InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>, + InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index f0b8116a03..755547a678 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -11,18 +11,16 @@ // //===----------------------------------------------------------------------===// +// TODO: this should model an ARM11 // Single issue pipeline so every itinerary starts with FU_pipe0 def V6Itineraries : ProcessorItineraries<[ - // single-cycle integer ALU InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>, - // loads have an extra cycle of latency, but are fully pipelined + InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>, InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>, - InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>, - // fully-pipelined stores InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>, - InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>, - // fp ALU is not pipelined - InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0]>]>, - // no delay slots, so the latency of a branch is unimportant - 
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]> + InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>, + InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index 30360bc9c4..8a7b42eb72 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -11,34 +11,51 @@ // //===----------------------------------------------------------------------===// -// Single issue pipeline so every itinerary starts with FU_Pipe0 -def V7Itineraries : ProcessorItineraries<[ - // single-cycle integer ALU - InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>, - // loads have an extra cycle of latency, but are fully pipelined - InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>, - InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>, - // fully-pipelined stores - InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>, - InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>, - // fp ALU is not pipelined - InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0]>]>, - // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]> -]>; - // Dual issue pipeline so every itinerary starts with FU_Pipe0 | FU_Pipe1 def CortexA8Itineraries : ProcessorItineraries<[ - // single-cycle integer ALU + // two fully-pipelined integer ALU pipelines InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, + // one fully-pipelined integer Multiply pipeline + // function units are used in alpha order, so use FU_Pipe1 + // for the Multiple pipeline + InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe1]>]>, // loads have an extra cycle of latency, but are fully pipelined - 
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>, - InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>, + // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>]>, // fully-pipelined stores - InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - // fp ALU is not pipelined - InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0, FU_Pipe1]>]>, + // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_iStore , [InstrStage<0, [FU_Issue]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]> + InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, + + // VFP ALU is not pipelined so stall all issues + // FIXME assume NFP pipeline and 7 cycle non-pipelined latency + InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>, + // VFP MPY is not pipelined so stall all issues + // FIXME assume NFP pipeline and 7 cycle non-pipelined latency + InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>, + // loads have an extra cycle of latency, but are fully pipelined + // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad , [InstrStage<0, [FU_Issue]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>]>, + // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore , [InstrStage<0, [FU_Issue]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>]> +]>; + +// FIXME +def CortexA9Itineraries : ProcessorItineraries<[ + InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>, + 
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>, + InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>, + InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]> ]>; |