diff options
-rw-r--r-- | include/llvm/Target/TargetSchedule.td | 15 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV7.td | 97 |
3 files changed, 87 insertions, 27 deletions
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index bbf43dec58..bd6791f92e 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -23,7 +23,7 @@ class FuncUnit; class ReservationKind<bits<1> val> { - bits<1> Value = val; + int Value = val; } def Required : ReservationKind<0>; @@ -43,14 +43,19 @@ def Reserved : ReservationKind<1>; // InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles // InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit // -class InstrStage<int cycles, list<FuncUnit> units, - int timeinc = -1, ReservationKind kind = Required> { + +class InstrStage2<int cycles, list<FuncUnit> units, + int timeinc, ReservationKind kind> { int Cycles = cycles; // length of stage in machine cycles list<FuncUnit> Units = units; // choice of functional units int TimeInc = timeinc; // cycles till start of next stage int Kind = kind.Value; // kind of FU reservation } +class InstrStage<int cycles, list<FuncUnit> units, + int timeinc = -1> + : InstrStage2<cycles, units, timeinc, Required>; + //===----------------------------------------------------------------------===// // Instruction itinerary - An itinerary represents a sequential series of steps // required to complete an instruction. Itineraries are represented as lists of @@ -71,10 +76,10 @@ def NoItinerary : InstrItinClass; // Instruction itinerary data - These values provide a runtime map of an // instruction itinerary class (name) to its itinerary data. 
// -class InstrItinData<InstrItinClass Class, list<InstrStage> stages, +class InstrItinData<InstrItinClass Class, list<InstrStage2> stages, list<int> operandcycles = []> { InstrItinClass TheClass = Class; - list<InstrStage> Stages = stages; + list<InstrStage2> Stages = stages; list<int> OperandCycles = operandcycles; } diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index fc4c5f5830..db15a85e40 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -17,6 +17,8 @@ def FU_LdSt0 : FuncUnit; // pipeline 0 load/store def FU_LdSt1 : FuncUnit; // pipeline 1 load/store def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe def FU_NLSPipe : FuncUnit; // NEON LS pipe +def FU_DRegsVFP: FuncUnit; // FP register set, VFP side +def FU_DRegsN : FuncUnit; // FP register set, NEON side //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index b121045dd5..0d7a5539c1 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -593,94 +593,147 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 // def CortexA9Itineraries : ProcessorItineraries<[ + // VFP and NEON share the same register file. This means that every VFP + // instruction should wait for full completion of the consecutive NEON + // instruction and vice-versa. We model this behavior with two artificial FUs: + // DRegsVFP and DRegsN. + // + // Every VFP instruction: + // - Acquires DRegsVFP resource for 1 cycle + // - Reserves DRegsN resource for the whole duration. + // Every NEON instruction does the same but with FUs swapped. + // + // Since the reserved FU cannot be acquired this models precisely "cross-domain" + // stalls. // VFP // Issue through integer pipeline, and execute in NEON unit. 
- // // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpSTAT , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>]>, // // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpUNA32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpUNA64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCMP32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCMP64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTSD , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTDS , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 
0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTSI , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTDI , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTIS , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTID , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpALU32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, // // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpALU64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, // // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, 
[FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpMUL32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<6, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [5, 1, 1]>, // // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpMUL64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<7, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<2, [FU_NPipe]>], [6, 1, 1]>, // // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpMAC32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<9, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>, // // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpMAC64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<10, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>, // // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpDIV32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<16, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<10, [FU_NPipe]>], [15, 1, 1]>, // // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpDIV64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<26, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<20, [FU_NPipe]>], [25, 1, 1]>, // // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpSQRT32, [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<18, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<13, 
[FU_NPipe]>], [17, 1]>, // // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpSQRT64, [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<33, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<28, [FU_NPipe]>], [32, 1]> ]>; |