diff options
author | Anton Korobeynikov <asl@math.spbu.ru> | 2010-04-18 20:31:01 +0000 |
---|---|---|
committer | Anton Korobeynikov <asl@math.spbu.ru> | 2010-04-18 20:31:01 +0000 |
commit | 928eb49cae286c95dceecf4442997dd561c6e3b7 (patch) | |
tree | d65ac9dd3b6f18abc632f4d186d430e075ded25a /lib/Target/ARM | |
parent | d65077a50901cbe55d8285bb1149eb8ba8210a58 (diff) |
Make processor FUs unique for given itinerary. This extends the limit of 32
FU per CPU arch to 32 per intinerary allowing precise modelling of quite
complex pipelines in the future.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@101754 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 16 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA8.td | 680 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA9.td | 846 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV6.td | 142 |
4 files changed, 846 insertions, 838 deletions
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 1c96976d6c..b60ccca468 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -8,19 +8,6 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Functional units across ARM processors -// -def FU_Issue : FuncUnit; // issue -def FU_Pipe0 : FuncUnit; // pipeline 0 -def FU_Pipe1 : FuncUnit; // pipeline 1 -def FU_LdSt0 : FuncUnit; // pipeline 0 load/store -def FU_LdSt1 : FuncUnit; // pipeline 1 load/store -def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe -def FU_NLSPipe : FuncUnit; // NEON LS pipe -def FU_DRegsVFP: FuncUnit; // FP register set, VFP side -def FU_DRegsN : FuncUnit; // FP register set, NEON side - -//===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM // def IIC_iALUx : InstrItinClass; @@ -165,8 +152,7 @@ def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. -def GenericItineraries : ProcessorItineraries<[]>; - +def GenericItineraries : ProcessorItineraries<[], []>; include "ARMScheduleV6.td" include "ARMScheduleA8.td" diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index d43a9c7643..bbfc0b2b47 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -13,156 +13,164 @@ // // Scheduling information derived from "Cortex-A8 Technical Reference Manual". +// Functional Units. +def A8_Issue : FuncUnit; // issue +def A8_Pipe0 : FuncUnit; // pipeline 0 +def A8_Pipe1 : FuncUnit; // pipeline 1 +def A8_LdSt0 : FuncUnit; // pipeline 0 load/store +def A8_LdSt1 : FuncUnit; // pipeline 1 load/store +def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe +def A8_NLSPipe : FuncUnit; // NEON LS pipe // -// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 +// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1 // -def CortexA8Itineraries : ProcessorItineraries<[ - +def CortexA8Itineraries : ProcessorItineraries< + [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [ // Two fully-pipelined integer ALU pipelines // // No operand cycles - InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, + InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, // // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1, 1]>, + InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, // // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, // // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, // // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1, 1]>, + InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, // // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, // Integer multiply pipeline // Result written in E5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases // - InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL32 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL64 , [InstrStage<2, [FU_Pipe1], 0>, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<2, [FU_Pipe1], 0>, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, // Integer load pipeline // // loads have an extra cycle of latency, but are fully pipelined - // use FU_Issue to enforce the 1 load/store per cycle limit + // use A8_Issue to enforce the 1 load/store per cycle limit // // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, + InstrItinData<IIC_iLoadi , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, // // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, + InstrItinData<IIC_iLoadr , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>, + InstrItinData<IIC_iLoadsi , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>, // // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>, + InstrItinData<IIC_iLoadiu , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>, // // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>, + InstrItinData<IIC_iLoadru , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>, + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>, // // Load multiple - InstrItinData<IIC_iLoadm , [InstrStage<2, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, + InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, // Integer store pipeline // - // use FU_Issue to enforce the 1 load/store per cycle limit + // use A8_Issue to enforce the 1 load/store per cycle limit // // Immediate offset - InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, + InstrItinData<IIC_iStorei , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, // // Register offset - InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, + InstrItinData<IIC_iStorer , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, + InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, // // Immediate offset with update - InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>, + InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>, // // Register offset with update - InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>, + InstrItinData<IIC_iStoreru , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>, + InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>, // // Store multiple - InstrItinData<IIC_iStorem , [InstrStage<2, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, + InstrItinData<IIC_iStorem , [InstrStage<2, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, // Branch // // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, + InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, // VFP // Issue through integer pipeline, and execute in NEON unit. We assume @@ -170,441 +178,441 @@ def CortexA8Itineraries : ProcessorItineraries<[ // possible. // // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>]>, + InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>]>, // // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, // // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, // // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [1, 1]>, // // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, // // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<7, [FU_NPipe], 0>, - InstrStage<7, [FU_NLSPipe]>], [7, 1]>, + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<7, [A8_NPipe], 0>, + InstrStage<7, [A8_NLSPipe]>], [7, 1]>, // // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<5, [FU_NPipe], 0>, - InstrStage<5, [FU_NLSPipe]>], [5, 1]>, + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<5, [A8_NPipe], 0>, + InstrStage<5, [A8_NLSPipe]>], [5, 1]>, // // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, // // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, // // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, // // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, + InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, // // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, + InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, // // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<9, [FU_NPipe], 0>, - InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>, + InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<9, [A8_NPipe], 0>, + InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>, // // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, // // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<11, [FU_NPipe], 0>, - InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>, + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<11, [A8_NPipe], 0>, + InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>, // // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>, + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, // // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>, + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, // // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<20, [FU_NPipe], 0>, - InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>, + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<20, [A8_NPipe], 0>, + InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>, // // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>, + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>, // // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 1]>, + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 1]>, // // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], [29, 1]>, + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1]>, // // Single-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad32, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // Double-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad64, [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // FP Load Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoadm, [InstrStage<3, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // Single-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore32,[InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // Double-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64,[InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // FP Store Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStorem, [InstrStage<3, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // NEON // Issue through integer pipeline, and execute in NEON unit. // // VLD1 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD1, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // VLD2 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD2, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>, + InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>, // // VLD3 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD3, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>, + InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>, // // VLD4 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD4, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>, + InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>, // // VST // FIXME: We don't model this instruction properly - InstrItinData<IIC_VST, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // Double-register FP Unary - InstrItinData<IIC_VUNAD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2]>, + InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [5, 2]>, // // Quad-register FP Unary // Result written in N5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases - InstrItinData<IIC_VUNAQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2]>, + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 2]>, // // Double-register FP Binary - InstrItinData<IIC_VBIND, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2, 2]>, + InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, // // Quad-register FP Binary // Result written in N5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases - InstrItinData<IIC_VBINQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2, 2]>, + InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 2, 2]>, // // Move Immediate - InstrItinData<IIC_VMOVImm, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3]>, + InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3]>, // // Double-register Permute Move - InstrItinData<IIC_VMOVD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, + InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, // // Quad-register Permute Move // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 3 for those cases - InstrItinData<IIC_VMOVQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1]>, + InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1]>, // // Integer to Single-precision Move - InstrItinData<IIC_VMOVIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, + InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, // // Integer to Double-precision Move - InstrItinData<IIC_VMOVID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, + InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, // // Single-precision to Integer Move - InstrItinData<IIC_VMOVSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [20, 1]>, + InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [20, 1]>, // // Double-precision to Integer Move - InstrItinData<IIC_VMOVDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>, + InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>, // // Integer to Lane Move - InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, // // Double-register Permute - InstrItinData<IIC_VPERMD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>, + InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>, // // Quad-register Permute // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 3 for those cases - InstrItinData< |