aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/ARM
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-10-01 22:52:29 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-10-01 22:52:29 +0000
commit055028215d53759a46fcc61fe95cda4a4bdf3a35 (patch)
tree21df1e8050890848937c12c9cd09f0410fc90ef9 /lib/Target/ARM
parente3cc84a43d6a4bb6c50f58f3dd8e60e28787509e (diff)
Fix r115332: correctly model AGU / NEON mux.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115365 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td399
1 files changed, 266 insertions, 133 deletions
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 729c96215a..7a4a33bead 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -131,77 +131,95 @@ def CortexA9Itineraries : ProcessorItineraries<
//
// Immediate offset
InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>],
[3, 1], [A9_LdBypass]>,
InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[4, 1], [A9_LdBypass]>,
// FIXME: If address is 64-bit aligned, AGU cycles is 1.
InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[3, 3, 1], [A9_LdBypass]>,
//
// Register offset
InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>],
[3, 1, 1], [A9_LdBypass]>,
InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[4, 1, 1], [A9_LdBypass]>,
InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[3, 3, 1, 1], [A9_LdBypass]>,
//
// Scaled register offset
InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>],
[4, 1, 1], [A9_LdBypass]>,
InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[5, 1, 1], [A9_LdBypass]>,
//
// Immediate offset with update
InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>],
[3, 2, 1], [A9_LdBypass]>,
InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[4, 3, 1], [A9_LdBypass]>,
//
// Register offset with update
InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>],
[3, 2, 1, 1], [A9_LdBypass]>,
InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[4, 3, 1, 1], [A9_LdBypass]>,
InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[3, 3, 1, 1], [A9_LdBypass]>,
//
// Scaled register offset with update
InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>],
[4, 3, 1, 1], [A9_LdBypass]>,
InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[5, 4, 1, 1], [A9_LdBypass]>,
//
// Load multiple
InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[3], [A9_LdBypass]>,
//
// Load multiple plus branch
InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>,
InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
//
// iLoadi + iALUr for t2LDRpci_pic.
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>,
InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
[2, 1]>,
@@ -209,55 +227,71 @@ def CortexA9Itineraries : ProcessorItineraries<
///
// Immediate offset
InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>], [1, 1]>,
InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>], [1, 1]>,
// FIXME: If address is 64-bit aligned, AGU cycles is 1.
InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>], [1, 1]>,
//
// Register offset
InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
//
// Scaled register offset
InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
//
// Immediate offset with update
InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>], [2, 1, 1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>], [2, 1, 1]>,
InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>], [3, 1, 1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>], [3, 1, 1]>,
//
// Register offset with update
InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>],
[2, 1, 1, 1]>,
InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[3, 1, 1, 1]>,
InstrItinData<IIC_iStore_d_ru,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[3, 1, 1, 1]>,
//
// Scaled register offset with update
InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>],
[2, 1, 1, 1]>,
InstrItinData<IIC_iStore_bh_siu,[InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_AGU]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU]>],
[3, 1, 1, 1]>,
//
// Store multiple
InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_AGU]>]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU]>]>,
// Branch
//
// no delay slots, so the latency of a branch is unimportant
@@ -284,14 +318,16 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
//
// Single-precision FP Unary
InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1]>,
//
// Double-precision FP Unary
@@ -299,7 +335,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1]>,
//
@@ -308,7 +345,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 4 cycles
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1]>,
//
// Double-precision FP Compare
@@ -316,21 +354,24 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 4 cycles
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1]>,
//
// Single to Double FP Convert
InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1]>,
//
// Double to Single FP Convert
InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1]>,
//
@@ -338,14 +379,16 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1]>,
//
// Half to Single FP Convert
InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<3, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[2, 1]>,
//
@@ -353,98 +396,112 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1]>,
//
// Double-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1]>,
//
// Integer to Single-Precision FP Convert
InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1]>,
//
// Integer to Double-Precision FP Convert
InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1]>,
//
// Single-precision FP ALU
InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1, 1]>,
//
// Double-precision FP ALU
InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1, 1]>,
//
// Single-precision FP Multiply
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<6, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[5, 1, 1]>,
//
// Double-precision FP Multiply
InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<7, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_NPipe]>],
[6, 1, 1]>,
//
// Single-precision FP MAC
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<9, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[8, 0, 1, 1]>,
//
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<10, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_NPipe]>],
[9, 0, 1, 1]>,
//
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<16, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<10, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<10, [A9_NPipe]>],
[15, 1, 1]>,
//
// Double-precision FP DIV
InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<26, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<20, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<20, [A9_NPipe]>],
[25, 1, 1]>,
//
// Single-precision FP SQRT
InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<18, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<13, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<13, [A9_NPipe]>],
[17, 1]>,
//
// Double-precision FP SQRT
InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<33, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<28, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<28, [A9_NPipe]>],
[32, 1]>,
//
@@ -453,7 +510,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra 1 latency cycle since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1]>,
//
// Integer to Double-precision Move
@@ -461,28 +519,32 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra 1 latency cycle since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1, 1]>,
//
// Single-precision to Integer Move
InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1]>,
//
// Double-precision to Integer Move
InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1, 1]>,
//
// Single-precision FP Load
InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1]>,
//
// Double-precision FP Load
@@ -490,34 +552,39 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[2, 1]>,
//
// FP Load Multiple
InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
//
// Single-precision FP Store
InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1]>,
//
// Double-precision FP Store
InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[1, 1]>,
//
// FP Store Multiple
InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
// NEON
// Issue through integer pipeline, and execute in NEON unit.
// VLD1
@@ -525,7 +592,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
//
// VLD2
// FIXME: We don't model this instruction properly
@@ -533,7 +601,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[2, 2, 1]>,
//
// VLD3
@@ -542,7 +611,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[2, 2, 2, 1]>,
//
// VLD4
@@ -551,7 +621,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[2, 2, 2, 2, 1]>,
//
// VST
@@ -560,14 +631,16 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
//
// Double-register Integer Unary
InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 2]>,
//
// Quad-register Integer Unary
@@ -575,7 +648,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 2]>,
//
// Double-register Integer Q-Unary
@@ -583,7 +657,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1]>,
//
// Quad-register Integer CountQ-Unary
@@ -591,7 +666,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1]>,
//
// Double-register Integer Binary
@@ -599,7 +675,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[3, 2, 2]>,
//
// Quad-register Integer Binary
@@ -607,7 +684,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[3, 2, 2]>,
//
// Double-register Integer Subtract
@@ -615,7 +693,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[3, 2, 1]>,
//
// Quad-register Integer Subtract
@@ -623,7 +702,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[3, 2, 1]>,
//
// Double-register Integer Shift
@@ -631,7 +711,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[3, 1, 1]>,
//
// Quad-register Integer Shift
@@ -639,7 +720,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[3, 1, 1]>,
//
// Double-register Integer Shift (4 cycle)
@@ -647,7 +729,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1, 1]>,
//
// Quad-register Integer Shift (4 cycle)
@@ -655,7 +738,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 1, 1]>,
//
// Double-register Integer Binary (4 cycle)
@@ -663,7 +747,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 2, 2]>,
//
// Quad-register Integer Binary (4 cycle)
@@ -671,7 +756,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_MUX0, A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_NPipe]>],
[4, 2, 2]>,
//
// Double-register Integer Subtract (4 cycle)
@@ -679,7 +765,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
InstrStage<1, [A9_Pipe1]>,