I'm introducing a new machine model to simultaneously allow simple

subtarget CPU descriptions and support new features of MachineScheduler. MachineModel has three categories of data: 1) Basic properties for coarse grained instruction cost model. 2) Scheduler Read/Write resources for simple per-opcode and operand cost model (TBD). 3) Instruction itineraties for detailed per-cycle reservation tables. These will all live side-by-side. Any subtarget can use any combination of them. Instruction itineraries will not change in the near term. In the long run, I expect them to only be relevant for in-order VLIW machines that have complex contraints and require a precise scheduling/bundling model. Once itineraries are only actively used by VLIW-ish targets, they could be replaced by something more appropriate for those targets. This tablegen backend rewrite sets things up for introducing MachineModel type #2: per opcode/operand cost model. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159891 91177308-0d34-0410-b5e6-96231b3b80d8
author: Andrew Trick <atrick@apple.com> 2012-07-07 04:00:00 +0000
committer: Andrew Trick <atrick@apple.com> 2012-07-07 04:00:00 +0000
commit: 2661b411ccc81b1fe19194d3f43b2630cbef3f28 (patch)
tree: 0decaebaee6c3a1a9d42df6b5619de1ffb2fac7d /lib
parent: 06495cd7f2a91c4f659eac5e55b1c08b014d0a08 (diff)
14 files changed, 104 insertions, 59 deletions
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 847bf1e76e..098ec4741f 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -403,7 +403,8 @@ public:
 
   /// getIssueWidth - Return the max instructions per scheduling group.
   unsigned getIssueWidth() const {
-    return InstrItins ? InstrItins->Props.IssueWidth : 1;
+    return (InstrItins && InstrItins->SchedModel)
+      ? InstrItins->SchedModel->IssueWidth : 1;
   }
 
   /// getNumMicroOps - Return the number of issue slots required for this MI.
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 7110b7566a..e675366485 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -72,10 +72,12 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II,
   ReservedScoreboard.reset(ScoreboardDepth);
   RequiredScoreboard.reset(ScoreboardDepth);
 
+  // If MaxLookAhead is not set above, then we are not enabled.
   if (!isEnabled())
     DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
   else {
-    IssueWidth = ItinData->Props.IssueWidth;
+    // A nonempty itinerary must have a SchedModel.
+    IssueWidth = ItinData->SchedModel->IssueWidth;
     DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
           << ScoreboardDepth << '\n');
   }
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 01622cb295..c3794d5f78 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -318,7 +318,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) {
 
   // If packet is now full, reset the state so in the next cycle
   // we start fresh.
-  if (Packet.size() >= InstrItins->Props.IssueWidth) {
+  if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
     ResourcesModel->clearResources();
     Packet.clear();
   }
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 54be88a8bb..1da5512c91 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -573,9 +573,9 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
 unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData,
                                             const MachineInstr *DefMI) const {
   if (DefMI->mayLoad())
-    return ItinData->Props.LoadLatency;
+    return ItinData->SchedModel->LoadLatency;
   if (isHighLatencyDef(DefMI->getOpcode()))
-    return ItinData->Props.HighLatency;
+    return ItinData->SchedModel->HighLatency;
   return 1;
 }
 
@@ -629,7 +629,7 @@ static int computeDefOperandLatency(
   if (FindMin) {
     // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
     // it exists before defaulting to MinLatency.
-    if (ItinData->Props.MinLatency >= 0)
+    if (ItinData->SchedModel->MinLatency >= 0)
       return TII->getInstrLatency(ItinData, DefMI);
 
     // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index 318e5a3da2..05c83f760a 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -17,11 +17,13 @@
 
 using namespace llvm;
 
+MCSchedModel MCSchedModel::DefaultSchedModel; // For unknown processors.
+
 void
 MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
                                      const SubtargetFeatureKV *PF,
                                      const SubtargetFeatureKV *PD,
-                                     const SubtargetInfoKV *PI,
+                                     const SubtargetInfoKV *ProcSched,
                                      const InstrStage *IS,
                                      const unsigned *OC,
                                      const unsigned *FP,
@@ -29,7 +31,7 @@ MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
   TargetTriple = TT;
   ProcFeatures = PF;
   ProcDesc = PD;
-  ProcItins = PI;
+  ProcSchedModel = ProcSched;
   Stages = IS;
   OperandCycles = OC;
   ForwardingPaths = FP;
@@ -68,14 +70,14 @@ uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) {
 }
 
 
-InstrItineraryData
-MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const {
-  assert(ProcItins && "Instruction itineraries information not available!");
+MCSchedModel *
+MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
+  assert(ProcSchedModel && "Processor machine model not available!");
 
 #ifndef NDEBUG
   for (size_t i = 1; i < NumProcs; i++) {
-    assert(strcmp(ProcItins[i - 1].Key, ProcItins[i].Key) < 0 &&
-           "Itineraries table is not sorted");
+    assert(strcmp(ProcSchedModel[i - 1].Key, ProcSchedModel[i].Key) < 0 &&
+           "Processor machine model table is not sorted");
   }
 #endif
 
@@ -83,16 +85,19 @@ MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const {
   SubtargetInfoKV KV;
   KV.Key = CPU.data();
   const SubtargetInfoKV *Found =
-    std::lower_bound(ProcItins, ProcItins+NumProcs, KV);
-  if (Found == ProcItins+NumProcs || StringRef(Found->Key) != CPU) {
+    std::lower_bound(ProcSchedModel, ProcSchedModel+NumProcs, KV);
+  if (Found == ProcSchedModel+NumProcs || StringRef(Found->Key) != CPU) {
     errs() << "'" << CPU
            << "' is not a recognized processor for this target"
            << " (ignoring processor)\n";
-    return InstrItineraryData();
+    return &MCSchedModel::DefaultSchedModel;
   }
+  assert(Found->Value && "Missing processor SchedModel value");
+  return (MCSchedModel *)Found->Value;
+}
 
-  InstrItinerarySubtargetValue *V =
-    (InstrItinerarySubtargetValue *)Found->Value;
-  return InstrItineraryData(V->Props, Stages, OperandCycles, ForwardingPaths,
-                            V->Itineraries);
+InstrItineraryData
+MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const {
+  MCSchedModel *SchedModel = getSchedModelForCPU(CPU);
+  return InstrItineraryData(SchedModel, Stages, OperandCycles, ForwardingPaths);
 }
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index d332d20f80..cd3c0e0a41 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -204,13 +204,13 @@ def : Processor<"arm1156t2f-s",     ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
                                                        FeatureDSPThumb2]>;
 
 // V7a Processors.
-def : Processor<"cortex-a8",        CortexA8Itineraries,
+def : ProcessorModel<"cortex-a8",   CortexA8Model,
                                     [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
                                      FeatureDSPThumb2, FeatureHasRAS]>;
-def : Processor<"cortex-a9",        CortexA9Itineraries,
+def : ProcessorModel<"cortex-a9",   CortexA9Model,
                                     [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
                                      FeatureDSPThumb2, FeatureHasRAS]>;
-def : Processor<"cortex-a9-mp",     CortexA9Itineraries,
+def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
                                     [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
                                      FeatureDSPThumb2, FeatureMP,
                                      FeatureHasRAS]>;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 61de00a208..56197d4e00 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -22,11 +22,7 @@ def A8_NLSPipe : FuncUnit; // NEON LS pipe
 //
 // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
 //
-def CortexA8Itineraries : MultiIssueItineraries<
-  2,  // IssueWidth
-  -1, // MinLatency - OperandCycles are interpreted as MinLatency.
-  2,  // LoadLatency - overriden by OperandCycles.
-  10, // HighLatency - currently unused.
+def CortexA8Itineraries : ProcessorItineraries<
   [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
   [], [
   // Two fully-pipelined integer ALU pipelines
@@ -1061,3 +1057,18 @@ def CortexA8Itineraries : MultiIssueItineraries<
                                InstrStage<1, [A8_NPipe], 0>,
                             InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
 ]>;
+
+// ===---------------------------------------------------------------------===//
+// This following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Cortex-A8 machine model for scheduling and other instruction cost heuristics.
+def CortexA8Model : SchedMachineModel {
+  let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+  let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+  let LoadLatency = 2; // Optimistic load latency assuming bypass.
+                       // This is overriden by OperandCycles if the
+                       // Itineraries are queried instead.
+
+  let Itineraries = CortexA8Itineraries;
+}
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 1677ba6a98..738974e5f3 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -11,6 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
 //
 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
 // Reference Manual".
@@ -31,11 +35,7 @@ def A9_DRegsN  : FuncUnit; // FP register set, NEON side
 // Bypasses
 def A9_LdBypass : Bypass;
 
-def CortexA9Itineraries : MultiIssueItineraries<
-  2, // IssueWidth - FIXME: A9_Issue0, A9_Issue1 are now redundant.
-  0, // MinLatency - FIXME: for misched, remove InstrStage for OOO operations.
-  2, // LoadLatency - optimistic, assumes bypass, overriden by OperandCycles.
-  10, // HighLatency - currently unused.
+def CortexA9Itineraries : ProcessorItineraries<
   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
   [A9_LdBypass], [
@@ -1874,3 +1874,21 @@ def CortexA9Itineraries : MultiIssueItineraries<
                                InstrStage<2, [A9_NPipe]>],
                               [4, 1, 2, 2, 3, 3, 1]>
 ]>;
+
+// ===---------------------------------------------------------------------===//
+// This following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
+def CortexA9Model : SchedMachineModel {
+  let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+  let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+  let LoadLatency = 2; // Optimistic load latency assuming bypass.
+                       // This is overriden by OperandCycles if the
+                       // Itineraries are queried instead.
+
+  let Itineraries = CortexA9Itineraries;
+}
+
+// TODO: Add Cortex-A9 processor and scheduler resources.
+
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 8201de3f2a..451e56206e 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -47,14 +47,14 @@ def HexagonInstrInfo : InstrInfo;
 // Hexagon processors supported.
 //===----------------------------------------------------------------------===//
 
-class Proc<string Name, ProcessorItineraries Itin,
+class Proc<string Name, SchedMachineModel Model,
            list<SubtargetFeature> Features>
- : Processor<Name, Itin, Features>;
+ : ProcessorModel<Name, Model, Features>;
 
-def : Proc<"hexagonv2", HexagonItineraries,   [ArchV2]>;
-def : Proc<"hexagonv3", HexagonItineraries,   [ArchV2, ArchV3]>;
-def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>;
-def : Proc<"hexagonv5", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4, ArchV5]>;
+def : Proc<"hexagonv2", HexagonModel,   [ArchV2]>;
+def : Proc<"hexagonv3", HexagonModel,   [ArchV2, ArchV3]>;
+def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>;
+def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>;
 
 
 // Hexagon Uses the MC printer for assembler output, so make sure the TableGen
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index b4df6784e7..d1076b8e44 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -41,9 +41,12 @@ def HexagonItineraries :
         InstrItinData<SYS    , [InstrStage<1, [LSUNIT]>]>,
         InstrItinData<MARKER , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
         InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
-      ]> {
+      ]>;
+
+def HexagonModel : SchedMachineModel {
   // Max issue per cycle == bundle width.
   let IssueWidth = 4;
+  let Itineraries = HexagonItineraries;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
index 8d6f7b2b7a..9b41126ca6 100644
--- a/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -52,12 +52,14 @@ def HexagonItinerariesV4 :
         InstrItinData<MARKER , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
         InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
         InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>
-      ]> {
+      ]>;
+
+def HexagonModelV4 : SchedMachineModel {
   // Max issue per cycle == bundle width.
   let IssueWidth = 4;
+  let Itineraries = HexagonItinerariesV4;
 }
 
-
 //===----------------------------------------------------------------------===//
 // Hexagon V4 Resource Definitions -
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 0aafd60548..6c1a816c9f 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -131,10 +131,10 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
                     "Intel Atom processors">;
 
 class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, GenericItineraries, Features>;
+ : ProcessorModel<Name, GenericModel, Features>;
 
 class AtomProc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, AtomItineraries, Features>;
+ : ProcessorModel<Name, AtomModel, Features>;
 
 def : Proc<"generic",         []>;
 def : Proc<"i386",            []>;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 4331cf31e8..c14407f9ac 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -470,14 +470,12 @@ def IIC_NOP : InstrItinClass;
 // latencies. Since these latencies are not used for pipeline hazards,
 // they do not need to be exact.
 //
-// This set of instruction itineraries should contain no reference to
-// InstrStages. When an iterary has no stages, the scheduler can
-// bypass the logic needed for checking pipeline stage hazards.
-def GenericItineraries : MultiIssueItineraries<
-  4, // IssueWidth
-  0, // MinLatency
-  4, // LoadLatency (expected, may be overriden by OperandCycles)
- 10, // HighLatency (expected, may be overriden by OperandCycles)
- [], [], []>; // no FuncUnits, Bypasses, or InstrItinData.
+// The GenericModel contains no instruciton itineraries.
+def GenericModel : SchedMachineModel {
+  let IssueWidth = 4;
+  let MinLatency = 0;
+  let LoadLatency = 4;
+  let HighLatency = 10;
+}
 
 include "X86ScheduleAtom.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 56dd3407b2..87102614cc 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -22,12 +22,7 @@ def Port0 : FuncUnit; // ALU: ALU0, shift/rotate, load/store
 def Port1 : FuncUnit; // ALU: ALU1, bit processing, jump, and LEA
                       // SIMD/FP: SIMD ALU, FP Adder
 
-def AtomItineraries : MultiIssueItineraries<
-  2, // IssueWidth=2 allows 2 instructions per scheduling group.
-  1, // MinLatency=1. InstrStage cycles overrides MinLatency.
-     //               OperandCycles may be used for expected latency.
-  3, // LoadLatency (expected, may be overriden by OperandCycles)
-  30,// HighLatency (expected, may be overriden by OperandCycles)
+def AtomItineraries : ProcessorItineraries<
   [ Port0, Port1 ],
   [], [
   // P0 only
@@ -523,3 +518,13 @@ def AtomItineraries : MultiIssueItineraries<
   InstrItinData<IIC_NOP, [InstrStage<1, [Port0, Port1]>] >
   ]>;
 
+// Atom machine model.
+def AtomModel : SchedMachineModel {
+  let IssueWidth = 2;  // Allows 2 instructions per scheduling group.
+  let MinLatency = 1;  // InstrStage cycles overrides MinLatency.
+                       // OperandCycles may be used for expected latency.
+  let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles.
+  let HighLatency = 30;// Expected, may be overriden by OperandCycles.
+
+  let Itineraries = AtomItineraries;
+}
author	Andrew Trick <atrick@apple.com>	2012-07-07 04:00:00 +0000
committer	Andrew Trick <atrick@apple.com>	2012-07-07 04:00:00 +0000
commit	2661b411ccc81b1fe19194d3f43b2630cbef3f28 (patch)
tree	0decaebaee6c3a1a9d42df6b5619de1ffb2fac7d /lib
parent	06495cd7f2a91c4f659eac5e55b1c08b014d0a08 (diff)