26 files changed, 447 insertions, 774 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index acd2a03354..e1a2d3649a 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2029,13 +2029,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
 
   // Masked compares sometimes use the same register as the corresponding 'and'.
   if (CmpMask != ~0) {
-    if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+    if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
       MI = 0;
       for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
            UE = MRI->use_end(); UI != UE; ++UI) {
         if (UI->getParent() != CmpInstr->getParent()) continue;
         MachineInstr *PotentialAND = &*UI;
-        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
+            isPredicated(PotentialAND))
           continue;
         MI = PotentialAND;
         break;
@@ -2101,6 +2102,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   // The single candidate is called MI.
   if (!MI) MI = Sub;
 
+  // We can't use a predicated instruction - it doesn't always write the flags.
+  if (isPredicated(MI))
+    return false;
+
   switch (MI->getOpcode()) {
   default: break;
   case ARM::RSBrr:
@@ -2207,6 +2212,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
     // Toggle the optional operand to CPSR.
     MI->getOperand(5).setReg(ARM::CPSR);
     MI->getOperand(5).setIsDef(true);
+    assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
     CmpInstr->eraseFromParent();
 
     // Modify the condition code of operands in OperandsToUpdate.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ca0fa57b86..bc15dcf4fc 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -524,6 +524,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
     setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
     setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
+    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
 
     // Neon does not support some operations on v1i64 and v2i64 types.
     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -832,12 +833,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setTargetDAGCombine(ISD::ADD);
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::MUL);
-
-  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
-    setTargetDAGCombine(ISD::AND);
-    setTargetDAGCombine(ISD::OR);
-    setTargetDAGCombine(ISD::XOR);
-  }
+  setTargetDAGCombine(ISD::AND);
+  setTargetDAGCombine(ISD::OR);
+  setTargetDAGCombine(ISD::XOR);
 
   if (Subtarget->hasV6Ops())
     setTargetDAGCombine(ISD::SRL);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 832d1394bc..b94f963507 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -50,7 +50,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
   Code32Directive = ".code\t32";
 
   WeakRefDirective = "\t.weak\t";
-  LCOMMDirectiveType = LCOMM::NoAlignment;
 
   HasLEB128 = true;
   SupportsDebugInformation = true;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index a51e0fa3fb..95640f7df9 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -410,7 +410,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
   if (Type == macho::RIT_ARM_Half) {
     // The other-half value only gets populated for the movt and movw
     // relocation entries.
-    uint32_t Value = 0;;
+    uint32_t Value = 0;
     switch ((unsigned)Fixup.getKind()) {
     default: break;
     case ARM::fixup_arm_movw_lo16:
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 6a37639889..838f7b5ed7 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -20,205 +20,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool> ForceTopDown("vliw-misched-topdown", cl::Hidden,
-                                  cl::desc("Force top-down list scheduling"));
-static cl::opt<bool> ForceBottomUp("vliw-misched-bottomup", cl::Hidden,
-                                   cl::desc("Force bottom-up list scheduling"));
-
-#ifndef NDEBUG
-static cl::opt<bool> ViewMISchedDAGs("vliw-view-misched-dags", cl::Hidden,
-  cl::desc("Pop up a window to show MISched dags after they are processed"));
-
-static cl::opt<unsigned> MISchedCutoff("vliw-misched-cutoff", cl::Hidden,
-  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
-#else
-static bool ViewMISchedDAGs = false;
-#endif // NDEBUG
-
-/// Decrement this iterator until reaching the top or a non-debug instr.
-static MachineBasicBlock::iterator
-priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
-  assert(I != Beg && "reached the top of the region, cannot decrement");
-  while (--I != Beg) {
-    if (!I->isDebugValue())
-      break;
-  }
-  return I;
-}
-
-/// If this iterator is a debug value, increment until reaching the End or a
-/// non-debug instruction.
-static MachineBasicBlock::iterator
-nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
-  for(; I != End; ++I) {
-    if (!I->isDebugValue())
-      break;
-  }
-  return I;
-}
-
-/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
-/// NumPredsLeft reaches zero, release the successor node.
-///
-/// FIXME: Adjust SuccSU height based on MinLatency.
-void VLIWMachineScheduler::releaseSucc(SUnit *SU, SDep *SuccEdge) {
-  SUnit *SuccSU = SuccEdge->getSUnit();
-
-#ifndef NDEBUG
-  if (SuccSU->NumPredsLeft == 0) {
-    dbgs() << "*** Scheduling failed! ***\n";
-    SuccSU->dump(this);
-    dbgs() << " has been released too many times!\n";
-    llvm_unreachable(0);
-  }
-#endif
-  --SuccSU->NumPredsLeft;
-  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
-    SchedImpl->releaseTopNode(SuccSU);
-}
-
-/// releaseSuccessors - Call releaseSucc on each of SU's successors.
-void VLIWMachineScheduler::releaseSuccessors(SUnit *SU) {
-  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-       I != E; ++I) {
-    releaseSucc(SU, &*I);
-  }
-}
-
-/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
-/// NumSuccsLeft reaches zero, release the predecessor node.
-///
-/// FIXME: Adjust PredSU height based on MinLatency.
-void VLIWMachineScheduler::releasePred(SUnit *SU, SDep *PredEdge) {
-  SUnit *PredSU = PredEdge->getSUnit();
-
-#ifndef NDEBUG
-  if (PredSU->NumSuccsLeft == 0) {
-    dbgs() << "*** Scheduling failed! ***\n";
-    PredSU->dump(this);
-    dbgs() << " has been released too many times!\n";
-    llvm_unreachable(0);
-  }
-#endif
-  --PredSU->NumSuccsLeft;
-  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
-    SchedImpl->releaseBottomNode(PredSU);
-}
-
-/// releasePredecessors - Call releasePred on each of SU's predecessors.
-void VLIWMachineScheduler::releasePredecessors(SUnit *SU) {
-  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I) {
-    releasePred(SU, &*I);
-  }
-}
-
-void VLIWMachineScheduler::moveInstruction(MachineInstr *MI,
-                                    MachineBasicBlock::iterator InsertPos) {
-  // Advance RegionBegin if the first instruction moves down.
-  if (&*RegionBegin == MI)
-    ++RegionBegin;
-
-  // Update the instruction stream.
-  BB->splice(InsertPos, BB, MI);
-
-  // Update LiveIntervals
-  LIS->handleMove(MI);
-
-  // Recede RegionBegin if an instruction moves above the first.
-  if (RegionBegin == InsertPos)
-    RegionBegin = MI;
-}
-
-bool VLIWMachineScheduler::checkSchedLimit() {
-#ifndef NDEBUG
-  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
-    CurrentTop = CurrentBottom;
-    return false;
-  }
-  ++NumInstrsScheduled;
-#endif
-  return true;
-}
-
-/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
-/// crossing a scheduling boundary. [begin, end) includes all instructions in
-/// the region, including the boundary itself and single-instruction regions
-/// that don't get scheduled.
-void VLIWMachineScheduler::enterRegion(MachineBasicBlock *bb,
-                                MachineBasicBlock::iterator begin,
-                                MachineBasicBlock::iterator end,
-                                unsigned endcount)
-{
-  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
-
-  // For convenience remember the end of the liveness region.
-  LiveRegionEnd =
-    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
-}
-
-// Setup the register pressure trackers for the top scheduled top and bottom
-// scheduled regions.
-void VLIWMachineScheduler::initRegPressure() {
-  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
-  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
-
-  // Close the RPTracker to finalize live ins.
-  RPTracker.closeRegion();
-
-  DEBUG(RPTracker.getPressure().dump(TRI));
-
-  // Initialize the live ins and live outs.
-  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
-  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
-
-  // Close one end of the tracker so we can call
-  // getMaxUpward/DownwardPressureDelta before advancing across any
-  // instructions. This converts currently live regs into live ins/outs.
-  TopRPTracker.closeTop();
-  BotRPTracker.closeBottom();
-
-  // Account for liveness generated by the region boundary.
-  if (LiveRegionEnd != RegionEnd)
-    BotRPTracker.recede();
-
-  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
-
-  // Cache the list of excess pressure sets in this region. This will also track
-  // the max pressure in the scheduled code for these sets.
-  RegionCriticalPSets.clear();
-  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
-  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
-    unsigned Limit = TRI->getRegPressureSetLimit(i);
-    if (RegionPressure[i] > Limit)
-      RegionCriticalPSets.push_back(PressureElement(i, 0));
-  }
-  DEBUG(dbgs() << "Excess PSets: ";
-        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
-          dbgs() << TRI->getRegPressureSetName(
-            RegionCriticalPSets[i].PSetID) << " ";
-        dbgs() << "\n");
-
-  // Reset resource state.
-  TopResourceModel->resetPacketState();
-  TopResourceModel->resetDFA();
-  BotResourceModel->resetPacketState();
-  BotResourceModel->resetDFA();
-  TotalPackets = 0;
-}
-
-// FIXME: When the pressure tracker deals in pressure differences then we won't
-// iterate over all RegionCriticalPSets[i].
-void VLIWMachineScheduler::
-updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
-  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
-    unsigned ID = RegionCriticalPSets[i].PSetID;
-    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
-    if ((int)NewMaxPressure[ID] > MaxUnits)
-      MaxUnits = NewMaxPressure[ID];
-  }
-}
-
 /// Check if scheduling of this SU is possible
 /// in the current packet.
 /// It is _not_ precise (statefull), it is more like
@@ -264,13 +65,15 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
 }
 
 /// Keep track of available resources.
-void VLIWResourceModel::reserveResources(SUnit *SU) {
+bool VLIWResourceModel::reserveResources(SUnit *SU) {
+  bool startNewCycle = false;
   // If this SU does not fit in the packet
   // start a new one.
   if (!isResourceAvailable(SU)) {
     ResourcesModel->clearResources();
     Packet.clear();
     TotalPackets++;
+    startNewCycle = true;
   }
 
   switch (SU->getInstr()->getOpcode()) {
@@ -295,7 +98,8 @@ void VLIWResourceModel::reserveResources(SUnit *SU) {
   DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
     DEBUG(dbgs() << "\t[" << i << "] SU(");
-    DEBUG(dbgs() << Packet[i]->NodeNum << ")\n");
+    DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
+    DEBUG(Packet[i]->getInstr()->dump());
   }
 #endif
 
@@ -305,27 +109,10 @@ void VLIWResourceModel::reserveResources(SUnit *SU) {
     ResourcesModel->clearResources();
     Packet.clear();
     TotalPackets++;
+    startNewCycle = true;
   }
-}
 
-// Release all DAG roots for scheduling.
-void VLIWMachineScheduler::releaseRoots() {
-  SmallVector<SUnit*, 16> BotRoots;
-
-  for (std::vector<SUnit>::iterator
-         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
-    // A SUnit is ready to top schedule if it has no predecessors.
-    if (I->Preds.empty())
-      SchedImpl->releaseTopNode(&(*I));
-    // A SUnit is ready to bottom schedule if it has no successors.
-    if (I->Succs.empty())
-      BotRoots.push_back(&(*I));
-  }
-  // Release bottom roots in reverse order so the higher priority nodes appear
-  // first. This is more natural and slightly more efficient.
-  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
-         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
-    SchedImpl->releaseBottomNode(*I);
+  return startNewCycle;
 }
 
 /// schedule - Called back from MachineScheduler::runOnMachineFunction
@@ -339,125 +126,43 @@ void VLIWMachineScheduler::schedule() {
         << " at loop depth "  << MLI->getLoopDepth(BB)
         << " \n");
 
-  // Initialize the register pressure tracker used by buildSchedGraph.
-  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
-
-  // Account for liveness generate by the region boundary.
-  if (LiveRegionEnd != RegionEnd)
-    RPTracker.recede();
-
-  // Build the DAG, and compute current register pressure.
-  buildSchedGraph(AA, &RPTracker);
-
-  // Initialize top/bottom trackers after computing region pressure.
-  initRegPressure();
-
+  buildDAGWithRegPressure();
+
+  // To view Height/Depth correctly, they should be accessed at least once.
+  DEBUG(unsigned maxH = 0;
+        for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          if (SUnits[su].getHeight() > maxH)
+            maxH = SUnits[su].getHeight();
+        dbgs() << "Max Height " << maxH << "\n";);
+  DEBUG(unsigned maxD = 0;
+        for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          if (SUnits[su].getDepth() > maxD)
+            maxD = SUnits[su].getDepth();
+        dbgs() << "Max Depth " << maxD << "\n";);
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
           SUnits[su].dumpAll(this));
 
-  if (ViewMISchedDAGs) viewGraph();
-
-  SchedImpl->initialize(this);
+  initQueues();
 
-  // Release edges from the special Entry node or to the special Exit node.
-  releaseSuccessors(&EntrySU);
-  releasePredecessors(&ExitSU);
-
-  // Release all DAG roots for scheduling.
-  releaseRoots();
-
-  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
-  CurrentBottom = RegionEnd;
   bool IsTopNode = false;
   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
     if (!checkSchedLimit())
       break;
 
-    // Move the instruction to its new location in the instruction stream.
-    MachineInstr *MI = SU->getInstr();
-
-    if (IsTopNode) {
-      assert(SU->isTopReady() && "node still has unscheduled dependencies");
-      if (&*CurrentTop == MI)
-        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
-      else {
-        moveInstruction(MI, CurrentTop);
-        TopRPTracker.setPos(MI);
-      }
+    scheduleMI(SU, IsTopNode);
 
-      // Update top scheduled pressure.
-      TopRPTracker.advance();
-      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
-      updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
-
-      // Update DFA state.
-      TopResourceModel->reserveResources(SU);
-
-      // Release dependent instructions for scheduling.
-      releaseSuccessors(SU);
-    }
-    else {
-      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
-      MachineBasicBlock::iterator priorII =
-        priorNonDebug(CurrentBottom, CurrentTop);
-      if (&*priorII == MI)
-        CurrentBottom = priorII;
-      else {
-        if (&*CurrentTop == MI) {
-          CurrentTop = nextIfDebug(++CurrentTop, priorII);
-          TopRPTracker.setPos(CurrentTop);
-        }
-        moveInstruction(MI, CurrentBottom);
-        CurrentBottom = MI;
-      }
-      // Update bottom scheduled pressure.
-      BotRPTracker.recede();
-      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
-      updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
-
-      // Update DFA state.
-      BotResourceModel->reserveResources(SU);
-
-      // Release dependent instructions for scheduling.
-      releasePredecessors(SU);
-    }
-    SU->isScheduled = true;
-    SchedImpl->schedNode(SU, IsTopNode);
+    updateQueues(SU, IsTopNode);
   }
   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 
-  DEBUG(dbgs() << "Final schedule has " << TopResourceModel->getTotalPackets() +
-        BotResourceModel->getTotalPackets()<< "packets.\n");
-
   placeDebugValues();
 }
 
-/// Reinsert any remaining debug_values, just like the PostRA scheduler.
-void VLIWMachineScheduler::placeDebugValues() {
-  // If first instruction was a DBG_VALUE then put it back.
-  if (FirstDbgValue) {
-    BB->splice(RegionBegin, BB, FirstDbgValue);
-    RegionBegin = FirstDbgValue;
-  }
-
-  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
-         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
-    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
-    MachineInstr *DbgValue = P.first;
-    MachineBasicBlock::iterator OrigPrevMI = P.second;
-    BB->splice(++OrigPrevMI, BB, DbgValue);
-    if (OrigPrevMI == llvm::prior(RegionEnd))
-      RegionEnd = DbgValue;
-  }
-  DbgValues.clear();
-  FirstDbgValue = NULL;
-}
-
-void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) {
-  DAG = dag;
+void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
+  DAG = static_cast<VLIWMachineScheduler*>(dag);
   TRI = DAG->TRI;
-  Top.DAG = dag;
-  Bot.DAG = dag;
+  Top.DAG = DAG;
+  Bot.DAG = DAG;
 
   // Initialize the HazardRecognizers.
   const TargetMachine &TM = DAG->MF.getTarget();
@@ -465,7 +170,10 @@ void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) {
   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
 
-  assert((!ForceTopDown || !ForceBottomUp) &&
+  Top.ResourceModel = new VLIWResourceModel(TM);
+  Bot.ResourceModel = new VLIWResourceModel(TM);
+
+  assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
          "-misched-topdown incompatible with -misched-bottomup");
 }
 
@@ -553,8 +261,7 @@ void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
   if (!HazardRec->isEnabled()) {
     // Bypass HazardRec virtual calls.
     CurrCycle = NextCycle;
-  }
-  else {
+  } else {
     // Bypass getHazardType calls in case of long latency.
     for (; CurrCycle != NextCycle; ++CurrCycle) {
       if (isTop())
@@ -571,6 +278,7 @@ void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
 
 /// Move the boundary of scheduled code by one SUnit.
 void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+  bool startNewCycle = false;
 
   // Update the reservation table.
   if (HazardRec->isEnabled()) {
@@ -581,13 +289,20 @@ void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
     }
     HazardRec->EmitInstruction(SU);
   }
+
+  // Update DFA model.
+  startNewCycle = ResourceModel->reserveResources(SU);
+
   // Check the instruction group dispatch limit.
   // TODO: Check if this SU must end a dispatch group.
   IssueCount += DAG->getNumMicroOps(SU->getInstr());
-  if (IssueCount >= DAG->getIssueWidth()) {
+  if (startNewCycle) {
     DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
     bumpCycle();
   }
+  else
+    DEBUG(dbgs() << "*** IssueCount " << IssueCount
+          << " at cycle " << CurrCycle << '\n');
 }
 
 /// Release pending ready nodes in to the available queue. This makes them
@@ -648,8 +363,9 @@ SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
 }
 
 #ifndef NDEBUG
-void ConvergingVLIWScheduler::traceCandidate(const char *Label, const ReadyQueue &Q,
-                                         SUnit *SU, PressureElement P) {
+void ConvergingVLIWScheduler::traceCandidate(const char *Label,
+                                             const ReadyQueue &Q,
+                                             SUnit *SU, PressureElement P) {
   dbgs() << Label << " " << Q.getName() << " ";
   if (P.isValid())
     dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
@@ -660,10 +376,48 @@ void ConvergingVLIWScheduler::traceCandidate(const char *Label, const ReadyQueue
 }
 #endif
 
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+static SUnit *getSingleUnscheduledPred(SUnit *SU) {
+  SUnit *OnlyAvailablePred = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    SUnit &Pred = *I->getSUnit();
+    if (!Pred.isScheduled) {
+      // We found an available, but not scheduled, predecessor.  If it's the
+      // only one we have found, keep track of it... otherwise give up.
+      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+        return 0;
+      OnlyAvailablePred = &Pred;
+    }
+  }
+  return OnlyAvailablePred;
+}
+
+/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
+/// of SU, return it, otherwise return null.
+static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
+  SUnit *OnlyAvailableSucc = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    SUnit &Succ = *I->getSUnit();
+    if (!Succ.isScheduled) {
+      // We found an available, but not scheduled, successor.  If it's the
+      // only one we have found, keep track of it... otherwise give up.
+      if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
+        return 0;
+      OnlyAvailableSucc = &Succ;
+    }
+  }
+  return OnlyAvailableSucc;
+}
+
 // Constants used to denote relative importance of
 // heuristic components for cost computation.
 static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
 static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 20;
 static const unsigned ScaleTwo = 10;
 static const unsigned FactorOne = 2;
 
@@ -685,19 +439,44 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
     ResCount += PriorityOne;
 
   // Critical path first.
-  if (Q.getID() == TopQID)
+  if (Q.getID() == TopQID) {
     ResCount += (SU->getHeight() * ScaleTwo);
-  else
+
+    // If resources are available for it, multiply the
+    // chance of scheduling.
+    if (Top.ResourceModel->isResourceAvailable(SU))
+      ResCount <<= FactorOne;
+  } else {
     ResCount += (SU->getDepth() * ScaleTwo);
 
-  // If resources are available for it, multiply the
-  // chance of scheduling.
-  if (DAG->getTopResourceModel()->isResourceAvailable(SU))
-    ResCount <<= FactorOne;
+    // If resources are available for it, multiply the
+    // chance of scheduling.
+    if (Bot.ResourceModel->isResourceAvailable(SU))
+      ResCount <<= FactorOne;
+  }
+
+  unsigned NumNodesBlocking = 0;
+  if (Q.getID() == TopQID) {
+    // How many SUs does it block from scheduling?
+    // Look at all of the successors of this node.
+    // Count the number of nodes that
+    // this node is the sole unscheduled node for.
+    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+         I != E; ++I)
+      if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+        ++NumNodesBlocking;
+  } else {
+    // Count the predecessors whose only unscheduled successor is this node.
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I)
+      if (getSingleUnscheduledSucc(I->getSUnit()) == SU)
+        ++NumNodesBlocking;
+  }
+  ResCount += (NumNodesBlocking * ScaleTwo);
 
   // Factor in reg pressure as a heuristic.
-  ResCount -= (Delta.Excess.UnitIncrease * PriorityThree);
-  ResCount -= (Delta.CriticalMax.UnitIncrease * PriorityThree);
+  ResCount -= (Delta.Excess.UnitIncrease*PriorityThree);
+  ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree);
 
   DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
 
@@ -736,7 +515,6 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
       continue;
     }
 
-
     // Best cost.
     if (CurrentCost > Candidate.SCost) {
       DEBUG(traceCandidate("CCAND", Q, *I));
@@ -821,7 +599,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
     return NULL;
   }
   SUnit *SU;
-  if (ForceTopDown) {
+  if (llvm::ForceTopDown) {
     SU = Top.pickOnlyChoice();
     if (!SU) {
       SchedCandidate TopCand;
@@ -832,7 +610,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
       SU = TopCand.SU;
     }
     IsTopNode = true;
-  } else if (ForceBottomUp) {
+  } else if (llvm::ForceBottomUp) {
     SU = Bot.pickOnlyChoice();
     if (!SU) {
       SchedCandidate BotCand;
@@ -859,14 +637,14 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
 }
 
 /// Update the scheduler's state after scheduling a node. This is the same node
-/// that was just returned by pickNode(). However, VLIWMachineScheduler needs to update
-/// it's state based on the current cycle before MachineSchedStrategy does.
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update its state based on the current cycle before MachineSchedStrategy
+/// does.
 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
     SU->TopReadyCycle = Top.CurrCycle;
     Top.bumpNode(SU);
-  }
-  else {
+  } else {
     SU->BotReadyCycle = Bot.CurrCycle;
     Bot.bumpNode(SU);
   }
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 7d8cc3d24e..212cc9800b 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -33,102 +33,96 @@
 
 using namespace llvm;
 
-//===----------------------------------------------------------------------===//
-// MachineSchedStrategy - Interface to a machine scheduling algorithm.
-//===----------------------------------------------------------------------===//
-
 namespace llvm {
-class VLIWMachineScheduler;
-
-/// MachineSchedStrategy - Interface used by VLIWMachineScheduler to drive the selected
-/// scheduling algorithm.
-///
-/// If this works well and targets wish to reuse VLIWMachineScheduler, we may expose it
-/// in ScheduleDAGInstrs.h
-class MachineSchedStrategy {
-public:
-  virtual ~MachineSchedStrategy() {}
-
-  /// Initialize the strategy after building the DAG for a new region.
-  virtual void initialize(VLIWMachineScheduler *DAG) = 0;
-
-  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
-  /// schedule the node at the top of the unscheduled region. Otherwise it will
-  /// be scheduled at the bottom.
-  virtual SUnit *pickNode(bool &IsTopNode) = 0;
-
-  /// Notify MachineSchedStrategy that VLIWMachineScheduler has scheduled a node.
-  virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
-
-  /// When all predecessor dependencies have been resolved, free this node for
-  /// top-down scheduling.
-  virtual void releaseTopNode(SUnit *SU) = 0;
-  /// When all successor dependencies have been resolved, free this node for
-  /// bottom-up scheduling.
-  virtual void releaseBottomNode(SUnit *SU) = 0;
-};
-
 //===----------------------------------------------------------------------===//
-// ConvergingVLIWScheduler - Implementation of the standard MachineSchedStrategy.