author      Evan Cheng <evan.cheng@apple.com>    2010-05-28 23:26:21 +0000
committer   Evan Cheng <evan.cheng@apple.com>    2010-05-28 23:26:21 +0000
commit      046fa3f90a31ebfa10df89ae348f478d492709a9 (patch)
tree        1d3415f2b8e95c2566e2bd357775c8b5be581d46
parent      c10f5434b4ad0758f948c52c18d5740c7f44e8b3 (diff)
Fix some latency computation bugs: if the use is not a machine opcode, do not just return zero.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105061 91177308-0d34-0410-b5e6-96231b3b80d8
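The gist of the fix, as a standalone sketch: when operand latency is computed from the scheduling itineraries, a use node that is not a machine opcode should no longer short-circuit the whole computation; its read cycle simply defaults to 1, so the def's itinerary latency still takes effect. The code below is an illustration only, not the LLVM API: OperandCycleFn, computeEdgeLatency, and the toy itinerary in main are made-up stand-ins, and only the arithmetic mirrors the ComputeOperandLatency change in the diff further down.

```cpp
// edge_latency_sketch.cpp -- standalone illustration of the latency rule this
// commit adopts.  Names and signatures here are invented for the example.
#include <cstdio>
#include <functional>

// OperandCycle(schedClass, operandIdx) returns the itinerary cycle for an
// operand, or a negative value when there is no itinerary entry.
using OperandCycleFn = std::function<int(unsigned, unsigned)>;

int computeEdgeLatency(const OperandCycleFn &OperandCycle,
                       unsigned DefClass, unsigned DefIdx,
                       bool UseIsMachineOpcode,
                       unsigned UseClass, unsigned UseIdx,
                       int CurrentLatency) {
  int DefCycle = OperandCycle(DefClass, DefIdx);
  if (DefCycle < 0)
    return CurrentLatency;   // no itinerary data for the def: keep the default

  // The fix: a use that is not a machine opcode no longer forces an early
  // return; its read cycle simply defaults to 1.
  int UseCycle = 1;
  if (UseIsMachineOpcode)
    UseCycle = OperandCycle(UseClass, UseIdx);
  if (UseCycle < 0)
    return CurrentLatency;

  int Latency = DefCycle - UseCycle + 1;
  return Latency >= 0 ? Latency : CurrentLatency;
}

int main() {
  // Toy itinerary: operand 0 (the result) is ready at cycle 3, source
  // operands are read at cycle 1.
  OperandCycleFn Itin = [](unsigned, unsigned Idx) { return Idx == 0 ? 3 : 1; };

  // Use is a machine opcode reading operand 1: latency = 3 - 1 + 1 = 3.
  std::printf("machine-opcode use:     %d\n",
              computeEdgeLatency(Itin, 0, 0, true, 0, 1, 1));
  // Use is not a machine opcode: the old early return left the edge at its
  // default latency; with the fix the use cycle defaults to 1, giving 3.
  std::printf("non-machine-opcode use: %d\n",
              computeEdgeLatency(Itin, 0, 0, false, 0, 0, 1));
  return 0;
}
```

Compiled and run, both calls report a latency of 3 in this toy setup; before the fix, the second case kept whatever default the edge already carried.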
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp                   | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp                  | 21
-rw-r--r--  test/CodeGen/ARM/lsr-on-unrolled-loops.ll                        | 20
-rw-r--r--  test/CodeGen/ARM/reg_sequence.ll                                 |  7
-rw-r--r--  test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll |  4
5 files changed, 43 insertions, 22 deletions
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 820ba66816..ca723bea88 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -320,7 +320,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
     CapturePred(&*I);
-    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
+    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
       assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
              "Physical register dependency violated?");
@@ -1275,6 +1275,17 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
       return left->getHeight() > right->getHeight();
   } else if (RStall)
       return false;
+
+  // If either node is scheduling for latency, sort them by height and latency
+  // first.
+  if (left->SchedulingPref == Sched::Latency ||
+      right->SchedulingPref == Sched::Latency) {
+    if (left->getHeight() != right->getHeight())
+      return left->getHeight() > right->getHeight();
+    if (left->Latency != right->Latency)
+      return left->Latency > right->Latency;
+  }
+
   return BURRSort(left, right, SPQ);
 }
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 3185c88b82..b45c28f4be 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -59,7 +59,11 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
   SUnits.back().OrigNode = &SUnits.back();
   SUnit *SU = &SUnits.back();
   const TargetLowering &TLI = DAG->getTargetLoweringInfo();
-  SU->SchedulingPref = TLI.getSchedulingPreference(N);
+  if (N->isMachineOpcode() &&
+      N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)
+    SU->SchedulingPref = Sched::None;
+  else
+    SU->SchedulingPref = TLI.getSchedulingPreference(N);
   return SU;
 }
 
@@ -364,8 +368,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
       if (Cost >= 0)
         PhysReg = 0;
 
-      const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
-                             OpSU->Latency, PhysReg);
+      // If this is a ctrl dep, latency is 1.
+      unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+      const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+                             OpLatency, PhysReg);
       if (!isChain && !UnitLatencies) {
         ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
         ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
@@ -427,15 +433,18 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
     return;
 
   unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
-  if (Def->isMachineOpcode() && Use->isMachineOpcode()) {
+  if (Def->isMachineOpcode()) {
     const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
     if (DefIdx >= II.getNumDefs())
       return;
     int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
     if (DefCycle < 0)
       return;
-    const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
-    int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+    int UseCycle = 1;
+    if (Use->isMachineOpcode()) {
+      const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
+      UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+    }
     if (UseCycle >= 0) {
       int Latency = DefCycle - UseCycle + 1;
       if (Latency >= 0)
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
index 2ac408449a..3708dc3fdc 100644
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -4,14 +4,14 @@
 ; constant offset addressing, so that each of the following stores
 ; uses the same register.
 
-; CHECK: vstr.32 s0, [r12, #-128]
-; CHECK: vstr.32 s0, [r12, #-96]
-; CHECK: vstr.32 s0, [r12, #-64]
-; CHECK: vstr.32 s0, [r12, #-32]
-; CHECK: vstr.32 s0, [r12]
-; CHECK: vstr.32 s0, [r12, #32]
-; CHECK: vstr.32 s0, [r12, #64]
-; CHECK: vstr.32 s0, [r12, #96]
+; CHECK: vstr.32 s0, [r9, #-128]
+; CHECK: vstr.32 s0, [r9, #-96]
+; CHECK: vstr.32 s0, [r9, #-64]
+; CHECK: vstr.32 s0, [r9, #-32]
+; CHECK: vstr.32 s0, [r9]
+; CHECK: vstr.32 s0, [r9, #32]
+; CHECK: vstr.32 s0, [r9, #64]
+; CHECK: vstr.32 s0, [r9, #96]
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 
@@ -626,8 +626,8 @@ bb24:                                             ; preds = %bb23
 
 ; LSR should use count-down iteration to avoid requiring the trip count
 ; in a register, and it shouldn't require any reloads here.
-; CHECK: sub.w r9, r9, #1
-; CHECK-NEXT: cmp.w r9, #0
+; CHECK: subs r3, #1
+; CHECK-NEXT: cmp r3, #0
 ; CHECK-NEXT: bne.w
 
   %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 3ba82ccdfa..8199d4664f 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -45,9 +45,9 @@ define arm_apcscc void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocaptur
 entry:
 ; CHECK: t2:
 ; CHECK: vld1.16
-; CHECK: vld1.16
-; CHECK-NOT: vmov
 ; CHECK: vmul.i16
+; CHECK-NOT: vmov
+; CHECK: vld1.16
 ; CHECK: vmul.i16
 ; CHECK-NOT: vmov
 ; CHECK: vst1.16
@@ -238,8 +238,9 @@ bb14:                                             ; preds = %bb6
 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 ; CHECK: t9:
 ; CHECK: vldr.64
+; CHECK-NOT: vmov d{{.*}}, d0
 ; CHECK: vmov.i8 d1
-; CHECK-NEXT: vstmia r0, {d2,d3}
+; CHECK-NEXT: vstmia r0, {d0,d1}
 ; CHECK-NEXT: vstmia r0, {d0,d1}
   %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
   %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
index 50a02781fd..6a3c440bc9 100644
--- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -10,8 +10,8 @@ target triple = "powerpc-apple-darwin10.0"
 define void @foo(i32 %y) nounwind ssp {
 entry:
 ; CHECK: foo
-; CHECK: add r4
-; CHECK: 0(r4)
+; CHECK: add r3
+; CHECK: 0(r3)
   %y_addr = alloca i32                            ; <i32*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store i32 %y, i32* %y_addr
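For completeness, here is a standalone sketch of the tie-break that the ScheduleDAGRRList.cpp hunk adds to hybrid_ls_rr_sort: when either node prefers latency scheduling, compare heights first, then latencies, before falling back to the register-pressure order. NodeSketch, pressureOrder, and latencyAwareOrder are illustrative names (the real fallback is BURRSort, and the real comparator feeds the scheduler's priority queue, which this example does not model); only the comparison structure mirrors the patch.

```cpp
// hybrid_sort_sketch.cpp -- standalone sketch of the height/latency tie-break.
#include <algorithm>
#include <cstdio>
#include <vector>

enum class SchedPref { None, RegPressure, Latency };

struct NodeSketch {
  const char *Name;
  unsigned Height;   // critical-path height of the node
  unsigned Latency;  // latency of the node itself
  SchedPref Pref;    // per-node scheduling preference
};

// Stand-in for the register-pressure fallback (BURRSort in the real code):
// here it simply reports "no preference".
static bool pressureOrder(const NodeSketch &, const NodeSketch &) {
  return false;
}

// Mirrors the comparison added to hybrid_ls_rr_sort::operator(): when either
// node prefers latency scheduling, compare heights first, then latencies,
// and only then fall back to the register-pressure ordering.
static bool latencyAwareOrder(const NodeSketch &Left, const NodeSketch &Right) {
  if (Left.Pref == SchedPref::Latency || Right.Pref == SchedPref::Latency) {
    if (Left.Height != Right.Height)
      return Left.Height > Right.Height;
    if (Left.Latency != Right.Latency)
      return Left.Latency > Right.Latency;
  }
  return pressureOrder(Left, Right);
}

int main() {
  std::vector<NodeSketch> Nodes = {
      {"a", 2, 1, SchedPref::Latency},
      {"b", 5, 1, SchedPref::None},
      {"c", 5, 3, SchedPref::Latency},
  };
  // Every pair here involves a latency-preferring node, so the height/latency
  // rule applies: std::sort places the height-5 nodes before the height-2
  // node, and c before b on the latency tie-break (output: c, b, a).
  std::sort(Nodes.begin(), Nodes.end(), latencyAwareOrder);
  for (const NodeSketch &N : Nodes)
    std::printf("%s: height=%u latency=%u\n", N.Name, N.Height, N.Latency);
  return 0;
}
```

The sort in main is only there to make the predicate's ordering observable; the real scheduler consumes the comparator through its priority queue.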