diff options
-rw-r--r-- | include/llvm/CodeGen/LiveIntervalAnalysis.h | 10 | ||||
-rw-r--r-- | lib/CodeGen/LiveIntervalAnalysis.cpp | 6 | ||||
-rw-r--r-- | lib/CodeGen/SimpleRegisterCoalescing.cpp | 25 | ||||
-rw-r--r-- | test/CodeGen/X86/phys_subreg_coalesce-3.ll | 35 |
4 files changed, 61 insertions, 15 deletions
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index e8856ac103..1a2cc25ba4 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -127,11 +127,11 @@ namespace llvm { bool conflictsWithPhysReg(const LiveInterval &li, VirtRegMap &vrm, unsigned reg); - /// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegRef except - /// it can check use as well. - bool conflictsWithPhysRegRef(LiveInterval &li, unsigned Reg, - bool CheckUse, - SmallPtrSet<MachineInstr*,32> &JoinedCopies); + /// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except + /// it checks for sub-register reference and it can check use as well. + bool conflictsWithSubPhysRegRef(LiveInterval &li, unsigned Reg, + bool CheckUse, + SmallPtrSet<MachineInstr*,32> &JoinedCopies); // Interval creation LiveInterval &getOrCreateInterval(unsigned reg) { diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index f8b1707459..dbb5e19191 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -218,9 +218,9 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, return false; } -/// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegRef except -/// it can check use as well. -bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li, +/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except +/// it checks for sub-register reference and it can check use as well. +bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li, unsigned Reg, bool CheckUse, SmallPtrSet<MachineInstr*,32> &JoinedCopies) { for (LiveInterval::Ranges::const_iterator diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index d25df1d2ad..5c621181cd 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -1260,10 +1260,10 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC); assert(RealDstReg && "Invalid extract_subreg instruction!"); + LiveInterval &RHS = li_->getInterval(SrcReg); // For this type of EXTRACT_SUBREG, conservatively // check if the live interval of the source register interfere with the // actual super physical register we are trying to coalesce with. - LiveInterval &RHS = li_->getInterval(SrcReg); if (li_->hasInterval(RealDstReg) && RHS.overlaps(li_->getInterval(RealDstReg))) { DEBUG({ @@ -1273,7 +1273,11 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, return false; // Not coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR) - if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { + // Do not check DstReg or its sub-register. JoinIntervals() will take care + // of that. + if (*SR != DstReg && + !tri_->isSubRegister(DstReg, *SR) && + li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ dbgs() << "Interfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); @@ -1294,9 +1298,9 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC); assert(RealSrcReg && "Invalid extract_subreg instruction!"); - LiveInterval &RHS = li_->getInterval(DstReg); + LiveInterval &LHS = li_->getInterval(DstReg); if (li_->hasInterval(RealSrcReg) && - RHS.overlaps(li_->getInterval(RealSrcReg))) { + LHS.overlaps(li_->getInterval(RealSrcReg))) { DEBUG({ dbgs() << "Interfere with register "; li_->getInterval(RealSrcReg).print(dbgs(), tri_); @@ -1304,7 +1308,11 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, return false; // Not coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR) - if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { + // Do not check SrcReg or its sub-register. JoinIntervals() will take care + // of that. + if (*SR != SrcReg && + !tri_->isSubRegister(SrcReg, *SR) && + li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { DEBUG({ dbgs() << "Interfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); @@ -1476,6 +1484,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable. } + // FIXME: The following checks are somewhat conservative. Perhaps a better + // way to implement this is to treat this as coalescing a vr with the + // super physical register. if (isExtSubReg) { if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg)) return false; // Not coalescable @@ -2205,7 +2216,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, li_->intervalIsInOneMBB(RHS) && li_->getApproximateInstructionCount(RHS) <= 10) { // Perform a more exhaustive check for some common cases. - if (li_->conflictsWithPhysRegRef(RHS, LHS.reg, true, JoinedCopies)) + if (li_->conflictsWithSubPhysRegRef(RHS, LHS.reg, true, JoinedCopies)) return false; } else { for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) @@ -2222,7 +2233,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, if (LHS.containsOneValue() && li_->getApproximateInstructionCount(LHS) <= 10) { // Perform a more exhaustive check for some common cases. - if (li_->conflictsWithPhysRegRef(LHS, RHS.reg, false, JoinedCopies)) + if (li_->conflictsWithSubPhysRegRef(LHS, RHS.reg, false, JoinedCopies)) return false; } else { for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll new file mode 100644 index 0000000000..f23669ed9a --- /dev/null +++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s +; rdar://5571034 + +define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp { +; CHECK: foo: +entry: + %j.03 = add i32 %bbSize, -1 ; <i32> [#uses=2] + %0 = icmp sgt i32 %j.03, -1 ; <i1> [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry + %tmp9 = add i32 %bbStart, %bbSize ; <i32> [#uses=1] + %tmp10 = add i32 %tmp9, -1 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph +; CHECK: %bb +; CHECK-NOT: movb {{.*}}l, %cl +; CHECK: sarl %cl + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3] + %j.06 = sub i32 %j.03, %indvar ; <i32> [#uses=1] + %tmp11 = sub i32 %tmp10, %indvar ; <i32> [#uses=1] + %scevgep = getelementptr i32* %ptr, i32 %tmp11 ; <i32*> [#uses=1] + %1 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %2 = ashr i32 %j.06, %shifts ; <i32> [#uses=1] + %3 = and i32 %2, 65535 ; <i32> [#uses=1] + %4 = getelementptr inbounds i32* %quadrant, i32 %1 ; <i32*> [#uses=1] + store i32 %3, i32* %4, align 4 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %bbSize ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} |