diff options
author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2013-01-03 00:47:51 +0000 |
---|---|---|
committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2013-01-03 00:47:51 +0000 |
commit | 251ed7f3e59a857dd92bda1ba4f9305f33deb67b (patch) | |
tree | 40200e5363ae38bc59c8545ce6fe34943e1c90f6 | |
parent | 47990e974355dd58d1f320f252e1a7ed3a79e034 (diff) |
Fix PR14732 by handling all kinds of IMPLICIT_DEF live ranges.
Most IMPLICIT_DEF instructions are removed by the ProcessImplicitDefs
pass, and a few are reinserted by PHIElimination when a PHI argument is
<undef>.
RegisterCoalescer was assuming that all IMPLICIT_DEF live ranges look
like those created by PHIElimination, and that their live range never
leaves the basic block.
The PR14732 test case does tricks with PHI nodes that causes a longer
IMPLICIT_DEF live range to appear. This happens very rarely, but
RegisterCoalescer should be able to handle it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171435 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/RegisterCoalescer.cpp | 45 | ||||
-rw-r--r-- | test/CodeGen/X86/coalesce-implicitdef.ll | 130 |
2 files changed, 167 insertions, 8 deletions
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index b5d80032ae..36d8101422 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -1282,8 +1282,18 @@ class JoinVals { // Value in the other live range that overlaps this def, if any. VNInfo *OtherVNI; - // Is this value an IMPLICIT_DEF? - bool IsImplicitDef; + // Is this value an IMPLICIT_DEF that can be erased? + // + // IMPLICIT_DEF values should only exist at the end of a basic block that + // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be + // safely erased if they are overlapping a live value in the other live + // interval. + // + // Weird control flow graphs and incomplete PHI handling in + // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with + // longer live ranges. Such IMPLICIT_DEF values should be treated like + // normal values. + bool ErasableImplicitDef; // True when the live range of this value will be pruned because of an // overlapping CR_Replace value in the other live range. @@ -1293,8 +1303,8 @@ class JoinVals { bool PrunedComputed; Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), - RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false), - PrunedComputed(false) {} + RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false), + Pruned(false), PrunedComputed(false) {} bool isAnalyzed() const { return WriteLanes != 0; } }; @@ -1432,7 +1442,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // An IMPLICIT_DEF writes undef values. if (DefMI->isImplicitDef()) { - V.IsImplicitDef = true; + // We normally expect IMPLICIT_DEF values to be live only until the end + // of their block. If the value is really live longer and gets pruned in + // another block, this flag is cleared again. + V.ErasableImplicitDef = true; V.ValidLanes &= ~V.WriteLanes; } } @@ -1485,7 +1498,22 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // We have overlapping values, or possibly a kill of Other. // Recursively compute assignments up the dominator tree. Other.computeAssignment(V.OtherVNI->id, *this); - const Val &OtherV = Other.Vals[V.OtherVNI->id]; + Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block. + // This shouldn't normally happen, but ProcessImplicitDefs can leave such + // IMPLICIT_DEF instructions behind, and there is nothing wrong with it + // technically. + // + // WHen it happens, treat that IMPLICIT_DEF as a normal value, and don't try + // to erase the IMPLICIT_DEF instruction. + if (OtherV.ErasableImplicitDef && DefMI && + DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { + DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def + << " extends into BB#" << DefMI->getParent()->getNumber() + << ", keeping it.\n"); + OtherV.ErasableImplicitDef = false; + } // Allow overlapping PHI values. Any real interference would show up in a // predecessor, the PHI itself can't introduce any conflicts. @@ -1794,7 +1822,8 @@ void JoinVals::pruneValues(JoinVals &Other, // predecessors, so the instruction should simply go away once its value // has been replaced. Val &OtherV = Other.Vals[Vals[i].OtherVNI->id]; - bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep; + bool EraseImpDef = OtherV.ErasableImplicitDef && + OtherV.Resolution == CR_Keep; if (!Def.isBlock()) { // Remove <def,read-undef> flags. This def is now a partial redef. // Also remove <def,dead> flags since the joined live range will @@ -1843,7 +1872,7 @@ void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any // longer. The IMPLICIT_DEF instructions are only inserted by // PHIElimination to guarantee that all PHI predecessors have a value. - if (!Vals[i].IsImplicitDef || !Vals[i].Pruned) + if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) break; // Remove value number i from LI. Note that this VNInfo is still present // in NewVNInfo, so it will appear as an unused value number in the final diff --git a/test/CodeGen/X86/coalesce-implicitdef.ll b/test/CodeGen/X86/coalesce-implicitdef.ll new file mode 100644 index 0000000000..19cd08cf37 --- /dev/null +++ b/test/CodeGen/X86/coalesce-implicitdef.ll @@ -0,0 +1,130 @@ +; RUN: llc < %s -verify-coalescing +; PR14732 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10" + +@c = common global i32 0, align 4 +@b = common global i32 0, align 4 +@a = common global i32 0, align 4 +@d = common global i32 0, align 4 + +; This function creates an IMPLICIT_DEF with a long live range, even after +; ProcessImplicitDefs. +; +; The coalescer should be able to deal with all kinds of IMPLICIT_DEF live +; ranges, even if they are not common. + +define void @f() nounwind uwtable ssp { +entry: + %i = alloca i32, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc34, %entry + %i.0.load44 = phi i32 [ %inc35, %for.inc34 ], [ undef, %entry ] + %pi.0 = phi i32* [ %pi.4, %for.inc34 ], [ undef, %entry ] + %tobool = icmp eq i32 %i.0.load44, 0 + br i1 %tobool, label %for.end36, label %for.body + +for.body: ; preds = %for.cond + store i32 0, i32* @c, align 4, !tbaa !0 + br label %for.body2 + +for.body2: ; preds = %for.body, %for.inc + %i.0.load45 = phi i32 [ %i.0.load44, %for.body ], [ 0, %for.inc ] + %tobool3 = icmp eq i32 %i.0.load45, 0 + br i1 %tobool3, label %if.then10, label %if.then + +if.then: ; preds = %for.body2 + store i32 0, i32* %i, align 4, !tbaa !0 + br label %for.body6 + +for.body6: ; preds = %if.then, %for.body6 + store i32 0, i32* %i, align 4 + br i1 true, label %for.body6, label %for.inc + +if.then10: ; preds = %for.body2 + store i32 1, i32* @b, align 4, !tbaa !0 + ret void + +for.inc: ; preds = %for.body6 + br i1 undef, label %for.body2, label %if.end30 + +while.condthread-pre-split: ; preds = %label.loopexit, %while.condthread-pre-split.lr.ph.lr.ph, %for.inc27.backedge + %0 = phi i32 [ %inc28, %for.inc27.backedge ], [ %inc285863, %while.condthread-pre-split.lr.ph.lr.ph ], [ %inc2858, %label.loopexit ] + %inc2060 = phi i32 [ %inc20, %for.inc27.backedge ], [ %a.promoted.pre, %while.condthread-pre-split.lr.ph.lr.ph ], [ %inc20, %label.loopexit ] + br label %while.cond + +while.cond: ; preds = %while.condthread-pre-split, %while.cond + %p2.1.in = phi i32* [ %pi.3.ph, %while.cond ], [ %i, %while.condthread-pre-split ] + %p2.1 = bitcast i32* %p2.1.in to i16* + br i1 %tobool19, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %inc20 = add nsw i32 %inc2060, 1 + %tobool21 = icmp eq i32 %inc2060, 0 + br i1 %tobool21, label %for.inc27.backedge, label %if.then22 + +for.inc27.backedge: ; preds = %while.end, %if.then22 + %inc28 = add nsw i32 %0, 1 + store i32 %inc28, i32* @b, align 4, !tbaa !0 + %tobool17 = icmp eq i32 %inc28, 0 + br i1 %tobool17, label %for.inc27.if.end30.loopexit56_crit_edge, label %while.condthread-pre-split + +if.then22: ; preds = %while.end + %1 = load i16* %p2.1, align 2, !tbaa !3 + %tobool23 = icmp eq i16 %1, 0 + br i1 %tobool23, label %for.inc27.backedge, label %label.loopexit + +label.loopexit: ; preds = %if.then22 + store i32 %inc20, i32* @a, align 4, !tbaa !0 + %inc2858 = add nsw i32 %0, 1 + store i32 %inc2858, i32* @b, align 4, !tbaa !0 + %tobool1759 = icmp eq i32 %inc2858, 0 + br i1 %tobool1759, label %if.end30, label %while.condthread-pre-split + +for.inc27.if.end30.loopexit56_crit_edge: ; preds = %for.inc27.backedge + store i32 %inc20, i32* @a, align 4, !tbaa !0 + br label %if.end30 + +if.end30: ; preds = %for.inc27.if.end30.loopexit56_crit_edge, %label.loopexit, %label.preheader, %for.inc + %i.0.load46 = phi i32 [ 0, %for.inc ], [ %i.0.load4669, %label.preheader ], [ %i.0.load4669, %label.loopexit ], [ %i.0.load4669, %for.inc27.if.end30.loopexit56_crit_edge ] + %pi.4 = phi i32* [ %i, %for.inc ], [ %pi.3.ph, %label.preheader ], [ %pi.3.ph, %label.loopexit ], [ %pi.3.ph, %for.inc27.if.end30.loopexit56_crit_edge ] + %2 = load i32* %pi.4, align 4, !tbaa !0 + %tobool31 = icmp eq i32 %2, 0 + br i1 %tobool31, label %for.inc34, label %label.preheader + +for.inc34: ; preds = %if.end30 + %inc35 = add nsw i32 %i.0.load46, 1 + store i32 %inc35, i32* %i, align 4 + br label %for.cond + +for.end36: ; preds = %for.cond + store i32 1, i32* %i, align 4 + %3 = load i32* @c, align 4, !tbaa !0 + %tobool37 = icmp eq i32 %3, 0 + br i1 %tobool37, label %label.preheader, label %land.rhs + +land.rhs: ; preds = %for.end36 + store i32 0, i32* @a, align 4, !tbaa !0 + br label %label.preheader + +label.preheader: ; preds = %for.end36, %if.end30, %land.rhs + %i.0.load4669 = phi i32 [ 1, %land.rhs ], [ %i.0.load46, %if.end30 ], [ 1, %for.end36 ] + %pi.3.ph = phi i32* [ %pi.0, %land.rhs ], [ %pi.4, %if.end30 ], [ %pi.0, %for.end36 ] + %4 = load i32* @b, align 4, !tbaa !0 + %inc285863 = add nsw i32 %4, 1 + store i32 %inc285863, i32* @b, align 4, !tbaa !0 + %tobool175964 = icmp eq i32 %inc285863, 0 + br i1 %tobool175964, label %if.end30, label %while.condthread-pre-split.lr.ph.lr.ph + +while.condthread-pre-split.lr.ph.lr.ph: ; preds = %label.preheader + %.pr50 = load i32* @d, align 4, !tbaa !0 + %tobool19 = icmp eq i32 %.pr50, 0 + %a.promoted.pre = load i32* @a, align 4, !tbaa !0 + br label %while.condthread-pre-split +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"short", metadata !1} |