aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp39
-rw-r--r--test/CodeGen/X86/avoid-loop-align-2.ll45
2 files changed, 78 insertions, 6 deletions
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 2e1d12d234..61a8b12860 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -62,6 +62,8 @@ namespace {
private:
bool OptimizeIntraLoopEdges();
+ bool HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
+ SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign);
bool AlignLoops(MachineFunction &MF);
};
@@ -244,14 +246,37 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges() {
/// should be aligned. For now, we will not align it if all the predecessors
/// (i.e. loop back edges) are laid out above the header. FIXME: Do not
/// align small loops.
-static bool HeaderShouldBeAligned(MachineBasicBlock *MBB) {
+bool
+CodePlacementOpt::HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
+ SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign) {
+ if (DoNotAlign.count(MBB))
+ return false;
+
+ bool BackEdgeBelow = false;
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
MachineBasicBlock *PredMBB = *PI;
- if (PredMBB == MBB || PredMBB->getNumber() > MBB->getNumber())
- return true;
+ if (PredMBB == MBB || PredMBB->getNumber() > MBB->getNumber()) {
+ BackEdgeBelow = true;
+ break;
+ }
+ }
+
+ if (!BackEdgeBelow)
+ return false;
+
+ // Ok, we are going to align this loop header. If its preheader lies in an
+ // enclosing loop, reset that loop's header alignment and mark it DoNotAlign.
+ MachineBasicBlock *PreHeader = L->getLoopPreheader();
+ if (PreHeader) {
+ MachineLoop *OuterL = MLI->getLoopFor(PreHeader);
+ if (OuterL) {
+ MachineBasicBlock *OuterHeader = OuterL->getHeader();
+ OuterHeader->setAlignment(0);
+ DoNotAlign.insert(OuterHeader);
+ }
}
- return false;
+ return true;
}
/// AlignLoops - Align loop headers to target preferred alignments.
@@ -269,14 +294,16 @@ bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
MF.RenumberBlocks();
bool Changed = false;
+ SmallPtrSet<MachineBasicBlock*, 4> DoNotAlign;
for (unsigned i = 0, e = LoopHeaders.size(); i != e; ++i) {
MachineBasicBlock *HeaderMBB = LoopHeaders[i];
MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(HeaderMBB));
- if (MLI->getLoopFor(HeaderMBB) == MLI->getLoopFor(PredMBB))
+ MachineLoop *L = MLI->getLoopFor(HeaderMBB);
+ if (L == MLI->getLoopFor(PredMBB))
// If the previous BB is in the same loop, don't align this BB. We want
// to prevent adding noop's inside a loop.
continue;
- if (HeaderShouldBeAligned(HeaderMBB)) {
+ if (HeaderShouldBeAligned(HeaderMBB, L, DoNotAlign)) {
HeaderMBB->setAlignment(Align);
Changed = true;
++NumHeaderAligned;
diff --git a/test/CodeGen/X86/avoid-loop-align-2.ll b/test/CodeGen/X86/avoid-loop-align-2.ll
new file mode 100644
index 0000000000..9f0aeb32c4
--- /dev/null
+++ b/test/CodeGen/X86/avoid-loop-align-2.ll
@@ -0,0 +1,45 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep align | count 3
+
+@x = external global i32* ; <i32**> [#uses=1]
+
+define i32 @t(i32 %a, i32 %b) nounwind readonly ssp {
+entry:
+ %0 = icmp eq i32 %a, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb5, label %bb.nph12
+
+bb.nph12: ; preds = %entry
+ %1 = icmp eq i32 %b, 0 ; <i1> [#uses=1]
+ %2 = load i32** @x, align 8 ; <i32*> [#uses=1]
+ br i1 %1, label %bb2.preheader, label %bb2.preheader.us
+
+bb2.preheader.us: ; preds = %bb2.bb3_crit_edge.us, %bb.nph12
+ %indvar18 = phi i32 [ 0, %bb.nph12 ], [ %indvar.next19, %bb2.bb3_crit_edge.us ] ; <i32> [#uses=2]
+ %sum.111.us = phi i32 [ 0, %bb.nph12 ], [ %4, %bb2.bb3_crit_edge.us ] ; <i32> [#uses=0]
+ %tmp16 = mul i32 %indvar18, %a ; <i32> [#uses=1]
+ br label %bb1.us
+
+bb1.us: ; preds = %bb1.us, %bb2.preheader.us
+ %indvar = phi i32 [ 0, %bb2.preheader.us ], [ %indvar.next, %bb1.us ] ; <i32> [#uses=2]
+ %tmp17 = add i32 %indvar, %tmp16 ; <i32> [#uses=1]
+ %tmp. = zext i32 %tmp17 to i64 ; <i64> [#uses=1]
+ %3 = getelementptr i32* %2, i64 %tmp. ; <i32*> [#uses=1]
+ %4 = load i32* %3, align 4 ; <i32> [#uses=2]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %b ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb2.bb3_crit_edge.us, label %bb1.us
+
+bb2.bb3_crit_edge.us: ; preds = %bb1.us
+ %indvar.next19 = add i32 %indvar18, 1 ; <i32> [#uses=2]
+ %exitcond22 = icmp eq i32 %indvar.next19, %a ; <i1> [#uses=1]
+ br i1 %exitcond22, label %bb5, label %bb2.preheader.us
+
+bb2.preheader: ; preds = %bb2.preheader, %bb.nph12
+ %indvar24 = phi i32 [ %indvar.next25, %bb2.preheader ], [ 0, %bb.nph12 ] ; <i32> [#uses=1]
+ %indvar.next25 = add i32 %indvar24, 1 ; <i32> [#uses=2]
+ %exitcond28 = icmp eq i32 %indvar.next25, %a ; <i1> [#uses=1]
+ br i1 %exitcond28, label %bb5, label %bb2.preheader
+
+bb5: ; preds = %bb2.preheader, %bb2.bb3_crit_edge.us, %entry
+ %sum.1.lcssa = phi i32 [ 0, %entry ], [ 0, %bb2.preheader ], [ %4, %bb2.bb3_crit_edge.us ] ; <i32> [#uses=1]
+ ret i32 %sum.1.lcssa
+}