aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Gohman <gohman@apple.com>2009-11-13 21:02:15 +0000
committerDan Gohman <gohman@apple.com>2009-11-13 21:02:15 +0000
commitc4c550c7584b3240bda71d4339ec49c1cf731d55 (patch)
treeb87f263bab10843186383b4c85cc7a064ed037e5
parent236490d870c2734203ecf26c8f104d0b3283e69e (diff)
When optimizing for size, don't tail-merge unless it's likely to be a
code-size win, and not when it's only likely to be code-size neutral, such as when only a single instruction would be eliminated and a new branch would be required. This fixes rdar://7392894. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@88692 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/BranchFolding.cpp21
-rw-r--r--test/CodeGen/X86/tail-opts.ll113
2 files changed, 125 insertions, 9 deletions
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index dd17d881c5..6606316735 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -519,21 +519,24 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
return true;
// If both blocks have an unconditional branch temporarily stripped out,
- // treat that as an additional common instruction.
- if (MBB1 != PredBB && MBB2 != PredBB &&
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
!MBB1->back().getDesc().isBarrier() &&
!MBB2->back().getDesc().isBarrier())
- --minCommonTailLength;
+ ++EffectiveTailLen;
// Check if the common tail is long enough to be worthwhile.
- if (CommonTailLen >= minCommonTailLength)
+ if (EffectiveTailLen >= minCommonTailLength)
return true;
- // If we are optimizing for code size, 1 instruction in common is enough if
- // we don't have to split a block. At worst we will be replacing a
- // fallthrough into the common tail with a branch, which at worst breaks
- // even with falling through into the duplicated common tail.
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ if (EffectiveTailLen >= 2 &&
+ MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index fdf96d6086..0d86e56132 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -293,3 +293,116 @@ return:
}
declare void @func()
+
+; one - One instruction may be tail-duplicated even with optsize.
+
+; CHECK: one:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+
+@XYZ = external global i32
+
+define void @one() nounwind optsize {
+entry:
+ %0 = icmp eq i32 undef, 0
+ br i1 %0, label %bbx, label %bby
+
+bby:
+ switch i32 undef, label %bb7 [
+ i32 16, label %return
+ ]
+
+bb7:
+ volatile store i32 0, i32* @XYZ
+ unreachable
+
+bbx:
+ switch i32 undef, label %bb12 [
+ i32 128, label %return
+ ]
+
+bb12:
+ volatile store i32 0, i32* @XYZ
+ unreachable
+
+return:
+ ret void
+}
+
+; two - Same as one, but with two instructions in the common
+; tail instead of one. This is too much to be merged, given
+; the optsize attribute.
+
+; CHECK: two:
+; CHECK-NOT: XYZ
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK-NOT: XYZ
+; CHECK: ret
+
+define void @two() nounwind optsize {
+entry:
+ %0 = icmp eq i32 undef, 0
+ br i1 %0, label %bbx, label %bby
+
+bby:
+ switch i32 undef, label %bb7 [
+ i32 16, label %return
+ ]
+
+bb7:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+bbx:
+ switch i32 undef, label %bb12 [
+ i32 128, label %return
+ ]
+
+bb12:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+return:
+ ret void
+}
+
+; two_nosize - Same as two, but without the optsize attribute.
+; Now two instructions are enough to be tail-duplicated.
+
+; CHECK: two_nosize:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+
+define void @two_nosize() nounwind {
+entry:
+ %0 = icmp eq i32 undef, 0
+ br i1 %0, label %bbx, label %bby
+
+bby:
+ switch i32 undef, label %bb7 [
+ i32 16, label %return
+ ]
+
+bb7:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+bbx:
+ switch i32 undef, label %bb12 [
+ i32 128, label %return
+ ]
+
+bb12:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+return:
+ ret void
+}