diff options
author | Evan Cheng <evan.cheng@apple.com> | 2011-01-28 02:19:21 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2011-01-28 02:19:21 +0000 |
commit | 40f64cb0de40802ddd2f928b62e9564e1e721ff3 (patch) | |
tree | e94f9f5d77d86efd85ac2d6f0ad8a3ced55ca5ee | |
parent | c3a20bab7571ff95525252c379198e67b65d0f1d (diff) |
- Stop simplifycfg from duplicating "ret" instructions into unconditional
branches. PR8575, rdar://5134905, rdar://8911460.
- Allow codegen tail duplication to dup small return blocks after register
allocation is done.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124462 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/TailDuplication.cpp | 11 |
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 7 |
-rw-r--r-- | test/CodeGen/X86/critical-edge-split.ll | 50 |
-rw-r--r-- | test/CodeGen/X86/loop-blocks.ll | 11 |
-rw-r--r-- | test/Transforms/JumpThreading/and-and-cond.ll | 10 |
-rw-r--r-- | test/Transforms/JumpThreading/and-cond.ll | 9 |
-rw-r--r-- | test/Transforms/JumpThreading/thread-loads.ll | 9 |
-rw-r--r-- | test/Transforms/SimplifyCFG/MagicPointer.ll | 1 |
-rw-r--r-- | test/Transforms/SimplifyCFG/basictest.ll | 10 |
-rw-r--r-- | test/Transforms/SimplifyCFG/switch_create.ll | 28 |
-rw-r--r-- | test/Transforms/SimplifyCFG/switch_formation.dbg.ll | 14 |
11 files changed, 78 insertions, 82 deletions
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index ce4b1be854..15aed3436c 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -465,9 +465,12 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, MaxDuplicateCount = TailDuplicateSize; if (PreRegAlloc) { - // Pre-regalloc tail duplication hurts compile time and doesn't help - // much except for indirect branches. - if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch()) + if (TailBB->empty()) + return false; + const TargetInstrDesc &TID = TailBB->back().getDesc(); + // Pre-regalloc tail duplication hurts compile time and doesn't help + // much except for indirect branches and returns. + if (!TID.isIndirectBranch() && !TID.isReturn()) return false; // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there @@ -502,7 +505,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, } // Heuristically, don't tail-duplicate calls if it would expand code size, // as it's less likely to be worth the extra cost. 
- if (InstrCount > 1 && HasCall) + if (InstrCount > 1 && (PreRegAlloc && HasCall)) return false; DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index f6d7d76dbf..37e6d28d7b 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -36,6 +37,10 @@ #include <map> using namespace llvm; +static cl::opt<bool> +DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), + cl::desc("Duplicate return instructions into unconditional branches")); + STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { @@ -2027,7 +2032,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI) { } // If we found some, do the transformation! 
- if (!UncondBranchPreds.empty()) { + if (!UncondBranchPreds.empty() && DupRet) { while (!UncondBranchPreds.empty()) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); DEBUG(dbgs() << "FOLDING: " << *BB diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll deleted file mode 100644 index 96fef0fbfc..0000000000 --- a/test/CodeGen/X86/critical-edge-split.ll +++ /dev/null @@ -1,50 +0,0 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -o /dev/null -stats -info-output-file - | grep asm-printer | grep 29 - - %CC = type { %Register } - %II = type { %"struct.XX::II::$_74" } - %JITFunction = type %YYValue* (%CC*, %YYValue**) - %YYValue = type { i32 (...)** } - %Register = type { %"struct.XX::ByteCodeFeatures" } - %"struct.XX::ByteCodeFeatures" = type { i32 } - %"struct.XX::II::$_74" = type { i8* } -@llvm.used = appending global [1 x i8*] [ i8* bitcast (%JITFunction* @loop to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] - -define %YYValue* @loop(%CC*, %YYValue**) nounwind { -; <label>:2 - %3 = getelementptr %CC* %0, i32 -9 ; <%CC*> [#uses=1] - %4 = bitcast %CC* %3 to %YYValue** ; <%YYValue**> [#uses=2] - %5 = load %YYValue** %4 ; <%YYValue*> [#uses=3] - %unique_1.i = ptrtoint %YYValue* %5 to i1 ; <i1> [#uses=1] - br i1 %unique_1.i, label %loop, label %11 - -loop: ; preds = %6, %2 - %.1 = phi %YYValue* [ inttoptr (i32 1 to %YYValue*), %2 ], [ %intAddValue, %6 ] ; <%YYValue*> [#uses=3] - %immediateCmp = icmp slt %YYValue* %.1, %5 ; <i1> [#uses=1] - br i1 %immediateCmp, label %6, label %8 - -; <label>:6 ; preds = %loop - %lhsInt = ptrtoint %YYValue* %.1 to i32 ; <i32> [#uses=1] - %7 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhsInt, i32 2) ; <{ i32, i1 }> [#uses=2] - %intAdd = extractvalue { i32, i1 } %7, 0 ; <i32> [#uses=1] - %intAddValue = inttoptr i32 %intAdd to %YYValue* ; <%YYValue*> [#uses=1] - %intAddOverflow = extractvalue { i32, i1 } %7, 1 ; <i1> [#uses=1] - br i1 %intAddOverflow, label %.loopexit, label 
%loop - -; <label>:8 ; preds = %loop - ret %YYValue* inttoptr (i32 10 to %YYValue*) - -.loopexit: ; preds = %6 - %9 = bitcast %CC* %0 to %YYValue** ; <%YYValue**> [#uses=1] - store %YYValue* %.1, %YYValue** %9 - store %YYValue* %5, %YYValue** %4 - %10 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431104 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1] - ret %YYValue* %10 - -; <label>:11 ; preds = %2 - %12 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431080 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1] - ret %YYValue* %12 -} - -declare fastcc %YYValue* @foobar(%II*, %CC*, %YYValue**) nounwind - -declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll index 354d082069..faba630071 100644 --- a/test/CodeGen/X86/loop-blocks.ll +++ b/test/CodeGen/X86/loop-blocks.ll @@ -70,6 +70,7 @@ exit: ; Same as slightly_more_involved, but block_a is now a CFG diamond with ; fallthrough edges which should be preserved. +; "callq block_a_merge_func" is tail duped. 
; CHECK: yet_more_involved: ; CHECK: jmp .LBB2_1 @@ -78,12 +79,12 @@ exit: ; CHECK-NEXT: callq bar99 ; CHECK-NEXT: callq get ; CHECK-NEXT: cmpl $2999, %eax -; CHECK-NEXT: jg .LBB2_6 -; CHECK-NEXT: callq block_a_true_func -; CHECK-NEXT: jmp .LBB2_7 -; CHECK-NEXT: .LBB2_6: +; CHECK-NEXT: jle .LBB2_5 ; CHECK-NEXT: callq block_a_false_func -; CHECK-NEXT: .LBB2_7: +; CHECK-NEXT: callq block_a_merge_func +; CHECK-NEXT: jmp .LBB2_1 +; CHECK-NEXT: .LBB2_5: +; CHECK-NEXT: callq block_a_true_func ; CHECK-NEXT: callq block_a_merge_func ; CHECK-NEXT: .LBB2_1: ; CHECK-NEXT: callq body diff --git a/test/Transforms/JumpThreading/and-and-cond.ll b/test/Transforms/JumpThreading/and-and-cond.ll index e6db9ee5a3..765d940cc7 100644 --- a/test/Transforms/JumpThreading/and-and-cond.ll +++ b/test/Transforms/JumpThreading/and-and-cond.ll @@ -1,14 +1,14 @@ -; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1} -; There should be no uncond branches left. -; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label} +; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | FileCheck %s declare i32 @f1() declare i32 @f2() declare void @f3() define i32 @test(i1 %cond, i1 %cond2, i1 %cond3) { +; CHECK: test br i1 %cond, label %T1, label %F1 +; CHECK-NOT: T1: T1: %v1 = call i32 @f1() br label %Merge @@ -18,6 +18,10 @@ F1: br label %Merge Merge: +; CHECK: Merge: +; CHECK: %v1 = call i32 @f1() +; CHECK-NEXT: %D = and i1 %cond2, %cond3 +; CHECK-NEXT: br i1 %D %A = phi i1 [true, %T1], [false, %F1] %B = phi i32 [%v1, %T1], [%v2, %F1] %C = and i1 %A, %cond2 diff --git a/test/Transforms/JumpThreading/and-cond.ll b/test/Transforms/JumpThreading/and-cond.ll index 58dbec72a7..0159bb3bb7 100644 --- a/test/Transforms/JumpThreading/and-cond.ll +++ b/test/Transforms/JumpThreading/and-cond.ll @@ -1,14 +1,14 @@ -; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1} -; There should be no uncond 
branches left. -; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label} +; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | FileCheck %s declare i32 @f1() declare i32 @f2() declare void @f3() define i32 @test(i1 %cond, i1 %cond2) { +; CHECK: test br i1 %cond, label %T1, label %F1 +; CHECK-NOT: T1 T1: %v1 = call i32 @f1() br label %Merge @@ -18,6 +18,9 @@ F1: br label %Merge Merge: +; CHECK: Merge: +; CHECK: %v1 = call i32 @f1() +; CHECK-NEXT: br i1 %cond2 %A = phi i1 [true, %T1], [false, %F1] %B = phi i32 [%v1, %T1], [%v2, %F1] %C = and i1 %A, %cond2 diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll index 96ba701046..cce23ea319 100644 --- a/test/Transforms/JumpThreading/thread-loads.ll +++ b/test/Transforms/JumpThreading/thread-loads.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -jump-threading -simplifycfg -S | grep {ret i32 1} +; RUN: opt < %s -jump-threading -S | FileCheck %s ; rdar://6402033 ; Test that we can thread through the block with the partially redundant load (%2). 
@@ -6,12 +6,16 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 target triple = "i386-apple-darwin7" define i32 @foo(i32* %P) nounwind { +; CHECK: foo entry: %0 = tail call i32 (...)* @f1() nounwind ; <i32> [#uses=1] %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] br i1 %1, label %bb1, label %bb bb: ; preds = %entry +; CHECK: bb1.thread: +; CHECK: store +; CHECK: br label %bb3 store i32 42, i32* %P, align 4 br label %bb1 @@ -26,6 +30,9 @@ bb2: ; preds = %bb1 ret i32 %res.0 bb3: ; preds = %bb1 +; CHECK: bb3: +; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ] +; CHECK: ret i32 %res.01 ret i32 %res.0 } diff --git a/test/Transforms/SimplifyCFG/MagicPointer.ll b/test/Transforms/SimplifyCFG/MagicPointer.ll index 54e5b14880..93b9a276ea 100644 --- a/test/Transforms/SimplifyCFG/MagicPointer.ll +++ b/test/Transforms/SimplifyCFG/MagicPointer.ll @@ -8,7 +8,6 @@ ; CHECK: i64 2, label ; CHECK: i64 3, label ; CHECK: i64 4, label -; CHECK-NOT: br ; CHECK: } target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll index b485f6ab05..052e10667d 100644 --- a/test/Transforms/SimplifyCFG/basictest.ll +++ b/test/Transforms/SimplifyCFG/basictest.ll @@ -25,16 +25,6 @@ define void @test3(i1 %T) { } -define void @test4() { - br label %return -return: - ret void -; CHECK: @test4 -; CHECK-NEXT: ret void -} -@test4g = global i8* blockaddress(@test4, %return) - - ; PR5795 define void @test5(i32 %A) { switch i32 %A, label %return [ diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll index da7f65a6ca..4e199bc859 100644 --- a/test/Transforms/SimplifyCFG/switch_create.ll +++ b/test/Transforms/SimplifyCFG/switch_create.ll @@ -147,7 +147,7 @@ UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4 ; 
CHECK: i32 16, label %UnifiedReturnBlock ; CHECK: i32 17, label %UnifiedReturnBlock ; CHECK: i32 18, label %UnifiedReturnBlock -; CHECK: i32 19, label %switch.edge +; CHECK: i32 19, label %UnifiedReturnBlock ; CHECK: ] } @@ -441,3 +441,29 @@ if.end: ; CHECK-NOT: switch ; CHECK: ret void } + +; PR8675 +; rdar://5134905 +define zeroext i1 @test16(i32 %x) nounwind { +entry: +; CHECK: @test16 +; CHECK: switch i32 %x, label %lor.rhs [ +; CHECK: i32 1, label %lor.end +; CHECK: i32 2, label %lor.end +; CHECK: i32 3, label %lor.end +; CHECK: ] + %cmp.i = icmp eq i32 %x, 1 + br i1 %cmp.i, label %lor.end, label %lor.lhs.false + +lor.lhs.false: + %cmp.i2 = icmp eq i32 %x, 2 + br i1 %cmp.i2, label %lor.end, label %lor.rhs + +lor.rhs: + %cmp.i1 = icmp eq i32 %x, 3 + br label %lor.end + +lor.end: + %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp.i1, %lor.rhs ] + ret i1 %0 +} diff --git a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll index f1c820ec43..357ffb60e1 100644 --- a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll +++ b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll @@ -1,5 +1,4 @@ -; RUN: opt < %s -simplifycfg -S | not grep br - +; RUN: opt < %s -simplifycfg -S | FileCheck %s %llvm.dbg.anchor.type = type { i32, i32 } %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* } @@ -13,7 +12,16 @@ declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind -define i1 @_ZN4llvm11SetCondInst7classofEPKNS_11InstructionE({ i32, i32 }* %I) { +define i1 @t({ i32, i32 }* %I) { +; CHECK: t +; CHECK: switch i32 %tmp.2.i, label %shortcirc_next.4 [ +; CHECK: i32 14, label %UnifiedReturnBlock +; CHECK: i32 15, label %UnifiedReturnBlock +; CHECK: i32 16, label %UnifiedReturnBlock +; CHECK: i32 17, label %UnifiedReturnBlock +; CHECK: i32 18, label %UnifiedReturnBlock +; CHECK: i32 19, label %UnifiedReturnBlock +; CHECK: ] entry: %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1 ; 
<i32*> [#uses=1] %tmp.2.i = load i32* %tmp.1.i ; <i32> [#uses=6] |