author     Chandler Carruth <chandlerc@gmail.com>  2011-11-15 06:26:43 +0000
committer  Chandler Carruth <chandlerc@gmail.com>  2011-11-15 06:26:43 +0000
commit     3273c8937b8c3ebdd1cfc0c67054ce5571f0afc9
tree       a07198cd5b11b45fba3bbb0e7e08ccf476b8f25e
parent     4c077a1f04c97210793d62debef250b974d168bc
Rather than trying to use the loop block sequence *or* the function block sequence when recovering from unanalyzable control flow constructs, *always* use the function sequence. I'm not sure why I ever went down the path of trying to use the loop sequence; it is fundamentally not the correct sequence to use. We're trying to preserve the incoming layout in the cases of unreasonable control flow, and that is only encoded at the function level. We already have a filter to select *exactly* the subset of blocks within the function that we're trying to form into a chain.

The resulting code layout is also significantly better because of this. In several places we were ending up with completely unreasonable control flow constructs due to the ordering chosen by the loop structure for its internal storage.

This change removes a completely wasteful vector of basic blocks, saving memory allocation in the common case even though it costs us CPU in the fairly rare case of unnatural loops.

Finally, it fixes the latest crasher reduced out of GCC's single source. Thanks again to Benjamin Kramer for the reduction; my bugpoint skills failed at it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144627 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp  51
-rw-r--r--  test/CodeGen/X86/block-placement.ll    73
2 files changed, 96 insertions, 28 deletions
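
For readers skimming the diff below: the core of the change is that getFirstUnplacedBlock now walks the function's own block sequence (optionally restricted by a BlockFilterSet) instead of a separately built vector of blocks. What follows is a minimal standalone C++ sketch of that recovery walk under simplified assumptions; Block, Chain, BlockFilter, and firstUnplacedBlock are illustrative stand-in names for this sketch only, not the LLVM classes touched by the patch.

// Sketch (assumed simplification of the patch below): scan the function's
// block list from a cached position, skip blocks outside the optional filter,
// and return the first block not yet placed into the current chain.

#include <cstddef>
#include <iostream>
#include <set>
#include <vector>

struct Chain {};                       // stand-in for BlockChain
struct Block {
  int ID;
  Chain *Owner;                        // stand-in for the BlockToChain[] mapping
  Block(int Id) : ID(Id), Owner(nullptr) {}
};
using BlockFilter = std::set<const Block *>;  // stand-in for BlockFilterSet

// Walk the blocks in function order starting at the cached index PrevIdx,
// honour the filter, and remember where we stopped so repeated calls do not
// rescan the whole function.
Block *firstUnplacedBlock(std::vector<Block> &FunctionBlocks,
                          const Chain &PlacedChain, std::size_t &PrevIdx,
                          const BlockFilter *Filter) {
  for (std::size_t I = PrevIdx, E = FunctionBlocks.size(); I != E; ++I) {
    Block &B = FunctionBlocks[I];
    if (Filter && !Filter->count(&B))
      continue;                        // block is outside the region being laid out
    if (B.Owner != &PlacedChain) {     // block not yet merged into this chain
      PrevIdx = I;
      return &B;
    }
  }
  return nullptr;                      // every eligible block is already placed
}

int main() {
  // A "function" with five blocks; blocks 0 and 2 already belong to the chain.
  std::vector<Block> F{{0}, {1}, {2}, {3}, {4}};
  Chain C;
  F[0].Owner = &C;
  F[2].Owner = &C;

  // Only blocks 0, 2 and 3 are in the region we are chaining (the filter).
  BlockFilter Filter{&F[0], &F[2], &F[3]};

  std::size_t PrevIdx = 0;
  if (Block *B = firstUnplacedBlock(F, C, PrevIdx, &Filter))
    std::cout << "next block in function order: " << B->ID << "\n";  // prints 3
  return 0;
}

Caching the scan position across calls, as the patch's PrevUnplacedBlockIt does, keeps the recovery path linear over the function's blocks instead of rescanning from the start on every call.
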
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 304f16717b..2fef9c45ca 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -214,11 +214,12 @@ class MachineBlockPlacement : public MachineFunctionPass {
MachineBasicBlock *selectBestCandidateBlock(
BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
const BlockFilterSet *BlockFilter);
- MachineBasicBlock *getFirstUnplacedBlock(const BlockChain &PlacedChain,
- ArrayRef<MachineBasicBlock *> Blocks,
- unsigned &PrevUnplacedBlockIdx);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ MachineFunction &F,
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
- ArrayRef<MachineBasicBlock *> Blocks,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = 0);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
@@ -444,18 +445,20 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
///
/// This routine is called when we are unable to use the CFG to walk through
/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
-/// We walk through the sequence of blocks, starting from the
-/// LastUnplacedBlockIdx. We update this index to avoid re-scanning the entire
-/// sequence on repeated calls to this routine.
+/// We walk through the function's blocks in order, starting from the
+/// LastUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
- const BlockChain &PlacedChain,
- ArrayRef<MachineBasicBlock *> Blocks,
- unsigned &PrevUnplacedBlockIdx) {
- for (unsigned i = PrevUnplacedBlockIdx, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *BB = Blocks[i];
- if (BlockToChain[BB] != &PlacedChain) {
- PrevUnplacedBlockIdx = i;
- return BB;
+ MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter) {
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+ ++I) {
+ if (BlockFilter && !BlockFilter->count(I))
+ continue;
+ if (BlockToChain[I] != &PlacedChain) {
+ PrevUnplacedBlockIt = I;
+ return I;
}
}
return 0;
@@ -464,14 +467,14 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
void MachineBlockPlacement::buildChain(
MachineBasicBlock *BB,
BlockChain &Chain,
- ArrayRef<MachineBasicBlock *> Blocks,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
assert(BB);
assert(BlockToChain[BB] == &Chain);
assert(*Chain.begin() == BB);
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
- unsigned PrevUnplacedBlockIdx = 0;
+ MachineFunction &F = *BB->getParent();
+ MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
MachineBasicBlock *LoopHeaderBB = BB;
markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
@@ -510,7 +513,8 @@ void MachineBlockPlacement::buildChain(
BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
if (!BestSucc) {
- BestSucc = getFirstUnplacedBlock(Chain, Blocks, PrevUnplacedBlockIdx);
+ BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
+ BlockFilter);
if (!BestSucc)
break;
@@ -579,8 +583,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
BlockWorkList.push_back(*BI);
}
- buildChain(*L.block_begin(), LoopChain, L.getBlocks(), BlockWorkList,
- &LoopBlockSet);
+ buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet);
DEBUG({
// Crash at the end so we get all of the debugging output first.
@@ -630,17 +633,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
++LI)
buildLoopChains(F, **LI);
- // We need a vector of blocks so that buildChain can handle unnatural CFG
- // constructs by searching for unplaced blocks and just concatenating them.
- SmallVector<MachineBasicBlock *, 16> Blocks;
- Blocks.reserve(F.size());
-
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
MachineBasicBlock *BB = &*FI;
- Blocks.push_back(BB);
BlockChain &Chain = *BlockToChain[BB];
if (!UpdatedPreds.insert(&Chain))
continue;
@@ -663,7 +660,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
BlockChain &FunctionChain = *BlockToChain[&F.front()];
- buildChain(&F.front(), FunctionChain, Blocks, BlockWorkList);
+ buildChain(&F.front(), FunctionChain, BlockWorkList);
typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
DEBUG({
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index e41d52c418..6220ae3286 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -241,8 +241,8 @@ define void @unnatural_cfg1() {
; CHECK: unnatural_cfg1
; CHECK: %entry
; CHECK: %loop.body1
-; CHECK: %loop.body3
; CHECK: %loop.body2
+; CHECK: %loop.body3
entry:
br label %loop.header
@@ -272,6 +272,77 @@ loop.body5:
br label %loop.body3
}
+define void @unnatural_cfg2() {
+; Test that we can handle a loop with a nested natural loop *and* an unnatural
+; loop. This was reduced from a crash on block placement when run over
+; single-source GCC.
+; CHECK: unnatural_cfg2
+; CHECK: %entry
+; CHECK: %loop.header
+; CHECK: %loop.body1
+; CHECK: %loop.body2
+; CHECK: %loop.body3
+; CHECK: %loop.inner1.begin
+; The end block is folded with %loop.body3...
+; CHECK-NOT: %loop.inner1.end
+; CHECK: %loop.body4
+; CHECK: %loop.inner2.begin
+; The loop.inner2.end block is folded
+; CHECK: %bail
+
+entry:
+ br label %loop.header
+
+loop.header:
+ %comp0 = icmp eq i32* undef, null
+ br i1 %comp0, label %bail, label %loop.body1
+
+loop.body1:
+ %val0 = load i32** undef, align 4
+ br i1 undef, label %loop.body2, label %loop.inner1.begin
+
+loop.body2:
+ br i1 undef, label %loop.body4, label %loop.body3
+
+loop.body3:
+ %ptr1 = getelementptr inbounds i32* %val0, i32 0
+ %castptr1 = bitcast i32* %ptr1 to i32**
+ %val1 = load i32** %castptr1, align 4
+ br label %loop.inner1.begin
+
+loop.inner1.begin:
+ %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ]
+ %castval = bitcast i32* %valphi to i32*
+ %comp1 = icmp eq i32 undef, 48
+ br i1 %comp1, label %loop.inner1.end, label %loop.body4
+
+loop.inner1.end:
+ %ptr2 = getelementptr inbounds i32* %valphi, i32 0
+ %castptr2 = bitcast i32* %ptr2 to i32**
+ %val2 = load i32** %castptr2, align 4
+ br label %loop.inner1.begin
+
+loop.body4.dead:
+ br label %loop.body4
+
+loop.body4:
+ %comp2 = icmp ult i32 undef, 3
+ br i1 %comp2, label %loop.inner2.begin, label %loop.end
+
+loop.inner2.begin:
+ br i1 false, label %loop.end, label %loop.inner2.end
+
+loop.inner2.end:
+ %comp3 = icmp eq i32 undef, 1769472
+ br i1 %comp3, label %loop.end, label %loop.inner2.begin
+
+loop.end:
+ br label %loop.header
+
+bail:
+ unreachable
+}
+
define i32 @problematic_switch() {
; This function's CFG caused overflow in the machine branch probability
; calculation, triggering asserts. Make sure we don't crash on it.