diff options
author | Chris Lattner <sabre@nondot.org> | 2011-01-02 07:58:36 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2011-01-02 07:58:36 +0000 |
commit | cf078f2b20899a3a19fb2044cc08dff409f13276 (patch) | |
tree | 10d9344a15babd779c7bf7ceb7385c7f78d6e2e6 | |
parent | c9e152b778715afc5cdfdb98cb0e98757ed827d8 (diff) |
Allow loop-idiom to run on multiple BB loops, but still only scan the loop
header for now for memset/memcpy opportunities. It turns out that loop-rotate
is successfully rotating loops, but *DOESN'T MERGE THE BLOCKS*, turning "for
loops" into 2 basic block loops that loop-idiom was ignoring.
With this fix, we form many *many* more memcpy and memsets than before, including
on the "history" loops in the viterbi benchmark, which look like this:
for (j=0; j<MAX_history; ++j) {
history_new[i][j+1] = history[2*i][j];
}
Transforming these loops into memcpy's speeds up the viterbi benchmark from
11.98s to 3.55s on my machine. Woo.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122685 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/README.txt | 8 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 10 | ||||
-rw-r--r-- | test/Transforms/LoopIdiom/basic.ll | 24 |
3 files changed, 29 insertions, 13 deletions
diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 4e374e59d6..f047d087cb 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -414,14 +414,6 @@ this construct. //===---------------------------------------------------------------------===// -[LOOP RECOGNITION] - -viterbi speeds up *significantly* if the various "history" related copy loops -are turned into memcpy calls at the source level. We need a "loops to memcpy" -pass. - -//===---------------------------------------------------------------------===// - [LOOP OPTIMIZATION] SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index e9394cd5c0..84e33f062b 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -126,11 +126,6 @@ static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; - // We only look at trivial single basic block loops. - // TODO: eventually support more complex loops, scanning the header. - if (L->getBlocks().size() != 1) - return false; - // The trip count of the loop must be analyzable. SE = &getAnalysis<ScalarEvolution>(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) @@ -142,6 +137,11 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { TD = getAnalysisIfAvailable<TargetData>(); if (TD == 0) return false; + // TODO: We currently only scan the header of the loop, because it is the only + // part that is known to execute and we don't want to make a conditional store + // into an unconditional one in the preheader. However, there can be diamonds + // and other things in the loop that would make other blocks "always executed" + // we should get the full set and scan each block. BasicBlock *BB = L->getHeader(); DEBUG(dbgs() << "loop-idiom Scanning: F[" << BB->getParent()->getName() << "] Loop %" << BB->getName() << "\n"); diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll index 8929fe4801..589fea4f3a 100644 --- a/test/Transforms/LoopIdiom/basic.ll +++ b/test/Transforms/LoopIdiom/basic.ll @@ -21,6 +21,30 @@ for.end: ; preds = %for.body, %entry ; CHECK-NOT: store } +; This is a loop that was rotated but where the blocks weren't merged. This +; shouldn't perturb us. +define void @test1a(i8* %Base, i64 %Size) nounwind ssp { +bb.nph: ; preds = %entry + br label %for.body + +for.body: ; preds = %bb.nph, %for.body + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ] + %I.0.014 = getelementptr i8* %Base, i64 %indvar + store i8 0, i8* %I.0.014, align 1 + %indvar.next = add i64 %indvar, 1 + br label %for.body.cont +for.body.cont: + %exitcond = icmp eq i64 %indvar.next, %Size + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +; CHECK: @test1a +; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) +; CHECK-NOT: store +} + + define void @test2(i32* %Base, i64 %Size) nounwind ssp { entry: %cmp10 = icmp eq i64 %Size, 0 |