aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Lattner <sabre@nondot.org> 2010-12-26 23:42:51 +0000
committer Chris Lattner <sabre@nondot.org> 2010-12-26 23:42:51 +0000
commit a92ff91a967c63a2395a34c9e8331a7d50d6ab10 (patch)
tree 742fb55b9aed958aafb122dfe55b0b138c815f31
parent 61db1f56d0b717d67557bbb2a9d83af1449458cb (diff)
implement enough of the memset inference algorithm to recognize and insert
memsets. This is still missing one important validity check, but this is
enough to compile stuff like this:

void test0(std::vector<char> &X) {
  for (std::vector<char>::iterator I = X.begin(), E = X.end(); I != E; ++I)
    *I = 0;
}

void test1(std::vector<int> &X) {
  for (long i = 0, e = X.size(); i != e; ++i)
    X[i] = 0x01010101;
}

With:
  $ clang t.cpp -S -o - -O2 -emit-llvm | opt -loop-idiom | opt -O3 | llc

to:

__Z5test0RSt6vectorIcSaIcEE:            ## @_Z5test0RSt6vectorIcSaIcEE
## BB#0:                                ## %entry
        subq    $8, %rsp
        movq    (%rdi), %rax
        movq    8(%rdi), %rsi
        cmpq    %rsi, %rax
        je      LBB0_2
## BB#1:                                ## %bb.nph
        subq    %rax, %rsi
        movq    %rax, %rdi
        callq   ___bzero
LBB0_2:                                 ## %for.end
        addq    $8, %rsp
        ret
...
__Z5test1RSt6vectorIiSaIiEE:            ## @_Z5test1RSt6vectorIiSaIiEE
## BB#0:                                ## %entry
        subq    $8, %rsp
        movq    (%rdi), %rax
        movq    8(%rdi), %rdx
        subq    %rax, %rdx
        cmpq    $4, %rdx
        jb      LBB1_2
## BB#1:                                ## %for.body.preheader
        andq    $-4, %rdx
        movl    $1, %esi
        movq    %rax, %rdi
        callq   _memset
LBB1_2:                                 ## %for.end
        addq    $8, %rsp
        ret

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122573 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/llvm/Analysis/ScalarEvolution.h 3
-rw-r--r-- lib/Transforms/Scalar/LoopIdiomRecognize.cpp 89
-rw-r--r-- test/Transforms/LoopIdiom/basic.ll 44
-rw-r--r-- test/Transforms/LoopIdiom/dg.exp 3
4 files changed, 126 insertions, 13 deletions
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 38c50d1fbe..d910406771 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -539,8 +539,7 @@ namespace llvm {
/// getMinusSCEV - Return LHS-RHS.
///
- const SCEV *getMinusSCEV(const SCEV *LHS,
- const SCEV *RHS);
+ const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS);
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion
/// of the input value to the specified type. If the type must be
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index eb7d4930ed..a5e2b2dba1 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -17,9 +17,11 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -41,6 +43,11 @@ namespace {
bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize,
+ Value *SplatValue,
+ const SCEVAddRecExpr *Ev,
+ const SCEV *BECount);
+
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
///
@@ -96,8 +103,11 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
bool MadeChange = false;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
// Look for store instructions, which may be memsets.
- if (StoreInst *SI = dyn_cast<StoreInst>(I++))
- MadeChange |= processLoopStore(SI, BECount);
+ StoreInst *SI = dyn_cast<StoreInst>(I++);
+ if (SI == 0 || SI->isVolatile()) continue;
+
+
+ MadeChange |= processLoopStore(SI, BECount);
}
return MadeChange;
@@ -106,6 +116,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
/// processLoopStore - See if this store in the loop can be turned into a memset.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
// Check to see if the store updates all bits in memory. We don't want to
// process things like a store of i3. We also require that the store be a
@@ -118,8 +129,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
- const SCEVAddRecExpr *Ev =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(SI->getPointerOperand()));
+ const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
return false;
@@ -130,18 +140,75 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
if (Stride == 0 || StoreSize != Stride->getValue()->getValue())
return false;
- errs() << "Found strided store: " << *Ev << "\n";
-
- // Check for memcpy here.
-
-
// If the stored value is a byte-wise value (like i32 -1), then it may be
// turned into a memset of i8 -1, assuming that all the consecutive bytes
// are stored. A store of i32 0x01020304 can never be turned into a memset.
- Value *SplatValue = isBytewiseValue(StoredVal);
- if (SplatValue == 0) return false;
+ if (Value *SplatValue = isBytewiseValue(StoredVal))
+ return processLoopStoreOfSplatValue(SI, StoreSize, SplatValue, Ev, BECount);
+
+ // Handle the memcpy case here.
+ errs() << "Found strided store: " << *Ev << "\n";
return false;
}
+/// processLoopStoreOfSplatValue - SI is a strided store of a memset-able value
+/// (SplatValue). Emit a memset in the loop preheader, erase SI, return true.
+bool LoopIdiomRecognize::
+processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize,
+ Value *SplatValue,
+ const SCEVAddRecExpr *Ev, const SCEV *BECount) {
+ // We have a strided store "p[i]" of a splattable value, which can be turned
+ // into a single memset in the loop preheader. However, doing so is unsafe
+ // if anything else in the loop may read or write the stored-to memory, so
+ // an alias check is required before transforming.

+ // FIXME: That alias safety check is not implemented yet.

+ // No check is made yet, so the memset is inserted unconditionally.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();

+ IRBuilder<> Builder(Preheader->getTerminator());

+ // The trip count of the loop and the base pointer of the addrec SCEV are
+ // guaranteed to be loop invariant, which means that they should dominate
+ // the header. Just insert code for them in the preheader.
+ SCEVExpander Expander(*SE);

+ unsigned AddrSpace = SI->getPointerAddressSpace();
+ Value *BasePtr =
+ Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+ Preheader->getTerminator());

+ // The number of bytes stored is (BECount+1)*StoreSize. Zero-extend or
+ // truncate the backedge-taken count to pointer width if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ unsigned BESize = SE->getTypeSizeInBits(BECount->getType());
+ if (BESize < TD->getPointerSizeInBits())
+ BECount = SE->getZeroExtendExpr(BECount, IntPtr);
+ else if (BESize > TD->getPointerSizeInBits())
+ BECount = SE->getTruncateExpr(BECount, IntPtr);

+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ true, true /*nooverflow*/);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ true, true /*nooverflow*/);

+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

+ Value *NewCall =
+ Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, SI->getAlignment());

+ DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
+ << " from store to: " << *Ev << " at: " << *SI << "\n");

+ // The memset has been formed, so remove the original store from the loop.
+ // FIXME: We want to recursively delete instructions made dead by this, but
+ // doing so requires updating SCEV.
+ SI->eraseFromParent();
+ return true;
+}
+
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
new file mode 100644
index 0000000000..f694474a6c
--- /dev/null
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -0,0 +1,44 @@
+; RUN: opt -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define void @test1(i8* %Base, i64 %Size) nounwind ssp {
+bb.nph: ; entry block, no predecessors
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8* %Base, i64 %indvar
+ store i8 0, i8* %I.0.014, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+; CHECK: @test1
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
+; CHECK-NOT: store
+}
+
+define void @test2(i32* %Base, i64 %Size) nounwind ssp {
+entry:
+ %cmp10 = icmp eq i64 %Size, 0
+ br i1 %cmp10, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %add.ptr.i = getelementptr i32* %Base, i64 %i.011
+ store i32 16843009, i32* %add.ptr.i, align 4 ; 16843009 == 0x01010101, every byte is 0x01
+ %inc = add nsw i64 %i.011, 1
+ %exitcond = icmp eq i64 %inc, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+; CHECK: @test2
+; CHECK: br i1 %cmp10,
+; CHECK: %tmp = mul i64 %Size, 4
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %tmp, i32 4, i1 false)
+; CHECK-NOT: store
+}
diff --git a/test/Transforms/LoopIdiom/dg.exp b/test/Transforms/LoopIdiom/dg.exp
new file mode 100644
index 0000000000..f2005891a5
--- /dev/null
+++ b/test/Transforms/LoopIdiom/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]