aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms
diff options
context:
space:
mode:
authorDerek Schuff <dschuff@chromium.org>2012-08-21 17:32:13 -0700
committerDerek Schuff <dschuff@chromium.org>2012-08-21 17:32:13 -0700
commit66f271497ed92ebb05c66f54616e512606a2e314 (patch)
tree96d54cd64804ab7c9f2f52f680c3301aa789ce1d /lib/Transforms
parentb62e9abf7dd9e39c95327914ce9dfe216386824a (diff)
parentbc363931085587bac42a40653962a3e5acd1ffce (diff)
Merge up to r162331, git commit bc363931085587bac42a40653962a3e5acd1ffce
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp5
-rw-r--r--lib/Transforms/IPO/ExtractGV.cpp18
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp188
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp2
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp5
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h2
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp41
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp231
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp15
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp29
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp109
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp421
-rw-r--r--lib/Transforms/Instrumentation/BoundsChecking.cpp (renamed from lib/Transforms/Scalar/BoundsChecking.cpp)19
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt1
-rw-r--r--lib/Transforms/Instrumentation/Instrumentation.cpp5
-rw-r--r--lib/Transforms/Instrumentation/MaximumSpanningTree.h53
-rw-r--r--lib/Transforms/Instrumentation/PathProfiling.cpp2
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp5
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp16
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp128
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp41
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp80
-rw-r--r--lib/Transforms/Scalar/GVN.cpp369
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp6
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp74
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp8
-rw-r--r--lib/Transforms/Scalar/LICM.cpp37
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp50
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp7
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp85
-rw-r--r--lib/Transforms/Scalar/LowerAtomic.cpp4
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp164
-rw-r--r--lib/Transforms/Scalar/ObjCARC.cpp9
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp7
-rw-r--r--lib/Transforms/Scalar/Reg2Mem.cpp27
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp2
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp5
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp321
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp55
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp130
-rw-r--r--lib/Transforms/Scalar/Sink.cpp66
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp10
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp24
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp126
-rw-r--r--lib/Transforms/Utils/LowerExpectIntrinsic.cpp8
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp7
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp6
51 files changed, 1866 insertions, 1176 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 95a3b3d7c8..b94dd69deb 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -245,10 +245,7 @@ static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
const ArgPromotion::IndicesVector &Longer) {
if (Prefix.size() > Longer.size())
return false;
- for (unsigned i = 0, e = Prefix.size(); i != e; ++i)
- if (Prefix[i] != Longer[i])
- return false;
- return true;
+ return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
}
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 51d0a002e1..b2748f2e6c 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -53,11 +53,11 @@ namespace {
I != E; ++I) {
if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
I->setInitializer(0);
- } else {
- if (I->hasAvailableExternallyLinkage())
- continue;
- if (I->getName() == "llvm.global_ctors")
- continue;
+ } else {
+ if (I->hasAvailableExternallyLinkage())
+ continue;
+ if (I->getName() == "llvm.global_ctors")
+ continue;
// @LOCALMOD-BEGIN - this is likely upstreamable
// Note: there will likely be more cases once this
// is exercises more thorougly.
@@ -67,7 +67,7 @@ namespace {
if (I->hasExternalWeakLinkage())
continue;
// @LOCALMOD-END
- }
+ }
if (I->hasLocalLinkage())
I->setVisibility(GlobalValue::HiddenVisibility);
@@ -78,9 +78,9 @@ namespace {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
I->deleteBody();
- } else {
- if (I->hasAvailableExternallyLinkage())
- continue;
+ } else {
+ if (I->hasAvailableExternallyLinkage())
+ continue;
// @LOCALMOD-BEGIN - this is likely upstreamable
// Note: there will likely be more cases once this
// is exercises more thorougly.
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 4e1c23c198..6d950d2024 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -296,6 +296,168 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
return false;
}
+/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
+/// as a root? If so, we might not really want to eliminate the stores to it.
+static bool isLeakCheckerRoot(GlobalVariable *GV) {
+ // A global variable is a root if it is a pointer, or could plausibly contain
+ // a pointer. There are two challenges; one is that we could have a struct
+ // the has an inner member which is a pointer. We recurse through the type to
+ // detect these (up to a point). The other is that we may actually be a union
+ // of a pointer and another type, and so our LLVM type is an integer which
+ // gets converted into a pointer, or our type is an [i8 x #] with a pointer
+ // potentially contained here.
+
+ if (GV->hasPrivateLinkage())
+ return false;
+
+ SmallVector<Type *, 4> Types;
+ Types.push_back(cast<PointerType>(GV->getType())->getElementType());
+
+ unsigned Limit = 20;
+ do {
+ Type *Ty = Types.pop_back_val();
+ switch (Ty->getTypeID()) {
+ default: break;
+ case Type::PointerTyID: return true;
+ case Type::ArrayTyID:
+ case Type::VectorTyID: {
+ SequentialType *STy = cast<SequentialType>(Ty);
+ Types.push_back(STy->getElementType());
+ break;
+ }
+ case Type::StructTyID: {
+ StructType *STy = cast<StructType>(Ty);
+ if (STy->isOpaque()) return true;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Type *InnerTy = *I;
+ if (isa<PointerType>(InnerTy)) return true;
+ if (isa<CompositeType>(InnerTy))
+ Types.push_back(InnerTy);
+ }
+ break;
+ }
+ }
+ if (--Limit == 0) return true;
+ } while (!Types.empty());
+ return false;
+}
+
+/// Given a value that is stored to a global but never read, determine whether
+/// it's safe to remove the store and the chain of computation that feeds the
+/// store.
+static bool IsSafeComputationToRemove(Value *V) {
+ do {
+ if (isa<Constant>(V))
+ return true;
+ if (!V->hasOneUse())
+ return false;
+ if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
+ isa<GlobalValue>(V))
+ return false;
+ if (isAllocationFn(V))
+ return true;
+
+ Instruction *I = cast<Instruction>(V);
+ if (I->mayHaveSideEffects())
+ return false;
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ if (!GEP->hasAllConstantIndices())
+ return false;
+ } else if (I->getNumOperands() != 1) {
+ return false;
+ }
+
+ V = I->getOperand(0);
+ } while (1);
+}
+
+/// CleanupPointerRootUsers - This GV is a pointer root. Loop over all users
+/// of the global and clean up any that obviously don't assign the global a
+/// value that isn't dynamically allocated.
+///
+static bool CleanupPointerRootUsers(GlobalVariable *GV) {
+ // A brief explanation of leak checkers. The goal is to find bugs where
+ // pointers are forgotten, causing an accumulating growth in memory
+ // usage over time. The common strategy for leak checkers is to whitelist the
+ // memory pointed to by globals at exit. This is popular because it also
+ // solves another problem where the main thread of a C++ program may shut down
+ // before other threads that are still expecting to use those globals. To
+ // handle that case, we expect the program may create a singleton and never
+ // destroy it.
+
+ bool Changed = false;
+
+ // If Dead[n].first is the only use of a malloc result, we can delete its
+ // chain of computation and the store to the global in Dead[n].second.
+ SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
+
+ // Constants can't be pointers to dynamically allocated memory.
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E;) {
+ User *U = *UI++;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ Value *V = SI->getValueOperand();
+ if (isa<Constant>(V)) {
+ Changed = true;
+ SI->eraseFromParent();
+ } else if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (I->hasOneUse())
+ Dead.push_back(std::make_pair(I, SI));
+ }
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
+ if (isa<Constant>(MSI->getValue())) {
+ Changed = true;
+ MSI->eraseFromParent();
+ } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
+ if (I->hasOneUse())
+ Dead.push_back(std::make_pair(I, MSI));
+ }
+ } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
+ GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
+ if (MemSrc && MemSrc->isConstant()) {
+ Changed = true;
+ MTI->eraseFromParent();
+ } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
+ if (I->hasOneUse())
+ Dead.push_back(std::make_pair(I, MTI));
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->use_empty()) {
+ CE->destroyConstant();
+ Changed = true;
+ }
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ if (SafeToDestroyConstant(C)) {
+ C->destroyConstant();
+ // This could have invalidated UI, start over from scratch.
+ Dead.clear();
+ CleanupPointerRootUsers(GV);
+ return true;
+ }
+ }
+ }
+
+ for (int i = 0, e = Dead.size(); i != e; ++i) {
+ if (IsSafeComputationToRemove(Dead[i].first)) {
+ Dead[i].second->eraseFromParent();
+ Instruction *I = Dead[i].first;
+ do {
+ if (isAllocationFn(I))
+ break;
+ Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
+ if (!J)
+ break;
+ I->eraseFromParent();
+ I = J;
+ } while (1);
+ I->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
/// users of the global, cleaning up the obvious ones. This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft. This
@@ -812,13 +974,18 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
// If we nuked all of the loads, then none of the stores are needed either,
// nor is the global.
if (AllNonStoreUsesGone) {
- DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
- CleanupConstantGlobalUsers(GV, 0, TD, TLI);
+ if (isLeakCheckerRoot(GV)) {
+ Changed |= CleanupPointerRootUsers(GV);
+ } else {
+ Changed = true;
+ CleanupConstantGlobalUsers(GV, 0, TD, TLI);
+ }
if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
+ Changed = true;
GV->eraseFromParent();
++NumDeleted;
}
- Changed = true;
}
return Changed;
}
@@ -1794,10 +1961,15 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
if (!GS.isLoaded) {
DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
- // Delete any stores we can find to the global. We may not be able to
- // make it completely dead though.
- bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(),
- TD, TLI);
+ bool Changed;
+ if (isLeakCheckerRoot(GV)) {
+ // Delete any constant stores to the global.
+ Changed = CleanupPointerRootUsers(GV);
+ } else {
+ // Delete any stores we can find to the global. We may not be able to
+ // make it completely dead though.
+ Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
+ }
// If the global is dead now, delete it.
if (GV->use_empty()) {
@@ -1845,7 +2017,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
if (GV->use_empty()) {
DEBUG(dbgs() << " *** Substituting initializer allowed us to "
- << "simplify all users and delete global!\n");
+ << "simplify all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
} else {
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 715a384adc..9f70f668a8 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -389,7 +389,7 @@ bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
if (!C2) return false;
// TODO: constant expressions with GEP or references to F1 or F2.
if (C1->isNullValue() && C2->isNullValue() &&
- isEquivalentType(C1->getType(), C2->getType()))
+ isEquivalentType(C1->getType(), C2->getType()))
return true;
// Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
// then they must have equal bit patterns.
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index d8e8cf77dd..80bfc1cdb2 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -27,6 +27,7 @@
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/TypeFinder.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
@@ -175,8 +176,8 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
// Strip any named types of their names.
static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
- std::vector<StructType*> StructTypes;
- M.findUsedStructTypes(StructTypes);
+ TypeFinder StructTypes;
+ StructTypes.run(M, false);
for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
StructType *STy = StructTypes[i];
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index c2b0e03b40..0d5ef904ee 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -187,7 +187,7 @@ public:
Instruction *visitPHINode(PHINode &PN);
Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
Instruction *visitAllocaInst(AllocaInst &AI);
- Instruction *visitMalloc(Instruction &FI);
+ Instruction *visitAllocSite(Instruction &FI);
Instruction *visitFree(CallInst &FI);
Instruction *visitLoadInst(LoadInst &LI);
Instruction *visitStoreInst(StoreInst &SI);
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f74cff85c6..cbe1ca4ddc 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -51,8 +51,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// if the size is something we can handle with a single primitive load/store.
// A single load+store correctly handles overlapping memory in the memmove
// case.
- unsigned Size = MemOpLength->getZExtValue();
- if (Size == 0) return MI; // Delete this mem transfer.
+ uint64_t Size = MemOpLength->getLimitedValue();
+ assert(Size && "0-sized memory transfering should be removed already.");
if (Size > 8 || (Size&(Size-1)))
return 0; // If not 1/2/4/8 bytes, exit.
@@ -133,11 +133,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
return 0;
- uint64_t Len = LenC->getZExtValue();
+ uint64_t Len = LenC->getLimitedValue();
Alignment = MI->getAlignment();
-
- // If the length is zero, this is a no-op
- if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+ assert(Len && "0-sized memory setting should be removed already.");
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
@@ -795,7 +793,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
if (CI->getCalledFunction() == 0) return 0;
InstCombineFortifiedLibCalls Simplifier(this);
- Simplifier.fold(CI, TD);
+ Simplifier.fold(CI, TD, TLI);
return Simplifier.NewInstruction;
}
@@ -880,7 +878,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
if (isAllocLikeFn(CS.getInstruction()))
- return visitMalloc(*CS.getInstruction());
+ return visitAllocSite(*CS.getInstruction());
bool Changed = false;
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7076d88554..c3fc18c300 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -2824,7 +2825,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
case ICmpInst::ICMP_UGE:
// (float)int >= -4.4 --> true
// (float)int >= 4.4 --> int > 4
- if (!RHS.isNegative())
+ if (RHS.isNegative())
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
Pred = ICmpInst::ICMP_UGT;
break;
@@ -2985,6 +2986,44 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return Res;
}
break;
+ case Instruction::Call: {
+ CallInst *CI = cast<CallInst>(LHSI);
+ LibFunc::Func Func;
+ // Various optimization for fabs compared with zero.
+ if (RHSC->isNullValue() && CI->getCalledFunction() &&
+ TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+ TLI->has(Func)) {
+ if (Func == LibFunc::fabs || Func == LibFunc::fabsf ||
+ Func == LibFunc::fabsl) {
+ switch (I.getPredicate()) {
+ default: break;
+ // fabs(x) < 0 --> false
+ case FCmpInst::FCMP_OLT:
+ return ReplaceInstUsesWith(I, Builder->getFalse());
+ // fabs(x) > 0 --> x != 0
+ case FCmpInst::FCMP_OGT:
+ return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) <= 0 --> x == 0
+ case FCmpInst::FCMP_OLE:
+ return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) >= 0 --> !isnan(x)
+ case FCmpInst::FCMP_OGE:
+ return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) == 0 --> x == 0
+ // fabs(x) != 0 --> x != 0
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UNE:
+ return new FCmpInst(I.getPredicate(), CI->getArgOperand(0),
+ RHSC);
+ }
+ }
+ }
+ }
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index b9df5eb81e..6ecb4c52c4 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -20,72 +20,153 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumDeadStore, "Number of dead stores eliminated");
-
-// Try to kill dead allocas by walking through its uses until we see some use
-// that could escape. This is a conservative analysis which tries to handle
-// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things
-// left after inlining and SROA finish chewing on an alloca.
-static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) {
- SmallVector<Instruction *, 4> Worklist, DeadStores;
- Worklist.push_back(&AI);
- do {
- Instruction *PI = Worklist.pop_back_val();
- for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end();
- UI != UE; ++UI) {
- Instruction *I = cast<Instruction>(*UI);
- switch (I->getOpcode()) {
- default:
- // Give up the moment we see something we can't handle.
- return 0;
-
- case Instruction::GetElementPtr:
- case Instruction::BitCast:
- Worklist.push_back(I);
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
+
+/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable. This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
+static bool pointsToConstantGlobal(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr)
+ return pointsToConstantGlobal(CE->getOperand(0));
+ return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca. Ignore any reads of the pointer, return false if we
+/// see any stores or other unknown uses. If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
+/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
+/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// can optimize this.
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ SmallVectorImpl<Instruction *> &ToDelete,
+ bool IsOffset = false) {
+ // We track lifetime intrinsics as we encounter them. If we decide to go
+ // ahead and replace the value with the global, this lets the caller quickly
+ // eliminate the markers.
+
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Ignore non-volatile loads, they are always ok.
+ if (!LI->isSimple()) return false;
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ // If uses of the bitcast are ok, we are ok.
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
+ return false;
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer. If it
+ // doesn't, it does.
+ if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
+ IsOffset || !GEP->hasAllZeroIndices()))
+ return false;
+ continue;
+ }
+
+ if (CallSite CS = U) {
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
continue;
- case Instruction::Call:
- // We can handle a limited subset of calls to no-op intrinsics.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- continue;
- default:
- return 0;
- }
- }
- // Reject everything else.
- return 0;
-
- case Instruction::Store: {
- // Stores into the alloca are only live if the alloca is live.
- StoreInst *SI = cast<StoreInst>(I);
- // We can eliminate atomic stores, but not volatile.
- if (SI->isVolatile())
- return 0;
- // The store is only trivially safe if the poniter is the destination
- // as opposed to the value. We're conservative here and don't check for
- // the case where we store the address of a dead alloca into a dead
- // alloca.
- if (SI->getPointerOperand() != PI)
- return 0;
- DeadStores.push_back(I);
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load (but one that potentially returns the value itself), so we can
+ // ignore it if we know that the value isn't captured.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.onlyReadsMemory() &&
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ if (CS.isByValArgument(ArgNo))
+ continue;
+ }
+
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ ToDelete.push_back(II);
continue;
}
- }
}
- } while (!Worklist.empty());
- // The alloca is dead. Kill off all the stores to it, and then replace it
- // with undef.
- while (!DeadStores.empty())
- IC.EraseInstFromFunction(*DeadStores.pop_back_val());
- return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType()));
+ // If this is isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+ if (MI == 0)
+ return false;
+
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
+
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (IsOffset) return false;
+
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (UI.getOperandNo() != 0) return false;
+
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!pointsToConstantGlobal(MI->getSource()))
+ return false;
+
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
+ return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
+/// modified by a copy from a constant global. If we can prove this, we can
+/// replace any uses of the alloca with uses of the global directly.
+static MemTransferInst *
+isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+ SmallVectorImpl<Instruction *> &ToDelete) {
+ MemTransferInst *TheCopy = 0;
+ if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
+ return TheCopy;
+ return 0;
+}
+
+/// getPointeeAlignment - Compute the minimum alignment of the value pointed
+/// to by the given pointer.
+static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ (CE->getOpcode() == Instruction::GetElementPtr &&
+ cast<GEPOperator>(CE)->hasAllZeroIndices()))
+ return getPointeeAlignment(CE->getOperand(0), TD);
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (!GV->isDeclaration())
+ return TD.getPreferredAlignment(GV);
+
+ if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
+ return TD.getABITypeAlignment(PT->getElementType());
+
+ return 0;
}
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
@@ -179,10 +260,32 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
}
}
- // Try to aggressively remove allocas which are only used for GEPs, lifetime
- // markers, and stores. This happens when SROA iteratively promotes stores
- // out of the alloca, and we need to cleanup after it.
- return removeDeadAlloca(*this, AI);
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global whose alignment is equal to or exceeds that of the
+ // allocation. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
+ if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ EraseInstFromFunction(*ToDelete[i]);
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ Instruction *NewI
+ = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
+ AI.getType()));
+ EraseInstFromFunction(*Copy);
+ ++NumGlobalCopies;
+ return NewI;
+ }
+ }
+
+ // At last, use the generic allocation site handler to aggressively remove
+ // unused allocas.
+ return visitAllocSite(AI);
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index eb9945b681..291e80019e 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -881,12 +881,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
if (TrueSI->getCondition() == CondVal) {
+ if (SI.getTrueValue() == TrueSI->getTrueValue())
+ return 0;
SI.setOperand(1, TrueSI->getTrueValue());
return &SI;
}
}
if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
if (FalseSI->getCondition() == CondVal) {
+ if (SI.getFalseValue() == FalseSI->getFalseValue())
+ return 0;
SI.setOperand(2, FalseSI->getFalseValue());
return &SI;
}
@@ -899,5 +903,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
+ if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
+ unsigned VWidth = VecTy->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) {
+ if (V != &SI)
+ return ReplaceInstUsesWith(SI, V);
+ return &SI;
+ }
+ }
+
return 0;
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 125c74a89a..54be8ed3fa 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -989,6 +989,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
break;
}
+ case Instruction::Select: {
+ APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
+ if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (CV->getAggregateElement(i)->isNullValue())
+ LeftDemanded.clearBit(i);
+ else
+ RightDemanded.clearBit(i);
+ }
+ }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined.
+ UndefElts &= UndefElts2;
+ break;
+ }
case Instruction::BitCast: {
// Vector->vector casts only.
VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
@@ -1074,6 +1097,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// like undef&0. The result is known zero, not undef.
UndefElts &= UndefElts2;
break;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
case Instruction::Call: {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index c5124bf7b2..68ecd51604 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -207,7 +207,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
if (MaintainNoSignedWrap(I, B, C) &&
- (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
+ (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
// Note: this is only valid because SimplifyBinOp doesn't look at
// the operands to Op0.
I.clearSubclassOptionalData();
@@ -1106,54 +1106,89 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
-static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl<WeakVH> &Users,
- int Depth = 0) {
- if (Depth == 8)
- return false;
+static bool
+isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) {
+ SmallVector<Instruction*, 4> Worklist;
+ Worklist.push_back(AI);
- for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
- UI != UE; ++UI) {
- User *U = *UI;
- if (isFreeCall(U)) {
- Users.push_back(U);
- continue;
- }
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
- if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) {
- Users.push_back(ICI);
+ do {
+ Instruction *PI = Worklist.pop_back_val();
+ for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); UI != UE;
+ ++UI) {
+ Instruction *I = cast<Instruction>(*UI);
+ switch (I->getOpcode()) {
+ default:
+ // Give up the moment we see something we can't handle.
+ return false;
+
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ Users.push_back(I);
+ Worklist.push_back(I);
continue;
- }
- }
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) {
- Users.push_back(BCI);
+
+ case Instruction::ICmp: {
+ ICmpInst *ICI = cast<ICmpInst>(I);
+ // We can fold eq/ne comparisons with null to false/true, respectively.
+ if (!ICI->isEquality() || !isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return false;
+ Users.push_back(I);
continue;
}
- }
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
- if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) {
- Users.push_back(GEPI);
+
+ case Instruction::Call:
+ // Ignore no-op and store intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::memset: {
+ MemIntrinsic *MI = cast<MemIntrinsic>(II);
+ if (MI->isVolatile() || MI->getRawDest() != PI)
+ return false;
+ }
+ // fall through
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ Users.push_back(I);
+ continue;
+ }
+ }
+
+ if (isFreeCall(I)) {
+ Users.push_back(I);
+ continue;
+ }
+ return false;
+
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(I);
+ if (SI->isVolatile() || SI->getPointerOperand() != PI)
+ return false;
+ Users.push_back(I);
continue;
}
- }
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- Users.push_back(II);
- continue;
}
+ llvm_unreachable("missing a return?");
}
- return false;
- }
+ } while (!Worklist.empty());
return true;
}
-Instruction *InstCombiner::visitMalloc(Instruction &MI) {
+Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
// If we have a malloc call which is only used in any amount of comparisons
// to null and free calls, delete the calls and replace the comparisons with
// true or false as appropriate.
SmallVector<WeakVH, 64> Users;
- if (IsOnlyNullComparedAndFreed(&MI, Users)) {
+ if (isAllocSiteRemovable(&MI, Users)) {
for (unsigned i = 0, e = Users.size(); i != e; ++i) {
Instruction *I = cast_or_null<Instruction>(&*Users[i]);
if (!I) continue;
@@ -1164,6 +1199,12 @@ Instruction *InstCombiner::visitMalloc(Instruction &MI) {
C->isFalseWhenEqual()));
} else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
ReplaceInstUsesWith(*I, UndefValue::get(I->getType()));
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::objectsize) {
+ ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
+ uint64_t DontKnow = CI->isZero() ? -1ULL : 0;
+ ReplaceInstUsesWith(*I, ConstantInt::get(I->getType(), DontKnow));
+ }
}
EraseInstFromFunction(*I);
}
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 482ebef2a2..06f4d2fedd 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -18,6 +18,7 @@
#include "FunctionBlackList.h"
#include "llvm/Function.h"
#include "llvm/IRBuilder.h"
+#include "llvm/InlineAsm.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
@@ -60,6 +61,8 @@ static const int kAsanCtorAndCtorPriority = 1;
static const char *kAsanReportErrorTemplate = "__asan_report_";
static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
+static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
+static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *kAsanInitName = "__asan_init";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
@@ -72,6 +75,9 @@ static const int kAsanStackMidRedzoneMagic = 0xf2;
static const int kAsanStackRightRedzoneMagic = 0xf3;
static const int kAsanStackPartialRedzoneMagic = 0xf4;
+// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
+static const size_t kNumberOfAccessSizes = 5;
+
// Command-line flags.
// This flag may need to be replaced with -f[no-]asan-reads.
@@ -82,7 +88,10 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",
cl::desc("instrument atomic instructions (rmw, cmpxchg)"),
cl::Hidden, cl::init(true));
-// This flags limits the number of instructions to be instrumented
+static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path",
+ cl::desc("use instrumentation with slow path for all accesses"),
+ cl::Hidden, cl::init(false));
+// This flag limits the number of instructions to be instrumented
// in any given BB. Normally, this should be set to unlimited (INT_MAX),
// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary
// set it to 10000.
@@ -99,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInitializers("asan-initialization-order",
+ cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClMemIntrin("asan-memintrin",
cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
// This flag may need to be replaced with -fasan-blacklist.
@@ -138,21 +149,34 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
namespace {
+/// An object of this type is created while instrumenting every function.
+struct AsanFunctionContext {
+ AsanFunctionContext(Function &Function) : F(Function) { }
+
+ Function &F;
+};
+
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public ModulePass {
AddressSanitizer();
virtual const char *getPassName() const;
- void instrumentMop(Instruction *I);
- void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
+ void instrumentMop(AsanFunctionContext &AFC, Instruction *I);
+ void instrumentAddress(AsanFunctionContext &AFC,
+ Instruction *OrigIns, IRBuilder<> &IRB,
Value *Addr, uint32_t TypeSize, bool IsWrite);
- Instruction *generateCrashCode(IRBuilder<> &IRB, Value *Addr,
- bool IsWrite, uint32_t TypeSize);
- bool instrumentMemIntrinsic(MemIntrinsic *MI);
- void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
- Value *Size,
+ Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
+ Value *ShadowValue, uint32_t TypeSize);
+ Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
+ bool IsWrite, size_t AccessSizeIndex);
+ bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI);
+ void instrumentMemIntrinsicParam(AsanFunctionContext &AFC,
+ Instruction *OrigIns, Value *Addr,
+ Value *Size,
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool handleFunction(Module &M, Function &F);
+ void createInitializerPoisonCalls(Module &M,
+ Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
bool poisonStackInFunction(Module &M, Function &F);
virtual bool runOnModule(Module &M);
@@ -160,7 +184,6 @@ struct AddressSanitizer : public ModulePass {
static char ID; // Pass identification, replacement for typeid
private:
-
uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
Type *Ty = AI->getAllocatedType();
uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
@@ -176,11 +199,13 @@ struct AddressSanitizer : public ModulePass {
}
Function *checkInterfaceFunction(Constant *FuncOrBitcast);
+ bool ShouldInstrumentGlobal(GlobalVariable *G);
void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
Value *ShadowBase, bool DoPoison);
bool LooksLikeCodeInBug11395(Instruction *I);
+ void FindDynamicInitializers(Module &M);
+ bool HasDynamicInitializer(GlobalVariable *G);
- Module *CurrentModule;
LLVMContext *C;
TargetData *TD;
uint64_t MappingOffset;
@@ -193,7 +218,12 @@ struct AddressSanitizer : public ModulePass {
Function *AsanInitFunction;
Instruction *CtorInsertBefore;
OwningPtr<FunctionBlackList> BL;
+ // This array is indexed by AccessIsWrite and log2(AccessSize).
+ Function *AsanErrorCallback[2][kNumberOfAccessSizes];
+ InlineAsm *EmptyAsm;
+ SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals;
};
+
} // namespace
char AddressSanitizer::ID = 0;
@@ -209,6 +239,12 @@ const char *AddressSanitizer::getPassName() const {
return "AddressSanitizer";
}
+static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
+ size_t Res = CountTrailingZeros_32(TypeSize / 8);
+ assert(Res < kNumberOfAccessSizes);
+ return Res;
+}
+
// Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
@@ -227,19 +263,24 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
// ThenBlock
// Tail
//
-// Returns the ThenBlock's terminator.
-static BranchInst *splitBlockAndInsertIfThen(Value *Cmp) {
+// ThenBlock block is created and its terminator is returned.
+// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise
+// it is terminated with BranchInst to Tail.
+static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) {
Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode();
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
TerminatorInst *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getParent()->getParent()->getContext();
- BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent());
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ TerminatorInst *CheckTerm;
+ if (Unreachable)
+ CheckTerm = new UnreachableInst(C, ThenBlock);
+ else
+ CheckTerm = BranchInst::Create(Tail, ThenBlock);
BranchInst *HeadNewTerm =
BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
-
- BranchInst *CheckTerm = BranchInst::Create(Tail, ThenBlock);
return CheckTerm;
}
@@ -253,12 +294,13 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
MappingOffset));
}
-void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns,
+void AddressSanitizer::instrumentMemIntrinsicParam(
+ AsanFunctionContext &AFC, Instruction *OrigIns,
Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
// Check the first byte.
{
IRBuilder<> IRB(InsertBefore);
- instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
+ instrumentAddress(AFC, OrigIns, IRB, Addr, 8, IsWrite);
}
// Check the last byte.
{
@@ -268,15 +310,16 @@ void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns,
SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
- instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
+ instrumentAddress(AFC, OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
}
}
// Instrument memset/memmove/memcpy
-bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
+ MemIntrinsic *MI) {
Value *Dst = MI->getDest();
MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
- Value *Src = MemTran ? MemTran->getSource() : NULL;
+ Value *Src = MemTran ? MemTran->getSource() : 0;
Value *Length = MI->getLength();
Constant *ConstLength = dyn_cast<Constant>(Length);
@@ -289,12 +332,12 @@ bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
Value *Cmp = IRB.CreateICmpNE(Length,
Constant::getNullValue(Length->getType()));
- InsertBefore = splitBlockAndInsertIfThen(Cmp);
+ InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
}
- instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
+ instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);
if (Src)
- instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
+ instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false);
return true;
}
@@ -324,14 +367,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
-void AddressSanitizer::instrumentMop(Instruction *I) {
+void AddressSanitizer::FindDynamicInitializers(Module& M) {
+ // Clang generates metadata identifying all dynamically initialized globals.
+ NamedMDNode *DynamicGlobals =
+ M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
+ if (!DynamicGlobals)
+ return;
+ for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
+ MDNode *MDN = DynamicGlobals->getOperand(i);
+ assert(MDN->getNumOperands() == 1);
+ Value *VG = MDN->getOperand(0);
+ // The optimizer may optimize away a global entirely, in which case we
+ // cannot instrument access to it.
+ if (!VG)
+ continue;
+
+ GlobalVariable *G = cast<GlobalVariable>(VG);
+ DynamicallyInitializedGlobals.insert(G);
+ }
+}
+// Returns true if a global variable is initialized dynamically in this TU.
+bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
+ return DynamicallyInitializedGlobals.count(G);
+}
+
+void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
bool IsWrite;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
assert(Addr);
- if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) {
- // We are accessing a global scalar variable. Nothing to catch here.
- return;
+ if (ClOpt && ClOptGlobals) {
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
+ // If initialization order checking is disabled, a simple access to a
+ // dynamically initialized global is always valid.
+ if (!ClInitializers)
+ return;
+ // If a global variable does not have dynamic initialization we don't
+ // have to instrument it. However, if a global has external linkage, we
+ // assume it has dynamic initialization, as it may have an initializer
+ // in a different TU.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ !HasDynamicInitializer(G))
+ return;
+ }
}
+
Type *OrigPtrTy = Addr->getType();
Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
@@ -345,7 +424,7 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
}
IRBuilder<> IRB(I);
- instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
+ instrumentAddress(AFC, I, IRB, Addr, TypeSize, IsWrite);
}
// Validate the result of Module::getOrInsertFunction called for an interface
@@ -360,18 +439,38 @@ Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) {
}
Instruction *AddressSanitizer::generateCrashCode(
- IRBuilder<> &IRB, Value *Addr, bool IsWrite, uint32_t TypeSize) {
- // IsWrite and TypeSize are encoded in the function name.
- std::string FunctionName = std::string(kAsanReportErrorTemplate) +
- (IsWrite ? "store" : "load") + itostr(TypeSize / 8);
- Value *ReportWarningFunc = CurrentModule->getOrInsertFunction(
- FunctionName, IRB.getVoidTy(), IntptrTy, NULL);
- CallInst *Call = IRB.CreateCall(ReportWarningFunc, Addr);
- Call->setDoesNotReturn();
+ Instruction *InsertBefore, Value *Addr,
+ bool IsWrite, size_t AccessSizeIndex) {
+ IRBuilder<> IRB(InsertBefore);
+ CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex],
+ Addr);
+ // We don't do Call->setDoesNotReturn() because the BB already has
+ // UnreachableInst at the end.
+ // This EmptyAsm is required to avoid callback merge.
+ IRB.CreateCall(EmptyAsm);
return Call;
}
-void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
+Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
+ Value *ShadowValue,
+ uint32_t TypeSize) {
+ size_t Granularity = 1 << MappingScale;
+ // Addr & (Granularity - 1)
+ Value *LastAccessedByte = IRB.CreateAnd(
+ AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+ // (Addr & (Granularity - 1)) + size - 1
+ if (TypeSize / 8 > 1)
+ LastAccessedByte = IRB.CreateAdd(
+ LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
+ // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
+ LastAccessedByte = IRB.CreateIntCast(
+ LastAccessedByte, ShadowValue->getType(), false);
+ // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
+ return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
+}
+
+void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
+ Instruction *OrigIns,
IRBuilder<> &IRB, Value *Addr,
uint32_t TypeSize, bool IsWrite) {
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
@@ -385,32 +484,117 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
-
- Instruction *CheckTerm = splitBlockAndInsertIfThen(Cmp);
- IRBuilder<> IRB2(CheckTerm);
-
+ size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
size_t Granularity = 1 << MappingScale;
- if (TypeSize < 8 * Granularity) {
- // Addr & (Granularity - 1)
- Value *LastAccessedByte = IRB2.CreateAnd(
- AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
- // (Addr & (Granularity - 1)) + size - 1
- if (TypeSize / 8 > 1)
- LastAccessedByte = IRB2.CreateAdd(
- LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
- // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
- LastAccessedByte = IRB2.CreateIntCast(
- LastAccessedByte, IRB.getInt8Ty(), false);
- // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
- Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue);
-
- CheckTerm = splitBlockAndInsertIfThen(Cmp2);
- }
-
- IRBuilder<> IRB1(CheckTerm);
- Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize);
+ TerminatorInst *CrashTerm = 0;
+
+ if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
+ TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false);
+ assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
+ BasicBlock *NextBB = CheckTerm->getSuccessor(0);
+ IRB.SetInsertPoint(CheckTerm);
+ Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
+ BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB);
+ CrashTerm = new UnreachableInst(*C, CrashBlock);
+ BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
+ ReplaceInstWithInst(CheckTerm, NewTerm);
+ } else {
+ CrashTerm = splitBlockAndInsertIfThen(Cmp, true);
+ }
+
+ Instruction *Crash =
+ generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex);
Crash->setDebugLoc(OrigIns->getDebugLoc());
- ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C));
+}
+
+void AddressSanitizer::createInitializerPoisonCalls(Module &M,
+ Value *FirstAddr,
+ Value *LastAddr) {
+ // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
+ Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
+ // If that function is not present, this TU contains no globals, or they have
+ // all been optimized away
+ if (!GlobalInit)
+ return;
+
+ // Set up the arguments to our poison/unpoison functions.
+ IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
+
+ // Declare our poisoning and unpoisoning functions.
+ Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
+ Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
+ AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
+
+ // Add a call to poison all external globals before the given function starts.
+ IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr);
+
+ // Add calls to unpoison all globals before each return instruction.
+ for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
+ I != E; ++I) {
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
+ CallInst::Create(AsanUnpoisonGlobals, "", RI);
+ }
+ }
+}
+
+bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
+ Type *Ty = cast<PointerType>(G->getType())->getElementType();
+ DEBUG(dbgs() << "GLOBAL: " << *G);
+
+ if (!Ty->isSized()) return false;
+ if (!G->hasInitializer()) return false;
+ // Touch only those globals that will not be defined in other modules.
+ // Don't handle ODR type linkages since other modules may be built w/o asan.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ G->getLinkage() != GlobalVariable::PrivateLinkage &&
+ G->getLinkage() != GlobalVariable::InternalLinkage)
+ return false;
+ // Two problems with thread-locals:
+ // - The address of the main thread's copy can't be computed at link-time.
+ // - Need to poison all copies, not just the main thread's one.
+ if (G->isThreadLocal())
+ return false;
+ // For now, just ignore this Alloca if the alignment is large.
+ if (G->getAlignment() > RedzoneSize) return false;
+
+ // Ignore all the globals with the names starting with "\01L_OBJC_".
+ // Many of those are put into the .cstring section. The linker compresses
+ // that section by removing the spare \0s after the string terminator, so
+ // our redzones get broken.
+ if ((G->getName().find("\01L_OBJC_") == 0) ||
+ (G->getName().find("\01l_OBJC_") == 0)) {
+ DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
+ return false;
+ }
+
+ if (G->hasSection()) {
+ StringRef Section(G->getSection());
+ // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+ // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+ // them.
+ if ((Section.find("__OBJC,") == 0) ||
+ (Section.find("__DATA, __objc_") == 0)) {
+ DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
+ return false;
+ }
+ // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
+ // Constant CFString instances are compiled in the following way:
+ // -- the string buffer is emitted into
+ // __TEXT,__cstring,cstring_literals
+ // -- the constant NSConstantString structure referencing that buffer
+ // is placed into __DATA,__cfstring
+ // Therefore there's no point in placing redzones into __DATA,__cfstring.
+ // Moreover, it causes the linker to crash on OS X 10.7
+ if (Section.find("__DATA,__cfstring") == 0) {
+ DEBUG(dbgs() << "Ignoring CFString: " << *G);
+ return false;
+ }
+ }
+
+ return true;
}
// This function replaces all global variables with new variables that have
@@ -419,62 +603,10 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
bool AddressSanitizer::insertGlobalRedzones(Module &M) {
SmallVector<GlobalVariable *, 16> GlobalsToChange;
- for (Module::GlobalListType::iterator G = M.getGlobalList().begin(),
- E = M.getGlobalList().end(); G != E; ++G) {
- Type *Ty = cast<PointerType>(G->getType())->getElementType();
- DEBUG(dbgs() << "GLOBAL: " << *G);
-
- if (!Ty->isSized()) continue;
- if (!G->hasInitializer()) continue;
- // Touch only those globals that will not be defined in other modules.
- // Don't handle ODR type linkages since other modules may be built w/o asan.
- if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
- G->getLinkage() != GlobalVariable::PrivateLinkage &&
- G->getLinkage() != GlobalVariable::InternalLinkage)
- continue;
- // Two problems with thread-locals:
- // - The address of the main thread's copy can't be computed at link-time.
- // - Need to poison all copies, not just the main thread's one.
- if (G->isThreadLocal())
- continue;
- // For now, just ignore this Alloca if the alignment is large.
- if (G->getAlignment() > RedzoneSize) continue;
-
- // Ignore all the globals with the names starting with "\01L_OBJC_".
- // Many of those are put into the .cstring section. The linker compresses
- // that section by removing the spare \0s after the string terminator, so
- // our redzones get broken.
- if ((G->getName().find("\01L_OBJC_") == 0) ||
- (G->getName().find("\01l_OBJC_") == 0)) {
- DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
- continue;
- }
-
- if (G->hasSection()) {
- StringRef Section(G->getSection());
- // Ignore the globals from the __OBJC section. The ObjC runtime assumes
- // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
- // them.
- if ((Section.find("__OBJC,") == 0) ||
- (Section.find("__DATA, __objc_") == 0)) {
- DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
- continue;
- }
- // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
- // Constant CFString instances are compiled in the following way:
- // -- the string buffer is emitted into
- // __TEXT,__cstring,cstring_literals
- // -- the constant NSConstantString structure referencing that buffer
- // is placed into __DATA,__cfstring
- // Therefore there's no point in placing redzones into __DATA,__cfstring.
- // Moreover, it causes the linker to crash on OS X 10.7
- if (Section.find("__DATA,__cfstring") == 0) {
- DEBUG(dbgs() << "Ignoring CFString: " << *G);
- continue;
- }
- }
-
- GlobalsToChange.push_back(G);
+ for (Module::GlobalListType::iterator G = M.global_begin(),
+ E = M.global_end(); G != E; ++G) {
+ if (ShouldInstrumentGlobal(G))
+ GlobalsToChange.push_back(G);
}
size_t n = GlobalsToChange.size();
@@ -485,13 +617,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
+ // size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
- IntptrTy, IntptrTy, NULL);
- SmallVector<Constant *, 16> Initializers(n);
+ IntptrTy, IntptrTy,
+ IntptrTy, NULL);
+ SmallVector<Constant *, 16> Initializers(n), DynamicInit;
IRBuilder<> IRB(CtorInsertBefore);
+ if (ClInitializers)
+ FindDynamicInitializers(M);
+
+ // The addresses of the first and last dynamically initialized globals in
+ // this TU. Used in initialization order checking.
+ Value *FirstDynamic = 0, *LastDynamic = 0;
+
for (size_t i = 0; i < n; i++) {
GlobalVariable *G = GlobalsToChange[i];
PointerType *PtrTy = cast<PointerType>(G->getType());
@@ -500,6 +641,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
uint64_t RightRedzoneSize = RedzoneSize +
(RedzoneSize - (SizeInBytes % RedzoneSize));
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+ // Determine whether this global should be poisoned in initialization.
+ bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
Constant *NewInitializer = ConstantStruct::get(
@@ -534,7 +677,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
+
+ // Populate the first and last globals declared in this TU.
+ if (ClInitializers && GlobalHasDynamicInitializer) {
+ LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
+ if (FirstDynamic == 0)
+ FirstDynamic = LastDynamic;
+ }
+
DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
}
@@ -543,8 +695,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
+ // Create calls for poisoning before initializers run and unpoisoning after.
+ if (ClInitializers && FirstDynamic && LastDynamic)
+ createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
+
Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ kAsanRegisterGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, NULL));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
IRB.CreateCall2(AsanRegisterGlobals,
@@ -581,7 +738,6 @@ bool AddressSanitizer::runOnModule(Module &M) {
return false;
BL.reset(new FunctionBlackList(ClBlackListFile));
- CurrentModule = &M;
C = &(M.getContext());
LongSize = TD->getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
@@ -600,6 +756,23 @@ bool AddressSanitizer::runOnModule(Module &M) {
AsanInitFunction->setLinkage(Function::ExternalLinkage);
IRB.CreateCall(AsanInitFunction);
+ // Create __asan_report* callbacks.
+ for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ // IsWrite and TypeSize are encoded in the function name.
+ std::string FunctionName = std::string(kAsanReportErrorTemplate) +
+ (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
+ // If we are merging crash callbacks, they have two parameters.
+ AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>(
+ M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
+ }
+ }
+ // We insert an empty inline asm after __asan_report* to avoid callback merge.
+ EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
+ StringRef(""), StringRef(""),
+ /*hasSideEffects=*/true);
+
llvm::Triple targetTriple(M.getTargetTriple());
bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::ANDROIDEABI;
@@ -721,6 +894,8 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
}
}
+ AsanFunctionContext AFC(F);
+
// Instrument.
int NumInstrumented = 0;
for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
@@ -728,9 +903,9 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
if (ClDebugMin < 0 || ClDebugMax < 0 ||
(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
if (isInterestingMemoryAccess(Inst, &IsWrite))
- instrumentMop(Inst);
+ instrumentMop(AFC, Inst);
else
- instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
+ instrumentMemIntrinsic(AFC, cast<MemIntrinsic>(Inst));
}
NumInstrumented++;
}
diff --git a/lib/Transforms/Scalar/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 0690d76e7b..09e0f14451 100644
--- a/lib/Transforms/Scalar/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "bounds-checking"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/IRBuilder.h"
#include "llvm/Intrinsics.h"
#include "llvm/Pass.h"
@@ -25,6 +24,7 @@
#include "llvm/Support/TargetFolder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Instrumentation.h"
using namespace llvm;
static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
@@ -67,11 +67,8 @@ namespace {
}
char BoundsChecking::ID = 0;
-INITIALIZE_PASS_BEGIN(BoundsChecking, "bounds-checking",
- "Run-time bounds checking", false, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_PASS_END(BoundsChecking, "bounds-checking",
- "Run-time bounds checking", false, false)
+INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking",
+ false, false)
/// getTrapBB - create a basic block that traps. All overflowing conditions
@@ -141,6 +138,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
Value *Size = SizeOffset.first;
Value *Offset = SizeOffset.second;
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
IntegerType *IntTy = TD->getIntPtrType(Inst->getContext());
Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
@@ -149,12 +147,17 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
// . Offset >= 0 (since the offset is given from the base ptr)
// . Size >= Offset (unsigned)
// . Size - Offset >= NeededSize (unsigned)
+ //
+ // optimization: if Size >= 0 (signed), skip 1st check
// FIXME: add NSW/NUW here? -- we dont care if the subtraction overflows
Value *ObjSize = Builder->CreateSub(Size, Offset);
- Value *Cmp1 = Builder->CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0));
Value *Cmp2 = Builder->CreateICmpULT(Size, Offset);
Value *Cmp3 = Builder->CreateICmpULT(ObjSize, NeededSizeVal);
- Value *Or = Builder->CreateOr(Cmp1, Builder->CreateOr(Cmp2, Cmp3));
+ Value *Or = Builder->CreateOr(Cmp2, Cmp3);
+ if (!SizeCI || SizeCI->getValue().slt(0)) {
+ Value *Cmp1 = Builder->CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0));
+ Or = Builder->CreateOr(Cmp1, Or);
+ }
emitBranchToTrap(Or);
++ChecksAdded;
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index eaa3a4000f..00de882f17 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,5 +1,6 @@
add_llvm_library(LLVMInstrumentation
AddressSanitizer.cpp
+ BoundsChecking.cpp
EdgeProfiling.cpp
FunctionBlackList.cpp
GCOVProfiling.cpp
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index c7266e2f8d..1e0b4a348a 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -20,11 +20,12 @@ using namespace llvm;
/// initializeInstrumentation - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeInstrumentation(PassRegistry &Registry) {
+ initializeAddressSanitizerPass(Registry);
+ initializeBoundsCheckingPass(Registry);
initializeEdgeProfilerPass(Registry);
+ initializeGCOVProfilerPass(Registry);
initializeOptimalEdgeProfilerPass(Registry);
initializePathProfilerPass(Registry);
- initializeGCOVProfilerPass(Registry);
- initializeAddressSanitizerPass(Registry);
initializeThreadSanitizerPass(Registry);
}
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index f76c77e1bd..a4bb5a66af 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -26,30 +26,6 @@ namespace llvm {
/// The type parameter T determines the type of the nodes of the graph.
template <typename T>
class MaximumSpanningTree {
-
- // A comparing class for comparing weighted edges.
- template <typename CT>
- struct EdgeWeightCompare {
- bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,
- typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
- if (X.second > Y.second) return true;
- if (X.second < Y.second) return false;
- if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) {
- if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) {
- if (BBX->size() > BBY->size()) return true;
- if (BBX->size() < BBY->size()) return false;
- }
- }
- if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) {
- if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) {
- if (BBX->size() > BBY->size()) return true;
- if (BBX->size() < BBY->size()) return false;
- }
- }
- return false;
- }
- };
-
public:
typedef std::pair<const T*, const T*> Edge;
typedef std::pair<Edge, double> EdgeWeight;
@@ -59,6 +35,33 @@ namespace llvm {
MaxSpanTree MST;
+ private:
+ // A comparing class for comparing weighted edges.
+ struct EdgeWeightCompare {
+ static bool getBlockSize(const T *X) {
+ const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X);
+ return BB ? BB->size() : 0;
+ }
+
+ bool operator()(EdgeWeight X, EdgeWeight Y) const {
+ if (X.second > Y.second) return true;
+ if (X.second < Y.second) return false;
+
+ // Equal edge weights: break ties by comparing block sizes.
+ size_t XSizeA = getBlockSize(X.first.first);
+ size_t YSizeA = getBlockSize(Y.first.first);
+ if (XSizeA > YSizeA) return true;
+ if (XSizeA < YSizeA) return false;
+
+ size_t XSizeB = getBlockSize(X.first.second);
+ size_t YSizeB = getBlockSize(Y.first.second);
+ if (XSizeB > YSizeB) return true;
+ if (XSizeB < YSizeB) return false;
+
+ return false;
+ }
+ };
+
public:
static char ID; // Class identification, replacement for typeinfo
@@ -66,7 +69,7 @@ namespace llvm {
/// spanning tree.
MaximumSpanningTree(EdgeWeights &EdgeVector) {
- std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+ std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare());
// Create spanning tree, Forest contains a special data structure
// that makes checking if two nodes are already in a common (sub-)tree
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index b2147968df..cc27146ebc 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -55,11 +55,11 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/TypeBuilder.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TypeBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Instrumentation.h"
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 4c12a9b624..dc0fa7175d 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -319,7 +319,12 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
if (Idx < 0)
return false;
if (IsWrite && isVtableAccess(I)) {
+ DEBUG(dbgs() << " VPTR : " << *I << "\n");
Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
+ // StoredValue does not necessary have a pointer type.
+ if (isa<IntegerType>(StoredValue->getType()))
+ StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy());
+ // Call TsanVptrUpdate.
IRB.CreateCall2(TsanVptrUpdate,
IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy()));
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index ba214d1a33..b344952cc5 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -9,7 +9,7 @@
//
// This file implements the Aggressive Dead Code Elimination pass. This pass
// optimistically assumes that all instructions are dead until proven otherwise,
-// allowing it to eliminate dead computations that other DCE passes do not
+// allowing it to eliminate dead computations that other DCE passes do not
// catch, particularly involving loop computations.
//
//===----------------------------------------------------------------------===//
@@ -36,13 +36,13 @@ namespace {
ADCE() : FunctionPass(ID) {
initializeADCEPass(*PassRegistry::getPassRegistry());
}
-
+
virtual bool runOnFunction(Function& F);
-
+
virtual void getAnalysisUsage(AnalysisUsage& AU) const {
AU.setPreservesCFG();
}
-
+
};
}
@@ -52,7 +52,7 @@ INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
bool ADCE::runOnFunction(Function& F) {
SmallPtrSet<Instruction*, 128> alive;
SmallVector<Instruction*, 128> worklist;
-
+
// Collect the set of "root" instructions that are known live.
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (isa<TerminatorInst>(I.getInstructionIterator()) ||
@@ -62,7 +62,7 @@ bool ADCE::runOnFunction(Function& F) {
alive.insert(I.getInstructionIterator());
worklist.push_back(I.getInstructionIterator());
}
-
+
// Propagate liveness backwards to operands.
while (!worklist.empty()) {
Instruction* curr = worklist.pop_back_val();
@@ -72,7 +72,7 @@ bool ADCE::runOnFunction(Function& F) {
if (alive.insert(Inst))
worklist.push_back(Inst);
}
-
+
// The inverse of the live set is the dead set. These are those instructions
// which have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
@@ -82,7 +82,7 @@ bool ADCE::runOnFunction(Function& F) {
worklist.push_back(I.getInstructionIterator());
I->dropAllReferences();
}
-
+
for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
E = worklist.end(); I != E; ++I) {
++NumRemoved;
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index bf9cc66392..a01e0661b1 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,7 +1,6 @@
add_llvm_library(LLVMScalarOpts
ADCE.cpp
BasicBlockPlacement.cpp
- BoundsChecking.cpp
CodeGenPrepare.cpp
ConstantProp.cpp
CorrelatedValuePropagation.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index cbc089ab78..a8deda8b74 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -66,11 +66,6 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
-// FIXME: Remove this abomination once all of the tests pass without it!
-static cl::opt<bool> DisableDeleteDeadBlocks(
- "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false),
- cl::desc("Disable deleting dead blocks in CodeGenPrepare"));
-
static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
@@ -83,7 +78,7 @@ namespace {
const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
ProfileInfo *PFI;
-
+
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
/// update it.
@@ -116,6 +111,7 @@ namespace {
}
private:
+ bool EliminateFallThrough(Function &F);
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
@@ -157,7 +153,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
// llvm.dbg.value is far away from the value then iSel may not be able
- // handle it properly. iSel will drop llvm.dbg.value if it can not
+ // handle it properly. iSel will drop llvm.dbg.value if it can not
// find a node corresponding to the value.
EverMadeChange |= PlaceDbgValues(F);
@@ -187,10 +183,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
WorkList.insert(*II);
}
- if (!DisableDeleteDeadBlocks)
- for (SmallPtrSet<BasicBlock*, 8>::iterator
- I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
- DeleteDeadBlock(*I);
+ for (SmallPtrSet<BasicBlock*, 8>::iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
+ DeleteDeadBlock(*I);
+
+ // Merge pairs of basic blocks with unconditional branches, connected by
+ // a single edge.
+ if (EverMadeChange || MadeChange)
+ MadeChange |= EliminateFallThrough(F);
if (MadeChange)
ModifiedDT = true;
@@ -203,6 +203,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
+/// EliminateFallThrough - Merge basic blocks which are connected
+/// by a single edge, where one of the basic blocks has a single successor
+/// pointing to the other basic block, which has a single predecessor.
+bool CodeGenPrepare::EliminateFallThrough(Function &F) {
+ bool Changed = false;
+ // Scan all of the blocks in the function, except for the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+ // If the destination block has a single pred, then this is a trivial
+ // edge, just collapse it.
+ BasicBlock *SinglePred = BB->getSinglePredecessor();
+
+ if (!SinglePred || SinglePred == BB) continue;
+
+ BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
+ if (Term && !Term->isConditional()) {
+ Changed = true;
+ DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
+ // Remember if SinglePred was the entry block of the function.
+ // If so, we will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(BB, this);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ // We have erased a block. Update the iterator.
+ I = BB;
+ }
+ }
+ return Changed;
+}
+
/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
@@ -336,7 +369,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
-
+
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
return;
}
@@ -547,7 +580,7 @@ protected:
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
BasicBlock *BB = CI->getParent();
-
+
// Lower inline assembly if we can.
// If we found an inline asm expession, and if the target knows how to
// lower it to normal LLVM code, do so now.
@@ -564,19 +597,19 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
if (OptimizeInlineAsmInst(CI))
return true;
}
-
+
// Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
Type *ReturnTy = CI->getType();
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+
// Substituting this can cause recursive simplifications, which can
// invalidate our iterator. Use a WeakVH to hold onto it in case this
// happens.
WeakVH IterHandle(CurInstIterator);
-
+
replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
TLInfo, ModifiedDT ? 0 : DT);
@@ -604,13 +637,13 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// We'll need TargetData from here on out.
const TargetData *TD = TLI ? TLI->getTargetData() : 0;
if (!TD) return false;
-
+
// Lower all default uses of _chk calls. This is very similar
// to what InstCombineCalls does, but here we are only lowering calls
// that have the default "don't know" as the objectsize. Anything else
// should be left alone.
CodeGenPrepareFortifiedLibCalls Simplifier;
- return Simplifier.fold(CI, TD);
+ return Simplifier.fold(CI, TD, TLInfo);
}
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
@@ -645,10 +678,18 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
if (!TLI)
return false;
+ PHINode *PN = 0;
+ BitCastInst *BCI = 0;
Value *V = RI->getReturnValue();
- PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
- if (V && !PN)
- return false;
+ if (V) {
+ BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ V = BCI->getOperand(0);
+
+ PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return false;
+ }
BasicBlock *BB = RI->getParent();
if (PN && PN->getParent() != BB)
@@ -666,6 +707,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
if (PN) {
BasicBlock::iterator BI = BB->begin();
do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI == BCI)
+ // Also skip over the bitcast.
+ ++BI;
if (&*BI != RI)
return false;
} else {
@@ -760,13 +804,13 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Type *AccessTy) {
Value *Repl = Addr;
-
- // Try to collapse single-value PHI nodes. This is necessary to undo
+
+ // Try to collapse single-value PHI nodes. This is necessary to undo
// unprofitable PRE transformations.
SmallVector<Value*, 8> worklist;
SmallPtrSet<Value*, 16> Visited;
worklist.push_back(Addr);
-
+
// Use a worklist to iteratively look through PHI nodes, and ensure that
// the addressing mode obtained from the non-PHI roots of the graph
// are equivalent.
@@ -778,20 +822,20 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
while (!worklist.empty()) {
Value *V = worklist.back();
worklist.pop_back();
-
+
// Break use-def graph loops.
if (!Visited.insert(V)) {
Consensus = 0;
break;
}
-
+
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
worklist.push_back(P->getIncomingValue(i));
continue;
}
-
+
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode =
@@ -826,15 +870,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}
continue;
}
-
+
Consensus = 0;
break;
}
-
+
// If the addressing mode couldn't be determined, or if multiple different
// ones were determined, bail out now.
if (!Consensus) return false;
-
+
// Check to see if any of the instructions supersumed by this addr mode are
// non-local to I's BB.
bool AnyNonLocal = false;
@@ -943,7 +987,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Use a WeakVH to hold onto it in case this happens.
WeakVH IterHandle(CurInstIterator);
BasicBlock *BB = CurInstIterator->getParent();
-
+
RecursivelyDeleteTriviallyDeadInstructions(Repl);
if (IterHandle != CurInstIterator) {
@@ -955,7 +999,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// This address is now available for reassignment, so erase the table
// entry; we don't want to match some completely different instruction.
SunkAddrs[Addr] = 0;
- }
+ }
}
++NumMemoryInsts;
return true;
@@ -967,12 +1011,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
- TargetLowering::AsmOperandInfoVector
+ TargetLowering::AsmOperandInfoVector
TargetConstraints = TLI->ParseConstraints(CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
+
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
@@ -1187,7 +1231,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
}
return false;
}
-
+
if (CastInst *CI = dyn_cast<CastInst>(I)) {
// If the source of the cast is a constant, then this should have
// already been constant folded. The only reason NOT to constant fold
@@ -1207,23 +1251,23 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
}
return false;
}
-
+
if (CmpInst *CI = dyn_cast<CmpInst>(I))
return OptimizeCmpExpression(CI);
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (TLI)
return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
return false;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (TLI)
return OptimizeMemoryInst(I, SI->getOperand(1),
SI->getOperand(0)->getType());
return false;
}
-
+
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
if (GEPI->hasAllZeroIndices()) {
/// The GEP operand must be a pointer, so must its result -> BitCast
@@ -1237,7 +1281,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
}
return false;
}
-
+
if (CallInst *CI = dyn_cast<CallInst>(I))
return OptimizeCallInst(CI);
@@ -1265,7 +1309,7 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
}
// llvm.dbg.value is far away from the value then iSel may not be able
-// handle it properly. iSel will drop llvm.dbg.value if it can not
+// handle it properly. iSel will drop llvm.dbg.value if it can not
// find a node corresponding to the value.
bool CodeGenPrepare::PlaceDbgValues(Function &F) {
bool MadeChange = false;
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c8448fa6c1..8b1283ff25 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -248,7 +248,7 @@ static bool isShortenable(Instruction *I) {
// Don't shorten stores for now
if (isa<StoreInst>(I))
return false;
-
+
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
default: return false;
@@ -292,7 +292,7 @@ namespace {
/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
/// completely overwrites a store to the 'Earlier' location.
-/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
+/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
const AliasAnalysis::Location &Earlier,
@@ -315,7 +315,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
if (AA.getTargetData() == 0 &&
Later.Ptr->getType() == Earlier.Ptr->getType())
return OverwriteComplete;
-
+
return OverwriteUnknown;
}
@@ -378,10 +378,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
if (EarlierOff >= LaterOff &&
- Later.Size > Earlier.Size &&
+ Later.Size >= Earlier.Size &&
uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
return OverwriteComplete;
-
+
// The other interesting case is if the later store overwrites the end of
// the earlier store
//
@@ -520,11 +520,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// If we find a write that is a) removable (i.e., non-volatile), b) is
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
- if (isRemovable(DepWrite) &&
+ if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
- int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
- DepWriteOffset, InstWriteOffset);
+ int64_t InstWriteOffset, DepWriteOffset;
+ OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
+ DepWriteOffset, InstWriteOffset);
if (OR == OverwriteComplete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
@@ -533,7 +533,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
DeleteDeadInstruction(DepWrite, *MD);
++NumFastStores;
MadeChange = true;
-
+
// DeleteDeadInstruction can delete the current instruction in loop
// cases, reset BBI.
BBI = Inst;
@@ -551,16 +551,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
unsigned DepWriteAlign = DepIntrinsic->getAlignment();
if (llvm::isPowerOf2_64(InstWriteOffset) ||
((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
+
DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: "
- << *DepWrite << "\n KILLER (offset "
- << InstWriteOffset << ", "
+ << *DepWrite << "\n KILLER (offset "
+ << InstWriteOffset << ", "
<< DepLoc.Size << ")"
<< *Inst << '\n');
-
+
Value* DepWriteLength = DepIntrinsic->getLength();
Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
- InstWriteOffset -
+ InstWriteOffset -
DepWriteOffset);
DepIntrinsic->setLength(TrimmedLength);
MadeChange = true;
@@ -740,12 +740,19 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- if (isa<AllocaInst>(BBI) || isAllocLikeFn(BBI)) {
+ if (isa<AllocaInst>(BBI)) {
+ // Remove allocas from the list of dead stack objects; there can't be
+ // any references before the definition.
DeadStackObjects.remove(BBI);
continue;
}
if (CallSite CS = cast<Value>(BBI)) {
+ // Remove allocation function calls from the list of dead stack objects;
+ // there can't be any references before the definition.
+ if (isAllocLikeFn(BBI))
+ DeadStackObjects.remove(BBI);
+
// If this call does not access memory, it can't be loading any of our
// pointers.
if (AA->doesNotAccessMemory(CS))
@@ -771,7 +778,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
if (DeadStackObjects.empty())
- return MadeChange;
+ break;
continue;
}
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index f3c92d64c2..975954953b 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -39,7 +39,7 @@ static unsigned getHash(const void *V) {
}
//===----------------------------------------------------------------------===//
-// SimpleValue
+// SimpleValue
//===----------------------------------------------------------------------===//
namespace {
@@ -47,16 +47,16 @@ namespace {
/// scoped hash table.
struct SimpleValue {
Instruction *Inst;
-
+
SimpleValue(Instruction *I) : Inst(I) {
assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
}
-
+
bool isSentinel() const {
return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
}
-
+
static bool canHandle(Instruction *Inst) {
// This can only handle non-void readnone functions.
if (CallInst *CI = dyn_cast<CallInst>(Inst))
@@ -90,7 +90,7 @@ template<> struct DenseMapInfo<SimpleValue> {
unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
Instruction *Inst = Val.Inst;
-
+
// Hash in all of the operands as pointers.
unsigned Res = 0;
for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
@@ -126,13 +126,13 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
if (LHS.isSentinel() || RHS.isSentinel())
return LHSI == RHSI;
-
+
if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
return LHSI->isIdenticalTo(RHSI);
}
//===----------------------------------------------------------------------===//
-// CallValue
+// CallValue
//===----------------------------------------------------------------------===//
namespace {
@@ -140,21 +140,21 @@ namespace {
/// the scoped hash table.
struct CallValue {
Instruction *Inst;
-
+
CallValue(Instruction *I) : Inst(I) {
assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
}
-
+
bool isSentinel() const {
return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
}
-
+
static bool canHandle(Instruction *Inst) {
// Don't value number anything that returns void.
if (Inst->getType()->isVoidTy())
return false;
-
+
CallInst *CI = dyn_cast<CallInst>(Inst);
if (CI == 0 || !CI->onlyReadsMemory())
return false;
@@ -168,7 +168,7 @@ namespace llvm {
template<> struct isPodLike<CallValue> {
static const bool value = true;
};
-
+
template<> struct DenseMapInfo<CallValue> {
static inline CallValue getEmptyKey() {
return DenseMapInfo<Instruction*>::getEmptyKey();
@@ -189,7 +189,7 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
"Cannot value number calls with metadata operands");
Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
}
-
+
// Mix in the opcode.
return (Res << 1) ^ Inst->getOpcode();
}
@@ -203,11 +203,11 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
//===----------------------------------------------------------------------===//
-// EarlyCSE pass.
+// EarlyCSE pass.
//===----------------------------------------------------------------------===//
namespace {
-
+
/// EarlyCSE - This pass does a simple depth-first walk over the dominator
/// tree, eliminating trivially redundant instructions and using instsimplify
/// to canonicalize things as it goes. It is intended to be fast and catch
@@ -223,14 +223,14 @@ public:
ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
AllocatorTy> ScopedHTType;
-
+
/// AvailableValues - This scoped hash table contains the current values of
/// all of our simple scalar expressions. As we walk down the domtree, we
/// look to see if instructions are in this: if so, we replace them with what
/// we find, otherwise we insert them so that dominated values can succeed in
/// their lookup.
ScopedHTType *AvailableValues;
-
+
/// AvailableLoads - This scoped hash table contains the current values
/// of loads. This allows us to get efficient access to dominating loads when
/// we have a fully redundant load. In addition to the most recent load, we
@@ -243,15 +243,15 @@ public:
typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
LoadHTType *AvailableLoads;
-
+
/// AvailableCalls - This scoped hash table contains the current values
/// of read-only call values. It uses the same generation count as loads.
typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
CallHTType *AvailableCalls;
-
+
/// CurrentGeneration - This is the current generation of the memory value.
unsigned CurrentGeneration;
-
+
static char ID;
explicit EarlyCSE() : FunctionPass(ID) {
initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
@@ -326,7 +326,7 @@ private:
};
bool processNode(DomTreeNode *Node);
-
+
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
@@ -350,7 +350,7 @@ INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
bool EarlyCSE::processNode(DomTreeNode *Node) {
BasicBlock *BB = Node->getBlock();
-
+
// If this block has a single predecessor, then the predecessor is the parent
// of the domtree node and all of the live out memory values are still current
// in this block. If this block has multiple predecessors, then they could
@@ -359,20 +359,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// predecessors.
if (BB->getSinglePredecessor() == 0)
++CurrentGeneration;
-
+
/// LastStore - Keep track of the last non-volatile store that we saw... for
/// as long as there in no instruction that reads memory. If we see a store
/// to the same location, we delete the dead store. This zaps trivial dead
/// stores which can occur in bitfield code among other things.
StoreInst *LastStore = 0;
-
+
bool Changed = false;
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
Instruction *Inst = I++;
-
+
// Dead instructions should just be removed.
if (isInstructionTriviallyDead(Inst)) {
DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
@@ -381,7 +381,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
++NumSimplify;
continue;
}
-
+
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
@@ -392,7 +392,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
++NumSimplify;
continue;
}
-
+
// If this is a simple instruction that we can value number, process it.
if (SimpleValue::canHandle(Inst)) {
// See if the instruction has an available value. If so, use it.
@@ -404,12 +404,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
++NumCSE;
continue;
}
-
+
// Otherwise, just remember that this value is available.
AvailableValues->insert(Inst, Inst);
continue;
}
-
+
// If this is a non-volatile load, process it.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Ignore volatile loads.
@@ -417,7 +417,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LastStore = 0;
continue;
}
-
+
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
std::pair<Value*, unsigned> InVal =
@@ -431,18 +431,18 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
++NumCSELoad;
continue;
}
-
+
// Otherwise, remember that we have this instruction.
AvailableLoads->insert(Inst->getOperand(0),
std::pair<Value*, unsigned>(Inst, CurrentGeneration));
LastStore = 0;
continue;
}
-
+
// If this instruction may read from memory, forget LastStore.
if (Inst->mayReadFromMemory())
LastStore = 0;
-
+
// If this is a read-only call, process it.
if (CallValue::canHandle(Inst)) {
// If we have an available version of this call, and if it is the right
@@ -457,19 +457,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
++NumCSECall;
continue;
}
-
+
// Otherwise, remember that we have this instruction.
AvailableCalls->insert(Inst,
std::pair<Value*, unsigned>(Inst, CurrentGeneration));
continue;
}
-
+
// Okay, this isn't something we can CSE at all. Check to see if it is
// something that could modify memory. If so, our available memory values
// cannot be used so bump the generation count.
if (Inst->mayWriteToMemory()) {
++CurrentGeneration;
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
@@ -483,7 +483,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LastStore = 0;
continue;
}
-
+
// Okay, we just invalidated anything we knew about loaded values. Try
// to salvage *something* by remembering that the stored value is a live
// version of the pointer. It is safe to forward from volatile stores
@@ -491,7 +491,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// the store.
AvailableLoads->insert(SI->getPointerOperand(),
std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
-
+
// Remember that this was the last store we saw for DSE.
if (SI->isSimple())
LastStore = SI;
@@ -509,7 +509,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
-
+
// Tables that the pass uses when walking the domtree.
ScopedHTType AVTable;
AvailableValues = &AVTable;
@@ -517,7 +517,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
AvailableLoads = &LoadTable;
CallHTType CallTable;
AvailableCalls = &CallTable;
-
+
CurrentGeneration = 0;
bool Changed = false;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 476ec383e6..4822fd0944 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -173,7 +173,7 @@ Expression ValueTable::create_expression(Instruction *I) {
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
}
-
+
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
// Sort the operand value numbers so x<y and y>x get the same value number.
CmpInst::Predicate Predicate = C->getPredicate();
@@ -187,7 +187,7 @@ Expression ValueTable::create_expression(Instruction *I) {
II != IE; ++II)
e.varargs.push_back(*II);
}
-
+
return e;
}
@@ -391,7 +391,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
}
-
+
Instruction* I = cast<Instruction>(V);
Expression exp;
switch (I->getOpcode()) {
@@ -507,17 +507,17 @@ namespace {
const TargetLibraryInfo *TLI;
ValueTable VN;
-
+
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
/// have that value number. Use findLeader to query it.
struct LeaderTableEntry {
Value *Val;
- BasicBlock *BB;
+ const BasicBlock *BB;
LeaderTableEntry *Next;
};
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
BumpPtrAllocator TableAllocator;
-
+
SmallVector<Instruction*, 8> InstrsToErase;
public:
static char ID; // Pass identification, replacement for typeid
@@ -527,14 +527,14 @@ namespace {
}
bool runOnFunction(Function &F);
-
+
/// markInstructionForDeletion - This removes the specified instruction from
/// our various maps and marks it for deletion.
void markInstructionForDeletion(Instruction *I) {
VN.erase(I);
InstrsToErase.push_back(I);
}
-
+
const TargetData *getTargetData() const { return TD; }
DominatorTree &getDominatorTree() const { return *DT; }
AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
@@ -542,21 +542,21 @@ namespace {
private:
/// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
/// its value number.
- void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
LeaderTableEntry &Curr = LeaderTable[N];
if (!Curr.Val) {
Curr.Val = V;
Curr.BB = BB;
return;
}
-
+
LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
Node->Val = V;
Node->BB = BB;
Node->Next = Curr.Next;
Curr.Next = Node;
}
-
+
/// removeFromLeaderTable - Scan the list of values corresponding to a given
/// value number, and remove the given instruction if encountered.
void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
@@ -567,7 +567,7 @@ namespace {
Prev = Curr;
Curr = Curr->Next;
}
-
+
if (Prev) {
Prev->Next = Curr->Next;
} else {
@@ -597,7 +597,7 @@ namespace {
AU.addPreserved<DominatorTree>();
AU.addPreserved<AliasAnalysis>();
}
-
+
// Helper fuctions
// FIXME: eliminate or document these better
@@ -608,13 +608,13 @@ namespace {
void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
bool performPRE(Function &F);
- Value *findLeader(BasicBlock *BB, uint32_t num);
+ Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
- BasicBlock *Root);
- bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root);
+ const BasicBlockEdge &Root);
+ bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
};
char GVN::ID = 0;
@@ -735,15 +735,15 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
StoredVal->getType()->isStructTy() ||
StoredVal->getType()->isArrayTy())
return false;
-
+
// The store has to be at least as big as the load.
if (TD.getTypeSizeInBits(StoredVal->getType()) <
TD.getTypeSizeInBits(LoadTy))
return false;
-
+
return true;
}
-
+
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
@@ -751,80 +751,80 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
-static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
+static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
Type *LoadedTy,
Instruction *InsertPt,
const TargetData &TD) {
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
return 0;
-
+
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
-
+
uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
-
+
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
// Pointer to Pointer -> use bitcast.
if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
-
+
// Convert source pointers to integers, which can be bitcast.
if (StoredValTy->isPointerTy()) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
-
+
Type *TypeToCastTo = LoadedTy;
if (TypeToCastTo->isPointerTy())
TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
-
+
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
-
+
// Cast to pointer if the load needs a pointer type.
if (LoadedTy->isPointerTy())
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
-
+
return StoredVal;
}
-
+
// If the loaded value is smaller than the available value, then we can
// extract out a piece from it. If the available value is too small, then we
// can't do anything.
assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
-
+
// Convert source pointers to integers, which can be manipulated.
if (StoredValTy->isPointerTy()) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
-
+
// Convert vectors and fp to integer, which can be manipulated.
if (!StoredValTy->isIntegerTy()) {
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
}
-
+
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (TD.isBigEndian()) {
Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
}
-
+
// Truncate the integer to the right size now.
Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
-
+
if (LoadedTy == NewIntTy)
return StoredVal;
-
+
// If the result is a pointer, inttoptr.
if (LoadedTy->isPointerTy())
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
-
+
// Otherwise, bitcast.
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
@@ -845,13 +845,13 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
// to transform them. We need to be able to bitcast to integer.
if (LoadTy->isStructTy() || LoadTy->isArrayTy())
return -1;
-
+
int64_t StoreOffset = 0, LoadOffset = 0;
Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
if (StoreBase != LoadBase)
return -1;
-
+
// If the load and store are to the exact same address, they should have been
// a must alias. AA must have gotten confused.
// FIXME: Study to see if/when this happens. One case is forwarding a memset
@@ -866,18 +866,18 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
abort();
}
#endif
-
+
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
-
+
if ((WriteSizeInBits & 7) | (LoadSize & 7))
return -1;
uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
LoadSize >>= 3;
-
-
+
+
bool isAAFailure = false;
if (StoreOffset < LoadOffset)
isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
@@ -895,7 +895,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
#endif
return -1;
}
-
+
// If the Load isn't completely contained within the stored bits, we don't
// have all the bits to feed it. We could do something crazy in the future
// (issue a smaller load then merge the bits in) but this seems unlikely to be
@@ -903,11 +903,11 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
if (StoreOffset > LoadOffset ||
StoreOffset+StoreSize < LoadOffset+LoadSize)
return -1;
-
+
// Okay, we can do this transformation. Return the number of bytes into the
// store that the load is.
return LoadOffset-StoreOffset;
-}
+}
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
@@ -933,23 +933,23 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
// Cannot handle reading from store of first-class aggregate yet.
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
-
+
Value *DepPtr = DepLI->getPointerOperand();
uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
if (R != -1) return R;
-
+
// If we have a load/load clobber an DepLI can be widened to cover this load,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
-
+
unsigned Size = MemoryDependenceAnalysis::
getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
if (Size == 0) return -1;
-
+
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
}
@@ -968,29 +968,29 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (MI->getIntrinsicID() == Intrinsic::memset)
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, TD);
-
+
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
// constant memory.
MemTransferInst *MTI = cast<MemTransferInst>(MI);
-
+
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (Src == 0) return -1;
-
+
GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
if (GV == 0 || !GV->isConstant()) return -1;
-
+
// See if the access is within the bounds of the transfer.
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
MI->getDest(), MemSizeInBits, TD);
if (Offset == -1)
return Offset;
-
+
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
llvm::Type::getInt8PtrTy(Src->getContext()));
- Constant *OffsetCst =
+ Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
@@ -998,7 +998,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
return Offset;
return -1;
}
-
+
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
@@ -1009,32 +1009,32 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
Type *LoadTy,
Instruction *InsertPt, const TargetData &TD){
LLVMContext &Ctx = SrcVal->getType()->getContext();
-
+
uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
-
+
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
-
+
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (SrcVal->getType()->isPointerTy())
SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
if (!SrcVal->getType()->isIntegerTy())
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
-
+
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
if (TD.isLittleEndian())
ShiftAmt = Offset*8;
else
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
-
+
if (ShiftAmt)
SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
-
+
if (LoadSize != StoreSize)
SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
-
+
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
@@ -1061,14 +1061,14 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
NewLoadSize = NextPowerOf2(NewLoadSize);
Value *PtrVal = SrcVal->getPointerOperand();
-
+
// Insert the new load after the old load. This ensures that subsequent
// memdep queries will find the new load. We can't easily remove the old
// load completely because it is already in the value numbering table.
IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
- Type *DestPTy =
+ Type *DestPTy =
IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
- DestPTy = PointerType::get(DestPTy,
+ DestPTy = PointerType::get(DestPTy,
cast<PointerType>(PtrVal->getType())->getAddressSpace());
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
@@ -1078,7 +1078,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
-
+
// Replace uses of the original load with the wider load. On a big endian
// system, we need to shift down to get the relevant bits.
Value *RV = NewLoad;
@@ -1087,7 +1087,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
RV = Builder.CreateTrunc(RV, SrcVal->getType());
SrcVal->replaceAllUsesWith(RV);
-
+
// We would like to use gvn.markInstructionForDeletion here, but we can't
// because the load is already memoized into the leader map table that GVN
// tracks. It is potentially possible to remove the load from the table,
@@ -1096,7 +1096,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
gvn.getMemDep().removeInstruction(SrcVal);
SrcVal = NewLoad;
}
-
+
return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
}
@@ -1110,7 +1110,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
-
+
// We know that this method is only called when the mem transfer fully
// provides the bits for the load.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
@@ -1119,9 +1119,9 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Value *Val = MSI->getValue();
if (LoadSize != 1)
Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
-
+
Value *OneElt = Val;
-
+
// Splat the value out to the right number of bits.
for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
// If we can double the number of bytes set, do it.
@@ -1131,16 +1131,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
NumBytesSet <<= 1;
continue;
}
-
+
// Otherwise insert one byte at a time.
Value *ShVal = Builder.CreateShl(Val, 1*8);
Val = Builder.CreateOr(OneElt, ShVal);
++NumBytesSet;
}
-
+
return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
}
-
+
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
@@ -1149,7 +1149,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
llvm::Type::getInt8PtrTy(Src->getContext()));
- Constant *OffsetCst =
+ Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
@@ -1166,13 +1166,13 @@ struct AvailableValueInBlock {
LoadVal, // A value produced by a load.
MemIntrin // A memory intrinsic which is loaded from.
};
-
+
/// V - The value that is live out of the block.
PointerIntPair<Value *, 2, ValType> Val;
-
+
/// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset;
-
+
static AvailableValueInBlock get(BasicBlock *BB, Value *V,
unsigned Offset = 0) {
AvailableValueInBlock Res;
@@ -1192,7 +1192,7 @@ struct AvailableValueInBlock {
Res.Offset = Offset;
return Res;
}
-
+
static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
unsigned Offset = 0) {
AvailableValueInBlock Res;
@@ -1211,17 +1211,17 @@ struct AvailableValueInBlock {
assert(isSimpleValue() && "Wrong accessor");
return Val.getPointer();
}
-
+
LoadInst *getCoercedLoadValue() const {
assert(isCoercedLoadValue() && "Wrong accessor");
return cast<LoadInst>(Val.getPointer());
}
-
+
MemIntrinsic *getMemIntrinValue() const {
assert(isMemIntrinValue() && "Wrong accessor");
return cast<MemIntrinsic>(Val.getPointer());
}
-
+
/// MaterializeAdjustedValue - Emit code into this block to adjust the value
/// defined here to the specified type. This handles various coercion cases.
Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
@@ -1233,7 +1233,7 @@ struct AvailableValueInBlock {
assert(TD && "Need target data to handle type mismatch case");
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
*TD);
-
+
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1245,7 +1245,7 @@ struct AvailableValueInBlock {
} else {
Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
gvn);
-
+
DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
<< *getCoercedLoadValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1268,12 +1268,12 @@ struct AvailableValueInBlock {
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
-static Value *ConstructSSAForLoadSet(LoadInst *LI,
+static Value *ConstructSSAForLoadSet(LoadInst *LI,
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
GVN &gvn) {
// Check for the fully redundant, dominating load case. In this case, we can
// just use the dominating value directly.
- if (ValuesPerBlock.size() == 1 &&
+ if (ValuesPerBlock.size() == 1 &&
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
LI->getParent()))
return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
@@ -1282,29 +1282,29 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName());
-
+
Type *LoadTy = LI->getType();
-
+
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
BasicBlock *BB = AV.BB;
-
+
if (SSAUpdate.HasValueForBlock(BB))
continue;
SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
}
-
+
// Perform PHI construction.
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
-
+
// If new PHI nodes were created, notify alias analysis.
if (V->getType()->isPointerTy()) {
AliasAnalysis *AA = gvn.getAliasAnalysis();
-
+
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
AA->copyValue(LI, NewPHIs[i]);
-
+
// Now that we've copied information to the new PHIs, scan through
// them again and inform alias analysis that we've added potentially
// escaping uses to any values that are operands to these PHIs.
@@ -1376,7 +1376,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// the pointer operand of the load if PHI translation occurs. Make sure
// to consider the right address.
Value *Address = Deps[i].getAddress();
-
+
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
// stored value.
@@ -1392,7 +1392,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
}
}
}
-
+
// Check to see if we have something like this:
// load i32* P
// load i8* (P+1)
@@ -1404,7 +1404,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
LI->getPointerOperand(),
DepLI, *TD);
-
+
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
Offset));
@@ -1423,10 +1423,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
Offset));
continue;
- }
+ }
}
}
-
+
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1443,7 +1443,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
UndefValue::get(LI->getType())));
continue;
}
-
+
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
// different types if we have to.
@@ -1461,7 +1461,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
S->getValueOperand()));
continue;
}
-
+
if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
// If the types mismatch and we can't handle it, reject reuse of the load.
if (LD->getType() != LI->getType()) {
@@ -1470,12 +1470,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
UnavailableBlocks.push_back(DepBB);
continue;
- }
+ }
}
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
continue;
}
-
+
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1489,7 +1489,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// its value. Insert PHIs and remove the fully redundant value now.
if (UnavailableBlocks.empty()) {
DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
-
+
// Perform PHI construction.
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
LI->replaceAllUsesWith(V);
@@ -1532,10 +1532,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return false;
if (Blockers.count(TmpBB))
return false;
-
+
// If any of these blocks has more than one successor (i.e. if the edge we
- // just traversed was critical), then there are other paths through this
- // block along which the load may not be anticipated. Hoisting the load
+ // just traversed was critical), then there are other paths through this
+ // block along which the load may not be anticipated. Hoisting the load
// above this block would be adding the load to execution paths along
// which it was not previously executed.
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
@@ -1613,7 +1613,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should be eliminated above!");
-
+
// If this load is unavailable in multiple predecessors, reject it.
// FIXME: If we could restructure the CFG, we could make a common pred with
// all the preds that don't have an available LI and insert a new load into
@@ -1690,10 +1690,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
DEBUG(if (!NewInsts.empty())
dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
<< *NewInsts.back() << '\n');
-
+
// Assign value numbers to the new instructions.
for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
- // FIXME: We really _ought_ to insert these value numbers into their
+ // FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
// marking a value as AVAIL-IN, which isn't what we intend.
@@ -1795,7 +1795,7 @@ bool GVN::processLoad(LoadInst *L) {
markInstructionForDeletion(L);
return true;
}
-
+
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
@@ -1821,7 +1821,7 @@ bool GVN::processLoad(LoadInst *L) {
AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
L->getType(), L, *TD);
}
-
+
// Check to see if we have something like this:
// load i32* P
// load i8* (P+1)
@@ -1831,14 +1831,14 @@ bool GVN::processLoad(LoadInst *L) {
// we have the first instruction in the entry block.
if (DepLI == L)
return false;
-
+
int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
L->getPointerOperand(),
DepLI, *TD);
if (Offset != -1)
AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
}
-
+
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
@@ -1848,11 +1848,11 @@ bool GVN::processLoad(LoadInst *L) {
if (Offset != -1)
AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
}
-
+
if (AvailVal) {
DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
<< *AvailVal << '\n' << *L << "\n\n\n");
-
+
// Replace the load!
L->replaceAllUsesWith(AvailVal);
if (AvailVal->getType()->isPointerTy())
@@ -1862,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L) {
return true;
}
}
-
+
// If the value isn't available, don't do anything!
if (Dep.isClobber()) {
DEBUG(
@@ -1892,7 +1892,7 @@ bool GVN::processLoad(LoadInst *L) {
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getValueOperand();
-
+
// The store and load are to a must-aliased pointer, but they may not
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
@@ -1902,11 +1902,11 @@ bool GVN::processLoad(LoadInst *L) {
L, *TD);
if (StoredVal == 0)
return false;
-
+
DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
<< '\n' << *L << "\n\n\n");
}
- else
+ else
return false;
}
@@ -1921,7 +1921,7 @@ bool GVN::processLoad(LoadInst *L) {
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
Value *AvailableVal = DepLI;
-
+
// The loads are of a must-aliased pointer, but they may not actually have
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
@@ -1931,14 +1931,14 @@ bool GVN::processLoad(LoadInst *L) {
L, *TD);
if (AvailableVal == 0)
return false;
-
+
DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
<< "\n" << *L << "\n\n\n");
}
- else
+ else
return false;
}
-
+
// Remove it!
patchAndReplaceAllUsesWith(AvailableVal, L);
if (DepLI->getType()->isPointerTy())
@@ -1957,7 +1957,7 @@ bool GVN::processLoad(LoadInst *L) {
++NumGVNLoad;
return true;
}
-
+
// If this load occurs either right after a lifetime begin,
// then the loaded value is undefined.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
@@ -1972,28 +1972,28 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
-// findLeader - In order to find a leader for a given value number at a
+// findLeader - In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
-// and then scan the list to find one whose block dominates the block in
+// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
-Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
+Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
if (!Vals.Val) return 0;
-
+
Value *Val = 0;
if (DT->dominates(Vals.BB, BB)) {
Val = Vals.Val;
if (isa<Constant>(Val)) return Val;
}
-
+
LeaderTableEntry* Next = Vals.Next;
while (Next) {
if (DT->dominates(Next->BB, BB)) {
if (isa<Constant>(Next->Val)) return Next->Val;
if (!Val) Val = Next->Val;
}
-
+
Next = Next->Next;
}
@@ -2004,22 +2004,13 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
/// use is dominated by the given basic block. Returns the number of uses that
/// were replaced.
unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
- BasicBlock *Root) {
+ const BasicBlockEdge &Root) {
unsigned Count = 0;
for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ) {
Use &U = (UI++).getUse();
- // If From occurs as a phi node operand then the use implicitly lives in the
- // corresponding incoming block. Otherwise it is the block containing the
- // user that must be dominated by Root.
- BasicBlock *UsingBlock;
- if (PHINode *PN = dyn_cast<PHINode>(U.getUser()))
- UsingBlock = PN->getIncomingBlock(U);
- else
- UsingBlock = cast<Instruction>(U.getUser())->getParent();
-
- if (DT->dominates(Root, UsingBlock)) {
+ if (DT->dominates(Root, U)) {
U.set(To);
++Count;
}
@@ -2027,13 +2018,34 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
return Count;
}
+/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
+/// true if every path from the entry block to 'Dst' passes via this edge. In
+/// particular 'Dst' must not be reachable via another edge from 'Src'.
+static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
+ DominatorTree *DT) {
+ // While in theory it is interesting to consider the case in which Dst has
+ // more than one predecessor, because Dst might be part of a loop which is
+ // only reachable from Src, in practice it is pointless since at the time
+ // GVN runs all such loops have preheaders, which means that Dst will have
+ // been changed to have only one predecessor, namely Src.
+ const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
+ const BasicBlock *Src = E.getStart();
+ assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
+ (void)Src;
+ return Pred != 0;
+}
+
/// propagateEquality - The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
-bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
+bool GVN::propagateEquality(Value *LHS, Value *RHS,
+ const BasicBlockEdge &Root) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
+ // For speed, compute a conservative fast approximation to
+ // DT->dominates(Root, Root.getEnd());
+ bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
while (!Worklist.empty()) {
std::pair<Value*, Value*> Item = Worklist.pop_back_val();
@@ -2065,9 +2077,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
LVN = RVN;
}
}
- assert((!isa<Instruction>(RHS) ||
- DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) &&
- "Instruction doesn't dominate scope!");
// If value numbering later sees that an instruction in the scope is equal
// to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
@@ -2076,8 +2085,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
// if RHS is an instruction (if an instruction in the scope is morphed into
// LHS then it will be turned into RHS by the next GVN iteration anyway, so
// using the leader table is about compiling faster, not optimizing better).
- if (!isa<Instruction>(RHS))
- addToLeaderTable(LVN, RHS, Root);
+ // The leader table only tracks basic blocks, not edges. Only add to if we
+ // have the simple case where the edge dominates the end.
+ if (RootDominatesEnd && !isa<Instruction>(RHS))
+ addToLeaderTable(LVN, RHS, Root.getEnd());
// Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
// LHS always has at least one use that is not dominated by Root, this will
@@ -2136,7 +2147,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
// If the number we were assigned was brand new then there is no point in
// looking for an instruction realizing it: there cannot be one!
if (Num < NextNum) {
- Value *NotCmp = findLeader(Root, Num);
+ Value *NotCmp = findLeader(Root.getEnd(), Num);
if (NotCmp && isa<Instruction>(NotCmp)) {
unsigned NumReplacements =
replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
@@ -2146,7 +2157,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
}
// Ensure that any instruction in scope that gets the "A < B" value number
// is replaced with false.
- addToLeaderTable(Num, NotVal, Root);
+ // The leader table only tracks basic blocks, not edges. Only add to if we
+ // have the simple case where the edge dominates the end.
+ if (RootDominatesEnd)
+ addToLeaderTable(Num, NotVal, Root.getEnd());
continue;
}
@@ -2155,22 +2169,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
return Changed;
}
-/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
-/// true if every path from the entry block to 'Dst' passes via this edge. In
-/// particular 'Dst' must not be reachable via another edge from 'Src'.
-static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst,
- DominatorTree *DT) {
- // While in theory it is interesting to consider the case in which Dst has
- // more than one predecessor, because Dst might be part of a loop which is
- // only reachable from Src, in practice it is pointless since at the time
- // GVN runs all such loops have preheaders, which means that Dst will have
- // been changed to have only one predecessor, namely Src.
- BasicBlock *Pred = Dst->getSinglePredecessor();
- assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
- (void)Src;
- return Pred != 0;
-}
-
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I) {
@@ -2210,18 +2208,20 @@ bool GVN::processInstruction(Instruction *I) {
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
+ // Avoid multiple edges early.
+ if (TrueSucc == FalseSucc)
+ return false;
+
BasicBlock *Parent = BI->getParent();
bool Changed = false;
- if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT))
- Changed |= propagateEquality(BranchCond,
- ConstantInt::getTrue(TrueSucc->getContext()),
- TrueSucc);
+ Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
+ BasicBlockEdge TrueE(Parent, TrueSucc);
+ Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
- if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT))
- Changed |= propagateEquality(BranchCond,
- ConstantInt::getFalse(FalseSucc->getContext()),
- FalseSucc);
+ Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
+ BasicBlockEdge FalseE(Parent, FalseSucc);
+ Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
return Changed;
}
@@ -2234,8 +2234,9 @@ bool GVN::processInstruction(Instruction *I) {
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
BasicBlock *Dst = i.getCaseSuccessor();
- if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
- Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst);
+ BasicBlockEdge E(Parent, Dst);
+ if (E.isSingleEdge())
+ Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
}
return Changed;
}
@@ -2243,7 +2244,7 @@ bool GVN::processInstruction(Instruction *I) {
// Instructions with void type don't return a value, so there's
// no point in trying to find redundancies in them.
if (I->getType()->isVoidTy()) return false;
-
+
uint32_t NextNum = VN.getNextUnusedValueNumber();
unsigned Num = VN.lookup_or_add(I);
@@ -2261,7 +2262,7 @@ bool GVN::processInstruction(Instruction *I) {
addToLeaderTable(Num, I, I->getParent());
return false;
}
-
+
// Perform fast-path value-number based elimination of values inherited from
// dominators.
Value *repl = findLeader(I->getParent(), Num);
@@ -2270,7 +2271,7 @@ bool GVN::processInstruction(Instruction *I) {
addToLeaderTable(Num, I, I->getParent());
return false;
}
-
+
// Remove it!
patchAndReplaceAllUsesWith(repl, I);
if (MD && repl->getType()->isPointerTy())
@@ -2297,7 +2298,7 @@ bool GVN::runOnFunction(Function& F) {
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
BasicBlock *BB = FI++;
-
+
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
if (removedBlock) ++NumGVNBlocks;
@@ -2454,7 +2455,7 @@ bool GVN::performPRE(Function &F) {
// we would need to insert instructions in more than one pred.
if (NumWithout != 1 || NumWith == 0)
continue;
-
+
// Don't do PRE across indirect branch.
if (isa<IndirectBrInst>(PREPred->getTerminator()))
continue;
@@ -2530,7 +2531,7 @@ bool GVN::performPRE(Function &F) {
unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
}
-
+
if (MD)
MD->invalidateCachedPointerInfo(Phi);
}
@@ -2567,7 +2568,7 @@ bool GVN::splitCriticalEdges() {
/// iterateOnFunction - Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
cleanupGlobalSets();
-
+
// Top-down walk of the dominator tree
bool Changed = false;
#if 0
@@ -2602,7 +2603,7 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
const LeaderTableEntry *Node = &I->second;
assert(Node->Val != Inst && "Inst still in value numbering scope!");
-
+
while (Node->Next) {
Node = Node->Next;
assert(Node->Val != Inst && "Inst still in value numbering scope!");
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index c2bd6e69ee..b36a3cb776 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -12,7 +12,7 @@
// global). Such a transformation can significantly reduce the register pressure
// when many globals are involved.
//
-// For example, consider the code which touches several global variables at
+// For example, consider the code which touches several global variables at
// once:
//
// static int foo[N], bar[N], baz[N];
@@ -208,8 +208,8 @@ bool GlobalMerge::doInitialization(Module &M) {
if (BSSGlobals.size() > 1)
Changed |= doMerge(BSSGlobals, M, false);
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
+ // FIXME: This currently breaks the EH processing due to way how the
+ // typeinfo detection works. We might want to detect the TIs and ignore
// them in the future.
// if (ConstGlobals.size() > 1)
// Changed |= doMerge(ConstGlobals, M, true);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index a9ba6579db..37f8bdfbff 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1215,21 +1215,26 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
return 0;
}
-/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
-/// that the current exit test is already sufficiently canonical.
-static bool needsLFTR(Loop *L, DominatorTree *DT) {
+/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
+static ICmpInst *getLoopTest(Loop *L) {
assert(L->getExitingBlock() && "expected loop exit");
BasicBlock *LatchBlock = L->getLoopLatch();
// Don't bother with LFTR if the loop is not properly simplified.
if (!LatchBlock)
- return false;
+ return 0;
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
assert(BI && "expected exit branch");
+ return dyn_cast<ICmpInst>(BI->getCondition());
+}
+
+/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
+/// that the current exit test is already sufficiently canonical.
+static bool needsLFTR(Loop *L, DominatorTree *DT) {
// Do LFTR to simplify the exit condition to an ICMP.
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ ICmpInst *Cond = getLoopTest(L);
if (!Cond)
return true;
@@ -1259,6 +1264,48 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
return Phi != getLoopPhiForCounter(IncV, L, DT);
}
+/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
+/// down to checking that all operands are constant and listing instructions
+/// that may hide undef.
+static bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited,
+ unsigned Depth) {
+ if (isa<Constant>(V))
+ return !isa<UndefValue>(V);
+
+ if (Depth >= 6)
+ return false;
+
+ // Conservatively handle non-constant non-instructions. For example, Arguments
+ // may be undef.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // Load and return values may be undef.
+ if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
+ return false;
+
+ // Optimistically handle other instructions.
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
+ if (!Visited.insert(*OI))
+ continue;
+ if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
+ return false;
+ }
+ return true;
+}
+
+/// Return true if the given value is concrete. We must prove that undef can
+/// never reach it.
+///
+/// TODO: If we decide that this is a good approach to checking for undef, we
+/// may factor it into a common location.
+static bool hasConcreteDef(Value *V) {
+ SmallPtrSet<Value*, 8> Visited;
+ Visited.insert(V);
+ return hasConcreteDefImpl(V, Visited, 0);
+}
+
/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
/// be rewritten) loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
@@ -1283,6 +1330,8 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
///
+/// Currently only valid for LFTR. See the comments on hasConcreteDef below.
+///
/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
///
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
@@ -1331,6 +1380,19 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
if (getLoopPhiForCounter(IncV, L, DT) != Phi)
continue;
+ // Avoid reusing a potentially undef value to compute other values that may
+ // have originally had a concrete definition.
+ if (!hasConcreteDef(Phi)) {
+ // We explicitly allow unknown phis as long as they are already used by
+ // the loop test. In this case we assume that performing LFTR could not
+ // increase the number of undef users.
+ if (ICmpInst *Cond = getLoopTest(L)) {
+ if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT)
+ && Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
+ continue;
+ }
+ }
+ }
const SCEV *Init = AR->getStart();
if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@@ -1347,7 +1409,7 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
// If two IVs both count from zero or both count from nonzero then the
// narrower is likely a dead phi that has been widened. Use the wider phi
// to allow the other to be eliminated.
- if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
+ else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
continue;
}
BestPhi = Phi;
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 429b61b6e5..dd42c59059 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -670,6 +670,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
Condition = SI->getCondition();
} else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+ // Can't thread indirect branch with no successors.
+ if (IB->getNumSuccessors() == 0) return false;
Condition = IB->getAddress()->stripPointerCasts();
Preference = WantBlockAddress;
} else {
@@ -859,7 +861,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If all of the loads and stores that feed the value have the same TBAA tag,
// then we can propagate it onto any newly inserted loads.
- MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
+ MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
@@ -885,7 +887,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
OneUnavailablePred = PredBB;
continue;
}
-
+
// If tbaa tags disagree or are not present, forget about them.
if (TBAATag != ThisTBAATag) TBAATag = 0;
@@ -949,7 +951,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
NewVal->setDebugLoc(LI->getDebugLoc());
if (TBAATag)
NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
+
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 582948ea14..0192e928fe 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -175,7 +175,9 @@ namespace {
bool canSinkOrHoistInst(Instruction &I);
bool isNotUsedInLoop(Instruction &I);
- void PromoteAliasSet(AliasSet &AS);
+ void PromoteAliasSet(AliasSet &AS,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ SmallVectorImpl<Instruction*> &InsertPts);
};
}
@@ -256,10 +258,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ SmallVector<Instruction *, 8> InsertPts;
+
// Loop over all of the alias sets in the tracker object.
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
I != E; ++I)
- PromoteAliasSet(*I);
+ PromoteAliasSet(*I, ExitBlocks, InsertPts);
}
// Clear out loops state information for the next iteration
@@ -631,6 +636,7 @@ namespace {
Value *SomePtr; // Designated pointer to store to.
SmallPtrSet<Value*, 4> &PointerMustAliases;
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+ SmallVectorImpl<Instruction*> &LoopInsertPts;
AliasSetTracker &AST;
DebugLoc DL;
int Alignment;
@@ -638,11 +644,12 @@ namespace {
LoopPromoter(Value *SP,
const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
SmallPtrSet<Value*, 4> &PMA,
- SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
- DebugLoc dl, int alignment)
+ SmallVectorImpl<BasicBlock*> &LEB,
+ SmallVectorImpl<Instruction*> &LIP,
+ AliasSetTracker &ast, DebugLoc dl, int alignment)
: LoadAndStorePromoter(Insts, S), SomePtr(SP),
- PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
- Alignment(alignment) {}
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP),
+ AST(ast), DL(dl), Alignment(alignment) {}
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &) const {
@@ -662,7 +669,7 @@ namespace {
for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- Instruction *InsertPos = ExitBlock->getFirstInsertionPt();
+ Instruction *InsertPos = LoopInsertPts[i];
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
@@ -684,7 +691,9 @@ namespace {
/// looping over the stores in the loop, looking for stores to Must pointers
/// which are loop invariant.
///
-void LICM::PromoteAliasSet(AliasSet &AS) {
+void LICM::PromoteAliasSet(AliasSet &AS,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ SmallVectorImpl<Instruction*> &InsertPts) {
// We can promote this alias set if it has a store, if it is a "Must" alias
// set, if the pointer is loop invariant, and if we are not eliminating any
// volatile loads or stores.
@@ -794,14 +803,20 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
// location is better than none.
DebugLoc DL = LoopUses[0]->getDebugLoc();
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getUniqueExitBlocks(ExitBlocks);
+ // Figure out the loop exits and their insertion points, if this is the
+ // first promotion.
+ if (ExitBlocks.empty()) {
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ InsertPts.resize(ExitBlocks.size());
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt();
+ }
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- *CurAST, DL, Alignment);
+ InsertPts, *CurAST, DL, Alignment);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index f7f32981ba..3771f5aa97 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -32,10 +32,10 @@ namespace {
LoopDeletion() : LoopPass(ID) {
initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
}
-
+
// Possibly eliminate loop L if it is dead.
bool runOnLoop(Loop* L, LPPassManager& LPM);
-
+
bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
SmallVector<BasicBlock*, 4>& exitBlocks,
bool &Changed, BasicBlock *Preheader);
@@ -46,7 +46,7 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
-
+
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<LoopInfo>();
@@ -55,7 +55,7 @@ namespace {
}
};
}
-
+
char LoopDeletion::ID = 0;
INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
@@ -79,7 +79,7 @@ bool LoopDeletion::IsLoopDead(Loop* L,
SmallVector<BasicBlock*, 4>& exitBlocks,
bool &Changed, BasicBlock *Preheader) {
BasicBlock* exitBlock = exitBlocks[0];
-
+
// Make sure that all PHI entries coming from the loop are loop invariant.
// Because the code is in LCSSA form, any values used outside of the loop
// must pass through a PHI in the exit block, meaning that this check is
@@ -97,14 +97,14 @@ bool LoopDeletion::IsLoopDead(Loop* L,
if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
return false;
}
-
+
if (Instruction* I = dyn_cast<Instruction>(incoming))
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
++BI;
}
-
+
// Make sure that no instructions in the block have potential side-effects.
// This includes instructions that could write to memory, and loads that are
// marked volatile. This could be made more aggressive by using aliasing
@@ -117,23 +117,23 @@ bool LoopDeletion::IsLoopDead(Loop* L,
return false;
}
}
-
+
return true;
}
/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
-/// observable behavior of the program other than finite running time. Note
+/// observable behavior of the program other than finite running time. Note
/// we do ensure that this never remove a loop that might be infinite, as doing
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
- // We can only remove the loop if there is a preheader that we can
+ // We can only remove the loop if there is a preheader that we can
// branch from after removing it.
BasicBlock* preheader = L->getLoopPreheader();
if (!preheader)
return false;
-
+
// If LoopSimplify form is not available, stay out of trouble.
if (!L->hasDedicatedExits())
return false;
@@ -142,36 +142,36 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// they would already have been removed in earlier executions of this pass.
if (L->begin() != L->end())
return false;
-
+
SmallVector<BasicBlock*, 4> exitingBlocks;
L->getExitingBlocks(exitingBlocks);
-
+
SmallVector<BasicBlock*, 4> exitBlocks;
L->getUniqueExitBlocks(exitBlocks);
-
+
// We require that the loop only have a single exit block. Otherwise, we'd
// be in the situation of needing to be able to solve statically which exit
// block will be branched to, or trying to preserve the branching logic in
// a loop invariant manner.
if (exitBlocks.size() != 1)
return false;
-
+
// Finally, we have to check that the loop really is dead.
bool Changed = false;
if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
return Changed;
-
+
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
-
+
// Now that we know the removal is safe, remove the loop by changing the
- // branch from the preheader to go to the single exit block.
+ // branch from the preheader to go to the single exit block.
BasicBlock* exitBlock = exitBlocks[0];
-
+
// Because we're deleting a large chunk of code at once, the sequence in which
// we remove things is very important to avoid invalidation issues. Don't
// mess with this unless you have good reason and know what you're doing.
@@ -197,7 +197,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
P->removeIncomingValue(exitingBlocks[i]);
++BI;
}
-
+
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
DominatorTree& DT = getAnalysis<DominatorTree>();
@@ -211,7 +211,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
DE = ChildNodes.end(); DI != DE; ++DI) {
DT.changeImmediateDominator(*DI, DT[preheader]);
}
-
+
ChildNodes.clear();
DT.eraseNode(*LI);
@@ -219,7 +219,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// delete it freely later.
(*LI)->dropAllReferences();
}
-
+
// Erase the instructions and the blocks without having to worry
// about ordering because we already dropped the references.
// NOTE: This iteration is safe because erasing the block does not remove its
@@ -236,13 +236,13 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
E = blocks.end(); I != E; ++I)
loopInfo.removeBlock(*I);
-
+
// The last step is to inform the loop pass manager that we've
// eliminated this loop.
LPM.deleteLoopFromQueue(L);
Changed = true;
-
+
++NumDeleted;
-
+
return Changed;
}
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 5fe9462159..ac1082cbfb 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -173,7 +173,7 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
CurLoop = L;
- // Disable loop idiom recognition if the function's name is a common idiom.
+ // Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
if (Name == "memset" || Name == "memcpy")
return false;
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index f0f05e6f50..982400c5a3 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -48,7 +48,7 @@ namespace {
}
};
}
-
+
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 59aace9e36..7eeb1527ad 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -418,12 +418,13 @@ bool LoopRotate::rotateLoop(Loop *L) {
}
// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
- // thus is not a preheader anymore. Split the edge to form a real preheader.
+ // thus is not a preheader anymore.
+ // Split the edge to form a real preheader.
BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
NewPH->setName(NewHeader->getName() + ".lr.ph");
- // Preserve canonical loop form, which means that 'Exit' should have only one
- // predecessor.
+ // Preserve canonical loop form, which means that 'Exit' should have only
+ // one predecessor.
BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
ExitSplit->moveBefore(Exit);
} else {
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4ba969e675..0ae7a5151e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -738,7 +738,8 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
+ Value *V = DeadInsts.pop_back_val();
+ Instruction *I = dyn_cast_or_null<Instruction>(V);
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
@@ -2836,7 +2837,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
@@ -3006,42 +3007,64 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
-static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
- SmallVectorImpl<const SCEV *> &Ops,
- const Loop *L,
- ScalarEvolution &SE) {
+///
+/// Return remainder expression after factoring the subexpressions captured by
+/// Ops. If Ops is complete, return NULL.
+static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L,
+ ScalarEvolution &SE,
+ unsigned Depth = 0) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return S;
+
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- CollectSubexprs(*I, C, Ops, L, SE);
- return;
+ I != E; ++I) {
+ const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ }
+ return NULL;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
- if (!AR->getStart()->isZero()) {
- CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
- AR->getStepRecurrence(SE),
- AR->getLoop(),
- //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
- SCEV::FlagAnyWrap),
- C, Ops, L, SE);
- CollectSubexprs(AR->getStart(), C, Ops, L, SE);
- return;
+ if (AR->getStart()->isZero())
+ return S;
+
+ const SCEV *Remainder = CollectSubexprs(AR->getStart(),
+ C, Ops, L, SE, Depth+1);
+ // Split the non-zero AddRec unless it is part of a nested recurrence that
+ // does not pertain to this loop.
+ if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ Remainder = NULL;
+ }
+ if (Remainder != AR->getStart()) {
+ if (!Remainder)
+ Remainder = SE.getConstant(AR->getType(), 0);
+ return SE.getAddRecExpr(Remainder,
+ AR->getStepRecurrence(SE),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
// Break (C * (a + b + c)) into C*a + C*b + C*c.
- if (Mul->getNumOperands() == 2)
- if (const SCEVConstant *Op0 =
- dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
- CollectSubexprs(Mul->getOperand(1),
- C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, L, SE);
- return;
- }
+ if (Mul->getNumOperands() != 2)
+ return S;
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
+ const SCEV *Remainder =
+ CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(SE.getMulExpr(C, Remainder));
+ return NULL;
+ }
}
-
- // Otherwise use the value itself, optionally with a scale applied.
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ return S;
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -3056,7 +3079,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
const SCEV *BaseReg = Base.BaseRegs[i];
SmallVector<const SCEV *, 8> AddOps;
- CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
if (AddOps.size() == 1) continue;
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 221911866c..7419a6543e 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -25,12 +25,12 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
Value *Ptr = CXI->getPointerOperand();
Value *Cmp = CXI->getCompareOperand();
Value *Val = CXI->getNewValOperand();
-
+
LoadInst *Orig = Builder.CreateLoad(Ptr);
Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
-
+
CXI->replaceAllUsesWith(Orig);
CXI->eraseFromParent();
return true;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 052cc3dac0..2a5ee33eb1 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -44,7 +44,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
/*skip along*/;
-
+
// Compute the offset implied by the rest of the indices.
int64_t Offset = 0;
for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
@@ -58,7 +58,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
-
+
// Otherwise, we have a sequential type like an array or vector. Multiply
// the index by the ElementSize.
uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
@@ -77,7 +77,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
Ptr2 = Ptr2->stripPointerCasts();
GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
-
+
bool VariableIdxFound = false;
// If one pointer is a GEP and the other isn't, then see if the GEP is a
@@ -91,7 +91,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
-
+
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
// base. After that base, they may have some number of common (and
// potentially variable) indices. After that they handle some constant
@@ -99,7 +99,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
// handle no other case.
if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
return false;
-
+
// Skip any common indices and track the GEP types.
unsigned Idx = 1;
for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
@@ -109,7 +109,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
if (VariableIdxFound) return false;
-
+
Offset = Offset2-Offset1;
return true;
}
@@ -128,19 +128,19 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
namespace {
struct MemsetRange {
// Start/End - A semi range that describes the span that this range covers.
- // The range is closed at the start and open at the end: [Start, End).
+ // The range is closed at the start and open at the end: [Start, End).
int64_t Start, End;
/// StartPtr - The getelementptr instruction that points to the start of the
/// range.
Value *StartPtr;
-
+
/// Alignment - The known alignment of the first store.
unsigned Alignment;
-
+
/// TheStores - The actual stores that make up this range.
SmallVector<Instruction*, 16> TheStores;
-
+
bool isProfitableToUseMemset(const TargetData &TD) const;
};
@@ -152,17 +152,17 @@ bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
-
+
// If any of the stores are a memset, then it is always good to extend the
// memset.
for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
if (!isa<StoreInst>(TheStores[i]))
return true;
-
+
// Assume that the code generator is capable of merging pairs of stores
// together if it wants to.
if (TheStores.size() == 2) return false;
-
+
// If we have fewer than 8 stores, it can still be worthwhile to do this.
// For example, merging 4 i8 stores into an i32 store is useful almost always.
// However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
@@ -175,15 +175,15 @@ bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
// actually reducing the number of stores used.
unsigned Bytes = unsigned(End-Start);
unsigned NumPointerStores = Bytes/TD.getPointerSize();
-
+
// Assume the remaining bytes if any are done a byte at a time.
unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
-
+
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
// etc.
return TheStores.size() > NumPointerStores+NumByteStores;
-}
+}
namespace {
@@ -195,12 +195,12 @@ class MemsetRanges {
const TargetData &TD;
public:
MemsetRanges(const TargetData &td) : TD(td) {}
-
+
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
-
+
void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
addStore(OffsetFromFirst, SI);
@@ -210,21 +210,21 @@ public:
void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
-
+
addRange(OffsetFromFirst, StoreSize,
SI->getPointerOperand(), SI->getAlignment(), SI);
}
-
+
void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
}
-
+
void addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst);
};
-
+
} // end anon namespace
@@ -240,10 +240,10 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst) {
int64_t End = Start+Size;
range_iterator I = Ranges.begin(), E = Ranges.end();
-
+
while (I != E && Start > I->End)
++I;
-
+
// We now know that I == E, in which case we didn't find anything to merge
// with, or that Start <= I->End. If End < I->Start or I == E, then we need
// to insert a new range. Handle this now.
@@ -256,18 +256,18 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
R.TheStores.push_back(Inst);
return;
}
-
+
// This store overlaps with I, add it.
I->TheStores.push_back(Inst);
-
+
// At this point, we may have an interval that completely contains our store.
// If so, just add it to the interval and return.
if (I->Start <= Start && I->End >= End)
return;
-
+
// Now we know that Start <= I->End and End >= I->Start so the range overlaps
// but is not entirely contained within the range.
-
+
// See if the range extends the start of the range. In this case, it couldn't
// possibly cause it to join the prior range, because otherwise we would have
// stopped on *it*.
@@ -276,7 +276,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
I->StartPtr = Ptr;
I->Alignment = Alignment;
}
-
+
// Now we know that Start <= I->End and Start >= I->Start (so the startpoint
// is in or right at the end of I), and that End >= I->Start. Extend I out to
// End.
@@ -325,7 +325,7 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
-
+
// Helper fuctions
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
@@ -341,7 +341,7 @@ namespace {
bool iterateOnFunction(Function &F);
};
-
+
char MemCpyOpt::ID = 0;
}
@@ -361,16 +361,16 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
/// some other patterns to fold away. In particular, this looks for stores to
/// neighboring locations of memory. If it sees enough consecutive ones, it
/// attempts to merge them together into a memcpy/memset.
-Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
+Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
if (TD == 0) return 0;
-
+
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
MemsetRanges Ranges(*TD);
-
+
BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
@@ -381,43 +381,43 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
break;
continue;
}
-
+
if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
// If this is a store, see if we can merge it in.
if (!NextStore->isSimple()) break;
-
+
// Check to see if this stored value is of the same byte-splattable value.
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
break;
-
+
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
Offset, *TD))
break;
-
+
Ranges.addStore(Offset, NextStore);
} else {
MemSetInst *MSI = cast<MemSetInst>(BI);
-
+
if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
!isa<ConstantInt>(MSI->getLength()))
break;
-
+
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
break;
-
+
Ranges.addMemSet(Offset, MSI);
}
}
-
+
// If we have no ranges, then we just had a single store with nothing that
// could be merged in. This is a very common case of course.
if (Ranges.empty())
return 0;
-
+
// If we had at least one store that could be merged in, add the starting
// store as well. We try to avoid this unless there is at least something
// interesting as a small compile-time optimization.
@@ -434,28 +434,28 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
-
+
if (Range.TheStores.size() == 1) continue;
-
+
// If it is profitable to lower this range to memset, do so now.
if (!Range.isProfitableToUseMemset(*TD))
continue;
-
+
// Otherwise, we do want to transform this! Create a new memset.
// Get the starting pointer of the block.
StartPtr = Range.StartPtr;
-
+
// Determine alignment
unsigned Alignment = Range.Alignment;
if (Alignment == 0) {
- Type *EltType =
+ Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
Alignment = TD->getABITypeAlignment(EltType);
}
-
- AMemSet =
+
+ AMemSet =
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
-
+
DEBUG(dbgs() << "Replace stores:\n";
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
dbgs() << *Range.TheStores[i] << '\n';
@@ -473,14 +473,14 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
}
++NumMemSetInfer;
}
-
+
return AMemSet;
}
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
-
+
if (TD == 0) return false;
// Detect cases where we're performing call slot forwarding, but
@@ -510,7 +510,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
bool changed = performCallSlotOptzn(LI,
- SI->getPointerOperand()->stripPointerCasts(),
+ SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
if (changed) {
@@ -524,10 +524,10 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
}
}
}
-
+
// There are two cases that are interesting for this code to handle: memcpy
// and memset. Right now we only handle memset.
-
+
// Ensure that the value being stored is something that can be memset'able a
// byte at a time like "0" or "-1" or any width, as well as things like
// 0xA0A0A0A0 and 0.0.
@@ -537,7 +537,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
BBI = I; // Don't invalidate iterator.
return true;
}
-
+
return false;
}
@@ -680,7 +680,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (CS.getArgument(i)->getType() == cpyDest->getType())
CS.setArgument(i, cpyDest);
else
- CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+ CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
CS.getArgument(i)->getType(), cpyDest->getName(), C));
}
@@ -701,14 +701,14 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
/// copy from MDep's input if we can. MSize is the size of M's copy.
-///
+///
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
uint64_t MSize) {
// We can only transforms memcpy's where the dest of one is the source of the
// other.
if (M->getSource() != MDep->getDest() || MDep->isVolatile())
return false;
-
+
// If dep instruction is reading from our current input, then it is a noop
// transfer and substituting the input won't change this instruction. Just
// ignore the input and let someone else zap MDep. This handles cases like:
@@ -716,14 +716,14 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
// memcpy(b <- a)
if (M->getSource() == MDep->getSource())
return false;
-
+
// Second, the length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
-
+
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
// Verify that the copied-from memory doesn't change in between the two
@@ -743,23 +743,23 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
false, M, M->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
-
+
// If the dest of the second might alias the source of the first, then the
// source and dest might overlap. We still want to eliminate the intermediate
// value, but we have to generate a memmove instead of memcpy.
bool UseMemMove = false;
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
UseMemMove = true;
-
+
// If all checks passed, then we can transform M.
-
+
// Make sure to use the lesser of the alignment of the source and the dest
// since we're changing where we're reading from, but don't want to increase
// the alignment past what can be read from or written to.
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
-
+
IRBuilder<> Builder(M);
if (UseMemMove)
Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
@@ -839,13 +839,13 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
if (!TLI->has(LibFunc::memmove))
return false;
-
+
// See if the pointers alias.
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
return false;
-
+
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
-
+
// If not, then we know we can transform this.
Module *Mod = M->getParent()->getParent()->getParent();
Type *ArgTys[3] = { M->getRawDest()->getType(),
@@ -861,7 +861,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
++NumMoveToCpy;
return true;
}
-
+
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
if (TD == 0) return false;
@@ -884,7 +884,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
if (MDep == 0 || MDep->isVolatile() ||
ByValArg->stripPointerCasts() != MDep->getDest())
return false;
-
+
// The length of the memcpy must be larger or equal to the size of the byval.
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
@@ -894,13 +894,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// then it is some target specific value that we can't know.
unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
if (ByValAlign == 0) return false;
-
+
// If it is greater than the memcpy, then we check to see if we can force the
// source of the memcpy to the alignment we need. If we fail, we bail out.
if (MDep->getAlignment() < ByValAlign &&
getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
return false;
-
+
// Verify that the copied-from memory doesn't change in between the memcpy and
// the byval call.
// memcpy(a <- b)
@@ -915,16 +915,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
false, CS.getInstruction(), MDep->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
-
+
Value *TmpCast = MDep->getSource();
if (MDep->getSource()->getType() != ByValArg->getType())
TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
"tmpcast", CS.getInstruction());
-
+
DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
<< " " << *MDep << "\n"
<< " " << *CS.getInstruction() << "\n");
-
+
// Otherwise we're good! Update the byval argument.
CS.setArgument(ArgNo, TmpCast);
++NumMemCpyInstr;
@@ -940,9 +940,9 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
Instruction *I = BI++;
-
+
bool RepeatInstruction = false;
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(I))
MadeChange |= processStore(SI, BI);
else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
@@ -964,7 +964,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
}
}
}
-
+
return MadeChange;
}
@@ -976,19 +976,19 @@ bool MemCpyOpt::runOnFunction(Function &F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
TD = getAnalysisIfAvailable<TargetData>();
TLI = &getAnalysis<TargetLibraryInfo>();
-
+
// If we don't have at least memset and memcpy, there is little point of doing
// anything here. These are required by a freestanding implementation, so if
// even they are disabled, there is no point in trying hard.
if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
return false;
-
+
while (1) {
if (!iterateOnFunction(F))
break;
MadeChange = true;
}
-
+
MD = 0;
return MadeChange;
}
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 87e17c7f90..3222f2083b 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -380,14 +380,14 @@ static InstructionClass GetBasicInstructionClass(const Value *V) {
return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
}
-/// IsRetain - Test if the the given class is objc_retain or
+/// IsRetain - Test if the given class is objc_retain or
/// equivalent.
static bool IsRetain(InstructionClass Class) {
return Class == IC_Retain ||
Class == IC_RetainRV;
}
-/// IsAutorelease - Test if the the given class is objc_autorelease or
+/// IsAutorelease - Test if the given class is objc_autorelease or
/// equivalent.
static bool IsAutorelease(InstructionClass Class) {
return Class == IC_Autorelease ||
@@ -2829,7 +2829,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
}
S.IncrementNestCount();
- return NestingDetected;
+
+ // A retain can be a potential use; procede to the generic checking
+ // code below.
+ break;
}
case IC_Release: {
Arg = GetObjCArg(Inst);
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index bcf34b5256..09687d8909 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -375,7 +375,7 @@ typedef std::pair<Value*, APInt> RepeatedValue;
/// nodes in Ops along with their weights (how many times the leaf occurs). The
/// original expression is the same as
/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
-/// op
+/// op
/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
/// op
/// ...
@@ -543,6 +543,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
// Update the number of paths to the leaf.
IncorporateWeight(It->second, Weight, Opcode);
+#if 0 // TODO: Re-enable once PR13021 is fixed.
// The leaf already has one use from inside the expression. As we want
// exactly one such use, drop this new use of the leaf.
assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
@@ -559,6 +560,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
Leaves.erase(It);
continue;
}
+#endif
// If we still have uses that are not accounted for by the expression
// then it is not safe to modify the value.
@@ -1581,7 +1583,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
// If this is an interior node of a reassociable tree, ignore it until we
// get to the root of the tree, to avoid N^2 analysis.
- if (BO->hasOneUse() && BO->use_back()->getOpcode() == BO->getOpcode())
+ unsigned Opcode = BO->getOpcode();
+ if (BO->hasOneUse() && BO->use_back()->getOpcode() == Opcode)
return;
// If this is an add tree that is used by a sub instruction, ignore it
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 98b0d5f6d5..ea1de63de7 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -59,7 +59,7 @@ namespace {
virtual bool runOnFunction(Function &F);
};
}
-
+
char RegToMem::ID = 0;
INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
false, false)
@@ -68,25 +68,25 @@ INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
false, false)
bool RegToMem::runOnFunction(Function &F) {
- if (F.isDeclaration())
+ if (F.isDeclaration())
return false;
-
+
// Insert all new allocas into entry block.
BasicBlock *BBEntry = &F.getEntryBlock();
assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
"Entry block to function must not have predecessors!");
-
+
// Find first non-alloca instruction and create insertion point. This is
// safe if block is well-formed: it always have terminator, otherwise
// we'll get and assertion.
BasicBlock::iterator I = BBEntry->begin();
while (isa<AllocaInst>(I)) ++I;
-
+
CastInst *AllocaInsertionPoint =
new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
Type::getInt32Ty(F.getContext()),
"reg2mem alloca point", I);
-
+
// Find the escaped instructions. But don't create stack slots for
// allocas in entry block.
std::list<Instruction*> WorkList;
@@ -99,15 +99,15 @@ bool RegToMem::runOnFunction(Function &F) {
WorkList.push_front(&*iib);
}
}
-
+
// Demote escaped instructions
NumRegsDemoted += WorkList.size();
- for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
ile = WorkList.end(); ilb != ile; ++ilb)
DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
-
+
WorkList.clear();
-
+
// Find all phi's
for (Function::iterator ibb = F.begin(), ibe = F.end();
ibb != ibe; ++ibb)
@@ -115,19 +115,18 @@ bool RegToMem::runOnFunction(Function &F) {
iib != iie; ++iib)
if (isa<PHINode>(iib))
WorkList.push_front(&*iib);
-
+
// Demote phi nodes
NumPhisDemoted += WorkList.size();
- for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
ile = WorkList.end(); ilb != ile; ++ilb)
DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
-
+
return true;
}
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
-//
char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
return new RegToMem();
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 16b64a500b..2c39aab5cd 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -409,7 +409,7 @@ private:
if (Constant *C = dyn_cast<Constant>(V)) {
Constant *Elt = C->getAggregateElement(i);
-
+
if (Elt == 0)
LV.markOverdefined(); // Unknown sort of constant.
else if (isa<UndefValue>(Elt))
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 49970d4576..48318c8a55 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements common infrastructure for libLLVMScalarOpts.a, which
+// This file implements common infrastructure for libLLVMScalarOpts.a, which
// implements several scalar transformations over the LLVM intermediate
// representation, including the C bindings for that library.
//
@@ -24,12 +24,11 @@
using namespace llvm;
-/// initializeScalarOptsPasses - Initialize all passes linked into the
+/// initializeScalarOptsPasses - Initialize all passes linked into the
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
initializeBlockPlacementPass(Registry);
- initializeBoundsCheckingPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeConstantPropagationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e3e3c9eb17..8090fdf178 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -56,7 +56,6 @@ STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
-STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
@@ -104,7 +103,7 @@ namespace {
/// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
/// looping and avoid redundant work.
SmallPtrSet<PHINode*, 8> CheckedPHIs;
-
+
/// isUnsafe - This is set to true if the alloca cannot be SROA'd.
bool isUnsafe : 1;
@@ -118,12 +117,12 @@ namespace {
/// ever accessed, or false if the alloca is only accessed with mem
/// intrinsics or load/store that only access the entire alloca at once.
bool hasSubelementAccess : 1;
-
+
/// hasALoadOrStore - This is true if there are any loads or stores to it.
/// The alloca may just be accessed with memcpy, for example, which would
/// not set this.
bool hasALoadOrStore : 1;
-
+
explicit AllocaInfo(AllocaInst *ai)
: AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
hasSubelementAccess(false), hasALoadOrStore(false) {}
@@ -183,11 +182,8 @@ namespace {
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
bool ShouldAttemptScalarRepl(AllocaInst *AI);
-
- static MemTransferInst *isOnlyCopiedFromConstantGlobal(
- AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
};
-
+
// SROA_DT - SROA that uses DominatorTree.
struct SROA_DT : public SROA {
static char ID;
@@ -196,7 +192,7 @@ namespace {
SROA(T, true, ID, ST, AT, SLT) {
initializeSROA_DTPass(*PassRegistry::getPassRegistry());
}
-
+
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -204,7 +200,7 @@ namespace {
AU.setPreservesCFG();
}
};
-
+
// SROA_SSAUp - SROA that uses SSAUpdater.
struct SROA_SSAUp : public SROA {
static char ID;
@@ -213,14 +209,14 @@ namespace {
SROA(T, false, ID, ST, AT, SLT) {
initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
}
-
+
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
}
};
-
+
}
char SROA_DT::ID = 0;
@@ -294,7 +290,7 @@ class ConvertToScalarInfo {
/// isn't possible to turn into a vector type, it gets set to VoidTy.
VectorType *VectorTy;
- /// HadNonMemTransferAccess - True if there is at least one access to the
+ /// HadNonMemTransferAccess - True if there is at least one access to the
/// alloca that is not a MemTransferInst. We don't want to turn structs into
/// large integers unless there is some potential for optimization.
bool HadNonMemTransferAccess;
@@ -612,11 +608,16 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- if (!GEP->hasAllConstantIndices())
- NonConstantIdx = Indices.pop_back_val();
+ Value* GEPNonConstantIdx = 0;
+ if (!GEP->hasAllConstantIndices()) {
+ assert(!NonConstantIdx &&
+ "Dynamic GEP reading from dynamic GEP unsupported");
+ GEPNonConstantIdx = Indices.pop_back_val();
+ } else
+ GEPNonConstantIdx = NonConstantIdx;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
- ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx);
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
GEP->eraseFromParent();
continue;
}
@@ -1050,7 +1051,7 @@ public:
AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
DIBuilder *DB)
: LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
-
+
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
this->AI = AI;
@@ -1065,18 +1066,18 @@ public:
LoadAndStorePromoter::run(Insts);
AI->eraseFromParent();
- for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
DDI->eraseFromParent();
}
- for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
+ for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
DVI->eraseFromParent();
}
}
-
+
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &Insts) const {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
@@ -1085,7 +1086,7 @@ public:
}
virtual void updateDebugInfo(Instruction *Inst) const {
- for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
@@ -1093,7 +1094,7 @@ public:
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
}
- for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
Value *Arg = NULL;
@@ -1136,12 +1137,12 @@ public:
static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
-
+
for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
if (LI == 0 || !LI->isSimple()) return false;
-
+
// Both operands to the select need to be dereferencable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
@@ -1151,7 +1152,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
LI->getAlignment(), TD))
return false;
}
-
+
return true;
}
@@ -1182,20 +1183,20 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
if (LI == 0 || !LI->isSimple()) return false;
-
+
// For now we only allow loads in the same block as the PHI. This is a
// common case that happens when instcombine merges two loads through a PHI.
if (LI->getParent() != BB) return false;
-
+
// Ensure that there are no instructions between the PHI and the load that
// could store.
for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
-
+
MaxAlign = std::max(MaxAlign, LI->getAlignment());
}
-
+
// Okay, we know that we have one or more loads in the same block as the PHI.
// We can transform this if it is safe to push the loads into the predecessor
// blocks. The only thing to watch out for is that we can't put a possibly
@@ -1223,10 +1224,10 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
if (InVal->isDereferenceablePointer() ||
isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
continue;
-
+
return false;
}
-
+
return true;
}
@@ -1238,7 +1239,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
SetVector<Instruction*, SmallVector<Instruction*, 4>,
SmallPtrSet<Instruction*, 4> > InstsToRewrite;
-
+
for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
UI != UE; ++UI) {
User *U = *UI;
@@ -1247,7 +1248,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
return false;
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == AI || !SI->isSimple())
return false; // Don't allow a store OF the AI, only INTO the AI.
@@ -1261,7 +1262,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
Value *Result = SI->getOperand(1+CI->isZero());
SI->replaceAllUsesWith(Result);
SI->eraseFromParent();
-
+
// This is very rare and we just scrambled the use list of AI, start
// over completely.
return tryToMakeAllocaBePromotable(AI, TD);
@@ -1271,33 +1272,33 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
// loads, then we can transform this by rewriting the select.
if (!isSafeSelectToSpeculate(SI, TD))
return false;
-
+
InstsToRewrite.insert(SI);
continue;
}
-
+
if (PHINode *PN = dyn_cast<PHINode>(U)) {
if (PN->use_empty()) { // Dead PHIs can be stripped.
InstsToRewrite.insert(PN);
continue;
}
-
+
// If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
// in the pred blocks, then we can transform this by rewriting the PHI.
if (!isSafePHIToSpeculate(PN, TD))
return false;
-
+
InstsToRewrite.insert(PN);
continue;
}
-
+
if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
if (onlyUsedByLifetimeMarkers(BCI)) {
InstsToRewrite.insert(BCI);
continue;
}
}
-
+
return false;
}
@@ -1305,7 +1306,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
// we're done!
if (InstsToRewrite.empty())
return true;
-
+
// If we have instructions that need to be rewritten for this to be promotable
// take care of it now.
for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
@@ -1326,13 +1327,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
// loads with a new select.
while (!SI->use_empty()) {
LoadInst *LI = cast<LoadInst>(SI->use_back());
-
+
IRBuilder<> Builder(LI);
- LoadInst *TrueLoad =
+ LoadInst *TrueLoad =
Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
- LoadInst *FalseLoad =
+ LoadInst *FalseLoad =
Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
-
+
// Transfer alignment and TBAA info if present.
TrueLoad->setAlignment(LI->getAlignment());
FalseLoad->setAlignment(LI->getAlignment());
@@ -1340,18 +1341,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
}
-
+
Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
V->takeName(LI);
LI->replaceAllUsesWith(V);
LI->eraseFromParent();
}
-
+
// Now that all the loads are gone, the select is gone too.
SI->eraseFromParent();
continue;
}
-
+
// Otherwise, we have a PHI node which allows us to push the loads into the
// predecessors.
PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
@@ -1359,7 +1360,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
PN->eraseFromParent();
continue;
}
-
+
Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
PN->getName()+".ld", PN);
@@ -1369,18 +1370,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
unsigned Align = SomeLoad->getAlignment();
-
+
// Rewrite all loads of the PN to use the new PHI.
while (!PN->use_empty()) {
LoadInst *LI = cast<LoadInst>(PN->use_back());
LI->replaceAllUsesWith(NewPN);
LI->eraseFromParent();
}
-
+
// Inject loads into all of the pred blocks. Keep track of which blocks we
// insert them into in case we have multiple edges from the same block.
DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
-
+
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
LoadInst *&Load = InsertedLoads[Pred];
@@ -1391,13 +1392,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
Load->setAlignment(Align);
if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
}
-
+
NewPN->addIncoming(Load, Pred);
}
-
+
PN->eraseFromParent();
}
-
+
++NumAdjusted;
return true;
}
@@ -1430,7 +1431,7 @@ bool SROA::performPromotion(Function &F) {
SSAUpdater SSA;
for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
AllocaInst *AI = Allocas[i];
-
+
// Build list of instructions to promote.
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
@@ -1460,26 +1461,6 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
return false;
}
-/// getPointeeAlignment - Compute the minimum alignment of the value pointed
-/// to by the given pointer.
-static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
- (CE->getOpcode() == Instruction::GetElementPtr &&
- cast<GEPOperator>(CE)->hasAllZeroIndices()))
- return getPointeeAlignment(CE->getOperand(0), TD);
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- if (!GV->isDeclaration())
- return TD.getPreferredAlignment(GV);
-
- if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
- return TD.getABITypeAlignment(PT->getElementType());
-
- return 0;
-}
-
-
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the alloca instructions in the function, removing them
// if they are only used by getelementptr instructions.
@@ -1511,29 +1492,6 @@ bool SROA::performScalarRepl(Function &F) {
if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
continue;
- // Check to see if this allocation is only modified by a memcpy/memmove from
- // a constant global whose alignment is equal to or exceeds that of the
- // allocation. If this is the case, we can change all users to use
- // the constant global instead. This is commonly produced by the CFE by
- // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
- // is only subsequently read.
- SmallVector<Instruction *, 4> ToDelete;
- if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
- if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
- DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
- DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
- for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
- ToDelete[i]->eraseFromParent();
- Constant *TheSrc = cast<Constant>(Copy->getSource());
- AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
- Copy->eraseFromParent(); // Don't mutate the global.
- AI->eraseFromParent();
- ++NumGlobals;
- Changed = true;
- continue;
- }
- }
-
// Check to see if we can perform the core SROA transformation. We cannot
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
@@ -1667,12 +1625,12 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
-
+
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
-
+
Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, true /*AllowWholeAccess*/);
@@ -1689,7 +1647,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
if (Info.isUnsafe) return;
}
}
-
+
/// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer
/// derived from the alloca, we can often still split the alloca into elements.
@@ -1706,10 +1664,10 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (PHINode *PN = dyn_cast<PHINode>(I))
if (!Info.CheckedPHIs.insert(PN))
return;
-
+
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
-
+
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
isSafePHISelectUseForScalarRepl(BC, Offset, Info);
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
@@ -1726,12 +1684,12 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
-
+
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
-
+
Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, false /*AllowWholeAccess*/);
@@ -1925,12 +1883,12 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
RewriteBitCast(BC, AI, Offset, NewElts);
continue;
}
-
+
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
RewriteGEP(GEPI, AI, Offset, NewElts);
continue;
}
-
+
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
@@ -1949,10 +1907,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
continue;
}
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
Type *LIType = LI->getType();
-
+
if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
// Replace:
// %res = load { i32, i32 }* %alloc
@@ -1978,7 +1936,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
Value *Val = SI->getOperand(0);
Type *SIType = Val->getType();
@@ -2005,16 +1963,16 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
continue;
}
-
+
if (isa<SelectInst>(User) || isa<PHINode>(User)) {
- // If we have a PHI user of the alloca itself (as opposed to a GEP or
+ // If we have a PHI user of the alloca itself (as opposed to a GEP or
// bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to
// the new pointer.
if (!isa<AllocaInst>(I)) continue;
-
+
assert(Offset == 0 && NewElts[0] &&
"Direct alloca use should have a zero offset");
-
+
// If we have a use of the alloca, we know the derived uses will be
// utilizing just the first element of the scalarized result. Insert a
// bitcast of the first alloca before the user as required.
@@ -2386,7 +2344,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
IRBuilder<> Builder(SI);
-
+
// Handle tail padding by extending the operand
if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = Builder.CreateZExt(SrcVal,
@@ -2648,137 +2606,6 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return false;
}
}
-
- return true;
-}
-
-
-
-/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
-/// some part of a constant global variable. This intentionally only accepts
-/// constant expressions because we don't can't rewrite arbitrary instructions.
-static bool PointsToConstantGlobal(Value *V) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return GV->isConstant();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::GetElementPtr)
- return PointsToConstantGlobal(CE->getOperand(0));
- return false;
-}
-
-/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
-/// pointer to an alloca. Ignore any reads of the pointer, return false if we
-/// see any stores or other unknown uses. If we see pointer arithmetic, keep
-/// track of whether it moves the pointer (with isOffset) but otherwise traverse
-/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
-/// the alloca, and if the source pointer is a pointer to a constant global, we
-/// can optimize this.
-static bool
-isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
- bool isOffset,
- SmallVector<Instruction *, 4> &LifetimeMarkers) {
- // We track lifetime intrinsics as we encounter them. If we decide to go
- // ahead and replace the value with the global, this lets the caller quickly
- // eliminate the markers.
-
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- User *U = cast<Instruction>(*UI);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- // Ignore non-volatile loads, they are always ok.
- if (!LI->isSimple()) return false;
- continue;
- }
-
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- // If uses of the bitcast are ok, we are ok.
- if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
- LifetimeMarkers))
- return false;
- continue;
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // If the GEP has all zero indices, it doesn't offset the pointer. If it
- // doesn't, it does.
- if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
- isOffset || !GEP->hasAllZeroIndices(),
- LifetimeMarkers))
- return false;
- continue;
- }
-
- if (CallSite CS = U) {
- // If this is the function being called then we treat it like a load and
- // ignore it.
- if (CS.isCallee(UI))
- continue;
-
- // If this is a readonly/readnone call site, then we know it is just a
- // load (but one that potentially returns the value itself), so we can
- // ignore it if we know that the value isn't captured.
- unsigned ArgNo = CS.getArgumentNo(UI);
- if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
- continue;
-
- // If this is being passed as a byval argument, the caller is making a
- // copy, so it is only a read of the alloca.
- if (CS.isByValArgument(ArgNo))
- continue;
- }
-
- // Lifetime intrinsics can be handled by the caller.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- assert(II->use_empty() && "Lifetime markers have no result to use!");
- LifetimeMarkers.push_back(II);
- continue;
- }
- }
-
- // If this is isn't our memcpy/memmove, reject it as something we can't
- // handle.
- MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
- if (MI == 0)
- return false;
-
- // If the transfer is using the alloca as a source of the transfer, then
- // ignore it since it is a load (unless the transfer is volatile).
- if (UI.getOperandNo() == 1) {
- if (MI->isVolatile()) return false;
- continue;
- }
-
- // If we already have seen a copy, reject the second one.
- if (TheCopy) return false;
- // If the pointer has been offset from the start of the alloca, we can't
- // safely handle this.
- if (isOffset) return false;
-
- // If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != 0) return false;
-
- // If the source of the memcpy/move is not a constant global, reject it.
- if (!PointsToConstantGlobal(MI->getSource()))
- return false;
-
- // Otherwise, the transform is safe. Remember the copy instruction.
- TheCopy = MI;
- }
return true;
}
-
-/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
-/// modified by a copy from a constant global. If we can prove this, we can
-/// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *
-SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
- SmallVector<Instruction*, 4> &ToDelete) {
- MemTransferInst *TheCopy = 0;
- if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
- return TheCopy;
- return 0;
-}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 91158b429e..d13e4abff9 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -67,7 +67,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
// nodes.
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
(*SI)->removePredecessor(BB);
-
+
// Insert a call to llvm.trap right before this. This turns the undefined
// behavior into a hard fail instead of falling through into random code.
if (UseLLVMTrap) {
@@ -77,7 +77,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
CallTrap->setDebugLoc(I->getDebugLoc());
}
new UnreachableInst(I->getContext(), I);
-
+
// All instructions after this are dead.
BasicBlock::iterator BBI = I, BBE = BB->end();
while (BBI != BBE) {
@@ -107,13 +107,13 @@ static void ChangeToCall(InvokeInst *II) {
static bool MarkAliveBlocks(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 128> &Reachable) {
-
+
SmallVector<BasicBlock*, 128> Worklist;
Worklist.push_back(BB);
bool Changed = false;
do {
BB = Worklist.pop_back_val();
-
+
if (!Reachable.insert(BB))
continue;
@@ -135,7 +135,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
break;
}
}
-
+
// Store to undef and store to null are undefined and used to signal that
// they should be changed to unreachable by passes that can't modify the
// CFG.
@@ -144,7 +144,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (SI->isVolatile()) continue;
Value *Ptr = SI->getOperand(1);
-
+
if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
SI->getPointerAddressSpace() == 0)) {
@@ -165,6 +165,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (II->use_empty() && II->onlyReadsMemory()) {
// jump to the normal destination branch.
BranchInst::Create(II->getNormalDest(), II);
+ II->getUnwindDest()->removePredecessor(II->getParent());
II->eraseFromParent();
} else
ChangeToCall(II);
@@ -179,38 +180,38 @@ static bool MarkAliveBlocks(BasicBlock *BB,
return Changed;
}
-/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
-/// if they are in a dead cycle. Return true if a change was made, false
+/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
static bool RemoveUnreachableBlocksFromFn(Function &F) {
SmallPtrSet<BasicBlock*, 128> Reachable;
bool Changed = MarkAliveBlocks(F.begin(), Reachable);
-
+
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
return Changed;
-
+
assert(Reachable.size() < F.size());
NumSimpl += F.size()-Reachable.size();
-
+
// Loop over all of the basic blocks that are not reachable, dropping all of
// their internal references...
for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
if (Reachable.count(BB))
continue;
-
+
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
if (Reachable.count(*SI))
(*SI)->removePredecessor(BB);
BB->dropAllReferences();
}
-
+
for (Function::iterator I = ++F.begin(); I != F.end();)
if (!Reachable.count(I))
I = F.getBasicBlockList().erase(I);
else
++I;
-
+
return true;
}
@@ -218,17 +219,17 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
/// node) return blocks, merge them together to promote recursive block merging.
static bool MergeEmptyReturnBlocks(Function &F) {
bool Changed = false;
-
+
BasicBlock *RetBlock = 0;
-
+
// Scan all the blocks in the function, looking for empty return blocks.
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
BasicBlock &BB = *BBI++;
-
+
// Only look at return blocks.
ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
if (Ret == 0) continue;
-
+
// Only look at the block if it is empty or the only other thing in it is a
// single PHI node that is the operand to the return.
if (Ret != &BB.front()) {
@@ -250,21 +251,21 @@ static bool MergeEmptyReturnBlocks(Function &F) {
RetBlock = &BB;
continue;
}
-
+
// Otherwise, we found a duplicate return block. Merge the two.
Changed = true;
-
+
// Case when there is no input to the return or when the returned values
// agree is trivial. Note that they can't agree if there are phis in the
// blocks.
if (Ret->getNumOperands() == 0 ||
- Ret->getOperand(0) ==
+ Ret->getOperand(0) ==
cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
BB.replaceAllUsesWith(RetBlock);
BB.eraseFromParent();
continue;
}
-
+
// If the canonical return block has no PHI node, create one now.
PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
if (RetBlockPHI == 0) {
@@ -273,12 +274,12 @@ static bool MergeEmptyReturnBlocks(Function &F) {
RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
std::distance(PB, PE), "merge",
&RetBlock->front());
-
+
for (pred_iterator PI = PB; PI != PE; ++PI)
RetBlockPHI->addIncoming(InVal, *PI);
RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
}
-
+
// Turn BB into a block that just unconditionally branches to the return
// block. This handles the case when the two return blocks have a common
// predecessor but that return different things.
@@ -286,7 +287,7 @@ static bool MergeEmptyReturnBlocks(Function &F) {
BB.getTerminator()->eraseFromParent();
BranchInst::Create(RetBlock, &BB);
}
-
+
return Changed;
}
@@ -297,7 +298,7 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
-
+
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
@@ -326,7 +327,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
// IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
// RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
// iterate between the two optimizations. We structure the code like this to
- // avoid reruning IterativeSimplifyCFG if the second pass of
+ // avoid reruning IterativeSimplifyCFG if the second pass of
// RemoveUnreachableBlocksFromFn doesn't do anything.
if (!RemoveUnreachableBlocksFromFn(F))
return true;
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 39647c7fc6..3904419012 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -100,7 +100,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
}
return true;
}
-
+
static bool CallHasFloatingPointArgument(const CallInst *CI) {
for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
it != e; ++it) {
@@ -157,14 +157,15 @@ struct StrCatOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitStrLenMemCpy(Src, Dst, Len, B);
- return Dst;
+ return EmitStrLenMemCpy(Src, Dst, Len, B);
}
- void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
+ Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
// We need to find the end of the destination string. That's where the
// memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, TD);
+ Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+ if (!DstLen)
+ return 0;
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
@@ -175,6 +176,7 @@ struct StrCatOpt : public LibCallOptimization {
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(CpyDst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ return Dst;
}
};
@@ -221,8 +223,7 @@ struct StrNCatOpt : public StrCatOpt {
// strncat(x, s, c) -> strcat(x, s)
// s is constant so the strcat can be optimized further
- EmitStrLenMemCpy(Src, Dst, SrcLen, B);
- return Dst;
+ return EmitStrLenMemCpy(Src, Dst, SrcLen, B);
}
};
@@ -254,9 +255,9 @@ struct StrChrOpt : public LibCallOptimization {
return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
ConstantInt::get(TD->getIntPtrType(*Context), Len),
- B, TD);
+ B, TD, TLI);
}
-
+
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
StringRef Str;
@@ -299,7 +300,7 @@ struct StrRChrOpt : public LibCallOptimization {
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
if (TD && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, TD);
+ return EmitStrChr(SrcStr, '\0', B, TD, TLI);
return 0;
}
@@ -355,7 +356,7 @@ struct StrCmpOpt : public LibCallOptimization {
return EmitMemCmp(Str1P, Str2P,
ConstantInt::get(TD->getIntPtrType(*Context),
- std::min(Len1, Len2)), B, TD);
+ std::min(Len1, Len2)), B, TD, TLI);
}
return 0;
@@ -391,7 +392,7 @@ struct StrNCmpOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), 0);
if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -447,11 +448,10 @@ struct StrCpyOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- if (OptChkCall)
- EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- CI->getArgOperand(2), B, TD);
- else
+ if (!OptChkCall ||
+ !EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ CI->getArgOperand(2), B, TD, TLI))
B.CreateMemCpy(Dst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
return Dst;
@@ -480,8 +480,10 @@ struct StpCpyOpt: public LibCallOptimization {
if (!TD) return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) // stpcpy(x,x) -> x+strlen(x)
- return B.CreateInBoundsGEP(Dst, EmitStrLen(Src, B, TD));
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ }
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
@@ -494,9 +496,8 @@ struct StpCpyOpt: public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- if (OptChkCall)
- EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD);
- else
+ if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B,
+ TD, TLI))
B.CreateMemCpy(Dst, Src, LenV, 1);
return DstEnd;
}
@@ -609,7 +610,7 @@ struct StrPBrkOpt : public LibCallOptimization {
// strpbrk(s, "a") -> strchr(s, 'a')
if (TD && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
return 0;
}
@@ -698,7 +699,7 @@ struct StrCSpnOpt : public LibCallOptimization {
// strcspn(s, "") -> strlen(s)
if (TD && HasS2 && S2.empty())
- return EmitStrLen(CI->getArgOperand(0), B, TD);
+ return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);
return 0;
}
@@ -722,9 +723,13 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
- Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
+ if (!StrLen)
+ return 0;
Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
- StrLen, B, TD);
+ StrLen, B, TD, TLI);
+ if (!StrNCmp)
+ return 0;
for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
UI != UE; ) {
ICmpInst *Old = cast<ICmpInst>(*UI++);
@@ -760,9 +765,10 @@ struct StrStrOpt : public LibCallOptimization {
}
// fold strstr(x, "y") -> strchr(x, 'y').
- if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
- ToFindStr[0], B, TD), CI->getType());
+ if (HasStr2 && ToFindStr.size() == 1) {
+ Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
+ }
return 0;
}
};
@@ -1179,8 +1185,8 @@ struct PrintFOpt : public LibCallOptimization {
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
- if (CI->use_empty()) return CI;
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1191,26 +1197,26 @@ struct PrintFOpt : public LibCallOptimization {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- EmitPutS(GV, B, TD);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ Value *NewCI = EmitPutS(GV, B, TD, TLI);
+ return (CI->use_empty() || !NewCI) ?
+ NewCI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
}
// Optimize specific format strings.
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
- if (CI->use_empty()) return CI;
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy()) {
- EmitPutS(CI->getArgOperand(1), B, TD);
- return CI;
+ return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
}
return 0;
}
@@ -1297,7 +1303,9 @@ struct SPrintFOpt : public LibCallOptimization {
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
- Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
+ if (!Len)
+ return 0;
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
@@ -1364,8 +1372,8 @@ struct FWriteOpt : public LibCallOptimization {
// This optimisation is only valid, if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- EmitFPutC(Char, CI->getArgOperand(3), B, TD);
- return ConstantInt::get(CI->getType(), 1);
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
}
return 0;
@@ -1390,10 +1398,10 @@ struct FPutsOpt : public LibCallOptimization {
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len) return 0;
- EmitFWrite(CI->getArgOperand(0),
- ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getArgOperand(1), B, TD, TLI);
- return CI; // Known to have no uses (see above).
+ // Known to have no uses (see above).
+ return EmitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, TD, TLI);
}
};
@@ -1417,11 +1425,11 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
- return ConstantInt::get(CI->getType(), FormatStr.size());
+ Value *NewCI = EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -1434,16 +1442,16 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
- return ConstantInt::get(CI->getType(), 1);
+ Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
+ TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
return 0;
- EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
- return CI;
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
return 0;
}
@@ -1494,8 +1502,8 @@ struct PutsOpt : public LibCallOptimization {
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
- if (CI->use_empty()) return CI;
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1514,7 +1522,7 @@ namespace {
///
class SimplifyLibCalls : public FunctionPass {
TargetLibraryInfo *TLI;
-
+
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
@@ -1534,7 +1542,7 @@ namespace {
SPrintFOpt SPrintF; PrintFOpt PrintF;
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
PutsOpt Puts;
-
+
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
@@ -1633,6 +1641,8 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["llvm.exp2.f64"] = &Exp2;
Optimizations["llvm.exp2.f32"] = &Exp2;
+ if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf))
+ Optimizations["fabs"] = &UnaryDoubleFP;
if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
Optimizations["floor"] = &UnaryDoubleFP;
if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
@@ -1643,6 +1653,8 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["rint"] = &UnaryDoubleFP;
if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
Optimizations["nearbyint"] = &UnaryDoubleFP;
+ if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf))
+ Optimizations["trunc"] = &UnaryDoubleFP;
// Integer Optimizations
Optimizations["ffs"] = &FFS;
@@ -1767,7 +1779,7 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
FunctionType *FTy = F.getFunctionType();
-
+
StringRef Name = F.getName();
switch (Name[0]) {
case 's':
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index d6ec65169d..34f1d6c622 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -40,9 +40,9 @@ namespace {
Sinking() : FunctionPass(ID) {
initializeSinkingPass(*PassRegistry::getPassRegistry());
}
-
+
virtual bool runOnFunction(Function &F);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
@@ -59,7 +59,7 @@ namespace {
bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const;
};
} // end anonymous namespace
-
+
char Sinking::ID = 0;
INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -71,7 +71,7 @@ FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
/// AllUsesDominatedByBlock - Return true if all uses of the specified value
/// occur in blocks dominated by the specified block.
-bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
+bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
BasicBlock *BB) const {
// Ignoring debug uses is necessary so debug info doesn't affect the code.
// This may leave a referencing dbg_value in the original block, before
@@ -101,18 +101,18 @@ bool Sinking::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
bool MadeChange, EverMadeChange = false;
-
+
do {
MadeChange = false;
DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n");
// Process all basic blocks.
- for (Function::iterator I = F.begin(), E = F.end();
+ for (Function::iterator I = F.begin(), E = F.end();
I != E; ++I)
MadeChange |= ProcessBlock(*I);
EverMadeChange |= MadeChange;
NumSinkIter++;
} while (MadeChange);
-
+
return EverMadeChange;
}
@@ -121,8 +121,8 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
// Don't bother sinking code out of unreachable blocks. In addition to being
- // unprofitable, it can also lead to infinite looping, because in an unreachable
- // loop there may be nowhere to stop.
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
if (!DT->isReachableFromEntry(&BB)) return false;
bool MadeChange = false;
@@ -134,7 +134,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
SmallPtrSet<Instruction *, 8> Stores;
do {
Instruction *Inst = I; // The instruction to sink.
-
+
// Predecrement I (if it's not begin) so that it isn't invalidated by
// sinking.
ProcessedBegin = I == BB.begin();
@@ -146,10 +146,10 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
if (SinkInstruction(Inst, Stores))
++NumSunk, MadeChange = true;
-
+
// If we just processed the first instruction in the block, we're done.
} while (!ProcessedBegin);
-
+
return MadeChange;
}
@@ -177,16 +177,17 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
/// IsAcceptableTarget - Return true if it is possible to sink the instruction
/// in the specified basic block.
-bool Sinking::IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const {
+bool Sinking::IsAcceptableTarget(Instruction *Inst,
+ BasicBlock *SuccToSinkTo) const {
assert(Inst && "Instruction to be sunk is null");
assert(SuccToSinkTo && "Candidate sink target is null");
-
+
// It is not possible to sink an instruction into its own block. This can
// happen with loops.
if (Inst->getParent() == SuccToSinkTo)
return false;
-
- // If the block has multiple predecessors, this would introduce computation
+
+ // If the block has multiple predecessors, this would introduce computation
// on different code paths. We could split the critical edge, but for now we
// just punt.
// FIXME: Split critical edges if not backedges.
@@ -195,18 +196,19 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) co
// other code paths.
if (!isSafeToSpeculativelyExecute(Inst))
return false;
-
+
// We don't want to sink across a critical edge if we don't dominate the
// successor. We could be introducing calculations to new code paths.
if (!DT->dominates(Inst->getParent(), SuccToSinkTo))
return false;
-
+
// Don't sink instructions into a loop.
- Loop *succ = LI->getLoopFor(SuccToSinkTo), *cur = LI->getLoopFor(Inst->getParent());
+ Loop *succ = LI->getLoopFor(SuccToSinkTo);
+ Loop *cur = LI->getLoopFor(Inst->getParent());
if (succ != 0 && succ != cur)
return false;
}
-
+
// Finally, check that all the uses of the instruction are actually
// dominated by the candidate
return AllUsesDominatedByBlock(Inst, SuccToSinkTo);
@@ -219,7 +221,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// Check if it's safe to move the instruction.
if (!isSafeToMove(Inst, AA, Stores))
return false;
-
+
// FIXME: This should include support for sinking instructions within the
// block they are currently in to shorten the live ranges. We often get
// instructions sunk into the top of a large block, but it would be better to
@@ -227,41 +229,41 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// be careful not to *increase* register pressure though, e.g. sinking
// "x = y + z" down if it kills y and z would increase the live ranges of y
// and z and only shrink the live range of x.
-
+
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
BasicBlock *SuccToSinkTo = 0;
-
+
// Instructions can only be sunk if all their uses are in blocks
// dominated by one of the successors.
// Look at all the postdominators and see if we can sink it in one.
DomTreeNode *DTN = DT->getNode(Inst->getParent());
- for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
+ for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
I != E && SuccToSinkTo == 0; ++I) {
BasicBlock *Candidate = (*I)->getBlock();
- if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
+ if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
IsAcceptableTarget(Inst, Candidate))
SuccToSinkTo = Candidate;
}
- // If no suitable postdominator was found, look at all the successors and
+ // If no suitable postdominator was found, look at all the successors and
// decide which one we should sink to, if any.
- for (succ_iterator I = succ_begin(Inst->getParent()),
+ for (succ_iterator I = succ_begin(Inst->getParent()),
E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) {
if (IsAcceptableTarget(Inst, *I))
SuccToSinkTo = *I;
}
-
+
// If we couldn't find a block to sink to, ignore this instruction.
if (SuccToSinkTo == 0)
return false;
-
+
DEBUG(dbgs() << "Sink" << *Inst << " (";
- WriteAsOperand(dbgs(), Inst->getParent(), false);
+ WriteAsOperand(dbgs(), Inst->getParent(), false);
dbgs() << " -> ";
- WriteAsOperand(dbgs(), SuccToSinkTo, false);
+ WriteAsOperand(dbgs(), SuccToSinkTo, false);
dbgs() << ")\n");
-
+
// Move the instruction.
Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt());
return true;
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 074d0325d9..6557d630a9 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -172,7 +172,7 @@ bool TailCallElim::runOnFunction(Function &F) {
FunctionContainsEscapingAllocas |=
CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
}
-
+
/// FIXME: The code generator produces really bad code when an 'escaping
/// alloca' is changed from being a static alloca to being a dynamic alloca.
/// Until this is resolved, disable this transformation if that would ever
@@ -234,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// call does not mod/ref the memory location being processed.
if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
-
+
if (LoadInst *L = dyn_cast<LoadInst>(I)) {
// Loads may always be moved above calls without side effects.
if (CI->mayHaveSideEffects()) {
@@ -364,7 +364,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
if (&BB->front() == TI) // Make sure there is something before the terminator.
return 0;
-
+
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
CallInst *CI = 0;
@@ -388,7 +388,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
- if (BB == &F->getEntryBlock() &&
+ if (BB == &F->getEntryBlock() &&
FirstNonDbg(BB->front()) == CI &&
FirstNonDbg(llvm::next(BB->begin())) == TI &&
callIsSmall(CI)) {
@@ -432,7 +432,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock::iterator BBI = CI;
for (++BBI; &*BBI != Ret; ++BBI) {
if (CanMoveAboveCall(BBI, CI)) continue;
-
+
// If we can't move the instruction above the call, it might be because it
// is an associative and commutative operation that could be transformed
// using accumulator recursion elimination. Check to see if this is the
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 5576432149..2679b933f6 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -659,10 +659,26 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// If the return instruction returns a value, and if the value was a
// PHI node in "BB", propagate the right value into the return.
for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
- i != e; ++i)
- if (PHINode *PN = dyn_cast<PHINode>(*i))
- if (PN->getParent() == BB)
- *i = PN->getIncomingValueForBlock(Pred);
+ i != e; ++i) {
+ Value *V = *i;
+ Instruction *NewBC = 0;
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
+ // Return value might be bitcasted. Clone and insert it before the
+ // return instruction.
+ V = BCI->getOperand(0);
+ NewBC = BCI->clone();
+ Pred->getInstList().insert(NewRet, NewBC);
+ *i = NewBC;
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (PN->getParent() == BB) {
+ if (NewBC)
+ NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred));
+ else
+ *i = PN->getIncomingValueForBlock(Pred);
+ }
+ }
+ }
// Update any PHI nodes in the returning block to realize that we no
// longer branch to them.
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 27f7724417..e13fd716fa 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -34,7 +34,11 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
/// specified pointer. This always returns an integer value of size intptr_t.
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strlen))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -53,11 +57,41 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
return CI;
}
+/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the
+/// specified pointer. Ptr is required to be some pointer type, MaxLen must
+/// be of size_t type, and the return value has 'intptr_t' type.
+Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strnlen))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ NULL);
+ CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
+ if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
/// EmitStrChr - Emit a call to the strchr function to the builder, for the
/// specified pointer and character. Ptr is required to be some pointer type,
/// and the return value has 'i8*' type.
Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
- const TargetData *TD) {
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strchr))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI =
AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
@@ -75,7 +109,11 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
- IRBuilder<> &B, const TargetData *TD) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strncmp))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -101,7 +139,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const TargetData *TD, StringRef Name) {
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ StringRef Name) {
+ if (!TLI->has(LibFunc::strcpy))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -119,7 +161,11 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
- IRBuilder<> &B, const TargetData *TD, StringRef Name) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI, StringRef Name) {
+ if (!TLI->has(LibFunc::strncpy))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -139,7 +185,11 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
/// are pointers.
Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
- IRBuilder<> &B, const TargetData *TD) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memcpy_chk))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
@@ -162,7 +212,11 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
- Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Value *Len, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memchr))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
@@ -183,7 +237,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
/// EmitMemCmp - Emit a call to the memcmp function.
Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
- Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Value *Len, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memcmp))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -236,7 +294,11 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::putchar))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
B.getInt32Ty(), NULL);
@@ -254,7 +316,11 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
-void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::puts))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -267,13 +333,16 @@ void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
-
+ return CI;
}
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
/// an integer and File is a pointer to FILE.
-void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
- const TargetData *TD) {
+Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fputc))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -295,12 +364,16 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
+ return CI;
}
/// EmitFPutS - Emit a call to the puts function. Str is required to be a
/// pointer and File is a pointer to FILE.
-void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const TargetData *TD, const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fputs))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -321,13 +394,17 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
+ return CI;
}
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B, const TargetData *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fwrite))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -354,11 +431,13 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
+ return CI;
}
SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
-bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
+bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// We really need TargetData for later.
if (!TD) return false;
@@ -446,7 +525,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
// string lengths for varying.
if (isFoldable(2, 1, true)) {
Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
- Name.substr(2, 6));
+ TLI, Name.substr(2, 6));
+ if (!Ret)
+ return false;
replaceCall(Ret);
return true;
}
@@ -464,7 +545,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
if (isFoldable(3, 2, false)) {
Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TD, Name.substr(2, 7));
+ CI->getArgOperand(2), B, TD, TLI,
+ Name.substr(2, 7));
+ if (!Ret)
+ return false;
replaceCall(Ret);
return true;
}
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index f96581393d..02bdcda391 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -12,19 +12,19 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MDBuilder.h"
-#include "llvm/ADT/Statistic.h"
#include <vector>
using namespace llvm;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index b3f5289fcd..72d4199a2a 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -39,7 +39,7 @@ SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
: AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
- delete &getAvailableVals(AV);
+ delete static_cast<AvailableValsTy*>(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
@@ -214,6 +214,11 @@ void SSAUpdater::RewriteUse(Use &U) {
else
V = GetValueInMiddleOfBlock(User->getParent());
+ // Notify that users of the existing value that it is being replaced.
+ Value *OldVal = U.get();
+ if (OldVal != V && OldVal->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(OldVal, V);
+
U.set(V);
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index f37ea91397..518df7cdda 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -20,6 +20,7 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
#include "llvm/Operator.h"
#include "llvm/Type.h"
@@ -35,7 +36,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MDBuilder.h"
#include "llvm/Support/NoFolder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
@@ -1330,7 +1330,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
return false;
}
- // If we folded the the first phi, PN dangles at this point. Refresh it. If
+ // If we folded the first phi, PN dangles at this point. Refresh it. If
// we ran out of PHIs then we simplified them all.
PN = dyn_cast<PHINode>(BB->begin());
if (PN == 0) return true;
@@ -1550,7 +1550,7 @@ static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D,
/// checkCSEInPredecessor - Return true if the given instruction is available
/// in its predecessor block. If yes, the instruction will be removed.
///
-bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
+static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
return false;
for (BasicBlock::iterator I = PB->begin(), E = PB->end(); I != E; I++) {