| field | value | date |
|---|---|---|
| author | Derek Schuff <dschuff@chromium.org> | 2012-08-21 17:32:13 -0700 |
| committer | Derek Schuff <dschuff@chromium.org> | 2012-08-21 17:32:13 -0700 |
| commit | 66f271497ed92ebb05c66f54616e512606a2e314 (patch) | |
| tree | 96d54cd64804ab7c9f2f52f680c3301aa789ce1d /lib/Transforms | |
| parent | b62e9abf7dd9e39c95327914ce9dfe216386824a (diff) | |
| parent | bc363931085587bac42a40653962a3e5acd1ffce (diff) | |
Merge up to r162331, git commit bc363931085587bac42a40653962a3e5acd1ffce
Diffstat (limited to 'lib/Transforms')
51 files changed, 1866 insertions, 1176 deletions
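Note: the first hunk below (ArgumentPromotion.cpp) replaces a hand-written element-by-element loop with `std::equal`. As a self-contained sketch of why the two are equivalent once the length guard has run; `IndicesVector` here is a local stand-in for `ArgPromotion::IndicesVector`, not the type from the tree:

```cpp
// Minimal sketch: after the size guard, comparing Prefix's full range
// against the head of Longer is exactly what the removed loop did.
#include <algorithm>
#include <cassert>
#include <vector>

using IndicesVector = std::vector<unsigned>; // stand-in, not LLVM's type

static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
  if (Prefix.size() > Longer.size())
    return false;
  return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
}

int main() {
  assert(IsPrefix({1, 2}, {1, 2, 3}));   // proper prefix
  assert(!IsPrefix({1, 3}, {1, 2, 3}));  // mismatch at index 1
  assert(!IsPrefix({1, 2, 3, 4}, {1}));  // longer than Longer
  return 0;
}
```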
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 95a3b3d7c8..b94dd69deb 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -245,10 +245,7 @@ static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix, const ArgPromotion::IndicesVector &Longer) { if (Prefix.size() > Longer.size()) return false; - for (unsigned i = 0, e = Prefix.size(); i != e; ++i) - if (Prefix[i] != Longer[i]) - return false; - return true; + return std::equal(Prefix.begin(), Prefix.end(), Longer.begin()); } diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 51d0a002e1..b2748f2e6c 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -53,11 +53,11 @@ namespace { I != E; ++I) { if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) { I->setInitializer(0); - } else { - if (I->hasAvailableExternallyLinkage()) - continue; - if (I->getName() == "llvm.global_ctors") - continue; + } else { + if (I->hasAvailableExternallyLinkage()) + continue; + if (I->getName() == "llvm.global_ctors") + continue; // @LOCALMOD-BEGIN - this is likely upstreamable // Note: there will likely be more cases once this // is exercises more thorougly. @@ -67,7 +67,7 @@ namespace { if (I->hasExternalWeakLinkage()) continue; // @LOCALMOD-END - } + } if (I->hasLocalLinkage()) I->setVisibility(GlobalValue::HiddenVisibility); @@ -78,9 +78,9 @@ namespace { for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) { I->deleteBody(); - } else { - if (I->hasAvailableExternallyLinkage()) - continue; + } else { + if (I->hasAvailableExternallyLinkage()) + continue; // @LOCALMOD-BEGIN - this is likely upstreamable // Note: there will likely be more cases once this // is exercises more thorougly. diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 4e1c23c198..6d950d2024 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -296,6 +296,168 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, return false; } +/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker +/// as a root? If so, we might not really want to eliminate the stores to it. +static bool isLeakCheckerRoot(GlobalVariable *GV) { + // A global variable is a root if it is a pointer, or could plausibly contain + // a pointer. There are two challenges; one is that we could have a struct + // the has an inner member which is a pointer. We recurse through the type to + // detect these (up to a point). The other is that we may actually be a union + // of a pointer and another type, and so our LLVM type is an integer which + // gets converted into a pointer, or our type is an [i8 x #] with a pointer + // potentially contained here. 
+ + if (GV->hasPrivateLinkage()) + return false; + + SmallVector<Type *, 4> Types; + Types.push_back(cast<PointerType>(GV->getType())->getElementType()); + + unsigned Limit = 20; + do { + Type *Ty = Types.pop_back_val(); + switch (Ty->getTypeID()) { + default: break; + case Type::PointerTyID: return true; + case Type::ArrayTyID: + case Type::VectorTyID: { + SequentialType *STy = cast<SequentialType>(Ty); + Types.push_back(STy->getElementType()); + break; + } + case Type::StructTyID: { + StructType *STy = cast<StructType>(Ty); + if (STy->isOpaque()) return true; + for (StructType::element_iterator I = STy->element_begin(), + E = STy->element_end(); I != E; ++I) { + Type *InnerTy = *I; + if (isa<PointerType>(InnerTy)) return true; + if (isa<CompositeType>(InnerTy)) + Types.push_back(InnerTy); + } + break; + } + } + if (--Limit == 0) return true; + } while (!Types.empty()); + return false; +} + +/// Given a value that is stored to a global but never read, determine whether +/// it's safe to remove the store and the chain of computation that feeds the +/// store. +static bool IsSafeComputationToRemove(Value *V) { + do { + if (isa<Constant>(V)) + return true; + if (!V->hasOneUse()) + return false; + if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || + isa<GlobalValue>(V)) + return false; + if (isAllocationFn(V)) + return true; + + Instruction *I = cast<Instruction>(V); + if (I->mayHaveSideEffects()) + return false; + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + if (!GEP->hasAllConstantIndices()) + return false; + } else if (I->getNumOperands() != 1) { + return false; + } + + V = I->getOperand(0); + } while (1); +} + +/// CleanupPointerRootUsers - This GV is a pointer root. Loop over all users +/// of the global and clean up any that obviously don't assign the global a +/// value that isn't dynamically allocated. +/// +static bool CleanupPointerRootUsers(GlobalVariable *GV) { + // A brief explanation of leak checkers. The goal is to find bugs where + // pointers are forgotten, causing an accumulating growth in memory + // usage over time. The common strategy for leak checkers is to whitelist the + // memory pointed to by globals at exit. This is popular because it also + // solves another problem where the main thread of a C++ program may shut down + // before other threads that are still expecting to use those globals. To + // handle that case, we expect the program may create a singleton and never + // destroy it. + + bool Changed = false; + + // If Dead[n].first is the only use of a malloc result, we can delete its + // chain of computation and the store to the global in Dead[n].second. + SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead; + + // Constants can't be pointers to dynamically allocated memory. 
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E;) { + User *U = *UI++; + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + Value *V = SI->getValueOperand(); + if (isa<Constant>(V)) { + Changed = true; + SI->eraseFromParent(); + } else if (Instruction *I = dyn_cast<Instruction>(V)) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, SI)); + } + } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) { + if (isa<Constant>(MSI->getValue())) { + Changed = true; + MSI->eraseFromParent(); + } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, MSI)); + } + } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) { + GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource()); + if (MemSrc && MemSrc->isConstant()) { + Changed = true; + MTI->eraseFromParent(); + } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, MTI)); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->use_empty()) { + CE->destroyConstant(); + Changed = true; + } + } else if (Constant *C = dyn_cast<Constant>(U)) { + if (SafeToDestroyConstant(C)) { + C->destroyConstant(); + // This could have invalidated UI, start over from scratch. + Dead.clear(); + CleanupPointerRootUsers(GV); + return true; + } + } + } + + for (int i = 0, e = Dead.size(); i != e; ++i) { + if (IsSafeComputationToRemove(Dead[i].first)) { + Dead[i].second->eraseFromParent(); + Instruction *I = Dead[i].first; + do { + if (isAllocationFn(I)) + break; + Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); + if (!J) + break; + I->eraseFromParent(); + I = J; + } while (1); + I->eraseFromParent(); + } + } + + return Changed; +} + /// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all /// users of the global, cleaning up the obvious ones. This is largely just a /// quick scan over the use list to clean up the easy and obvious cruft. This @@ -812,13 +974,18 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // If we nuked all of the loads, then none of the stores are needed either, // nor is the global. if (AllNonStoreUsesGone) { - DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); - CleanupConstantGlobalUsers(GV, 0, TD, TLI); + if (isLeakCheckerRoot(GV)) { + Changed |= CleanupPointerRootUsers(GV); + } else { + Changed = true; + CleanupConstantGlobalUsers(GV, 0, TD, TLI); + } if (GV->use_empty()) { + DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); + Changed = true; GV->eraseFromParent(); ++NumDeleted; } - Changed = true; } return Changed; } @@ -1794,10 +1961,15 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (!GS.isLoaded) { DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); - // Delete any stores we can find to the global. We may not be able to - // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), - TD, TLI); + bool Changed; + if (isLeakCheckerRoot(GV)) { + // Delete any constant stores to the global. + Changed = CleanupPointerRootUsers(GV); + } else { + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. + Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI); + } // If the global is dead now, delete it. 
if (GV->use_empty()) { @@ -1845,7 +2017,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (GV->use_empty()) { DEBUG(dbgs() << " *** Substituting initializer allowed us to " - << "simplify all users and delete global!\n"); + << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; } else { diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 715a384adc..9f70f668a8 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -389,7 +389,7 @@ bool FunctionComparator::enumerate(const Value *V1, const Value *V2) { if (!C2) return false; // TODO: constant expressions with GEP or references to F1 or F2. if (C1->isNullValue() && C2->isNullValue() && - isEquivalentType(C1->getType(), C2->getType())) + isEquivalentType(C1->getType(), C2->getType())) return true; // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 // then they must have equal bit patterns. diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index d8e8cf77dd..80bfc1cdb2 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -27,6 +27,7 @@ #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/TypeFinder.h" #include "llvm/ValueSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" @@ -175,8 +176,8 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { // Strip any named types of their names. static void StripTypeNames(Module &M, bool PreserveDbgInfo) { - std::vector<StructType*> StructTypes; - M.findUsedStructTypes(StructTypes); + TypeFinder StructTypes; + StructTypes.run(M, false); for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { StructType *STy = StructTypes[i]; diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index c2b0e03b40..0d5ef904ee 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -187,7 +187,7 @@ public: Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); - Instruction *visitMalloc(Instruction &FI); + Instruction *visitAllocSite(Instruction &FI); Instruction *visitFree(CallInst &FI); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index f74cff85c6..cbe1ca4ddc 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -51,8 +51,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // if the size is something we can handle with a single primitive load/store. // A single load+store correctly handles overlapping memory in the memmove // case. - unsigned Size = MemOpLength->getZExtValue(); - if (Size == 0) return MI; // Delete this mem transfer. + uint64_t Size = MemOpLength->getLimitedValue(); + assert(Size && "0-sized memory transfering should be removed already."); if (Size > 8 || (Size&(Size-1))) return 0; // If not 1/2/4/8 bytes, exit. 
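Note: the SimplifyMemTransfer hunk above only lowers a memcpy/memmove to a single load+store pair when the constant length is exactly 1, 2, 4, or 8 bytes; `(Size & (Size - 1))` is nonzero for any non-power-of-two. The switch to `uint64_t` plus `getLimitedValue()` in the same hunk also avoids the old implicit truncation to `unsigned`. A standalone sketch of the size filter, with a simplified signature rather than the LLVM API:

```cpp
// Sketch of the size filter from the hunk above: a transfer qualifies for a
// single primitive load+store only if its length is a power of two <= 8.
#include <cassert>
#include <cstdint>

static bool fitsSinglePrimitiveAccess(uint64_t Size) {
  // The pass asserts zero-length transfers were removed earlier.
  assert(Size != 0 && "zero-sized transfers should already be gone");
  return Size <= 8 && (Size & (Size - 1)) == 0;
}

int main() {
  assert(fitsSinglePrimitiveAccess(1));   // i8
  assert(fitsSinglePrimitiveAccess(8));   // i64
  assert(!fitsSinglePrimitiveAccess(3));  // not a power of two
  assert(!fitsSinglePrimitiveAccess(16)); // wider than one primitive access
  return 0;
}
```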
@@ -133,11 +133,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return 0; - uint64_t Len = LenC->getZExtValue(); + uint64_t Len = LenC->getLimitedValue(); Alignment = MI->getAlignment(); - - // If the length is zero, this is a no-op - if (Len == 0) return MI; // memset(d,c,0,a) -> noop + assert(Len && "0-sized memory setting should be removed already."); // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { @@ -795,7 +793,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { if (CI->getCalledFunction() == 0) return 0; InstCombineFortifiedLibCalls Simplifier(this); - Simplifier.fold(CI, TD); + Simplifier.fold(CI, TD, TLI); return Simplifier.NewInstruction; } @@ -880,7 +878,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { // Instruction *InstCombiner::visitCallSite(CallSite CS) { if (isAllocLikeFn(CS.getInstruction())) - return visitMalloc(*CS.getInstruction()); + return visitAllocSite(*CS.getInstruction()); bool Changed = false; diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7076d88554..c3fc18c300 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -2824,7 +2825,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, case ICmpInst::ICMP_UGE: // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) + if (RHS.isNegative()) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); Pred = ICmpInst::ICMP_UGT; break; @@ -2985,6 +2986,44 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return Res; } break; + case Instruction::Call: { + CallInst *CI = cast<CallInst>(LHSI); + LibFunc::Func Func; + // Various optimization for fabs compared with zero. 
+ if (RHSC->isNullValue() && CI->getCalledFunction() && + TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && + TLI->has(Func)) { + if (Func == LibFunc::fabs || Func == LibFunc::fabsf || + Func == LibFunc::fabsl) { + switch (I.getPredicate()) { + default: break; + // fabs(x) < 0 --> false + case FCmpInst::FCMP_OLT: + return ReplaceInstUsesWith(I, Builder->getFalse()); + // fabs(x) > 0 --> x != 0 + case FCmpInst::FCMP_OGT: + return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), + RHSC); + // fabs(x) <= 0 --> x == 0 + case FCmpInst::FCMP_OLE: + return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), + RHSC); + // fabs(x) >= 0 --> !isnan(x) + case FCmpInst::FCMP_OGE: + return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), + RHSC); + // fabs(x) == 0 --> x == 0 + // fabs(x) != 0 --> x != 0 + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_ONE: + case FCmpInst::FCMP_UNE: + return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), + RHSC); + } + } + } + } } } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index b9df5eb81e..6ecb4c52c4 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -20,72 +20,153 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumDeadStore, "Number of dead stores eliminated"); - -// Try to kill dead allocas by walking through its uses until we see some use -// that could escape. This is a conservative analysis which tries to handle -// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things -// left after inlining and SROA finish chewing on an alloca. -static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) { - SmallVector<Instruction *, 4> Worklist, DeadStores; - Worklist.push_back(&AI); - do { - Instruction *PI = Worklist.pop_back_val(); - for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); - UI != UE; ++UI) { - Instruction *I = cast<Instruction>(*UI); - switch (I->getOpcode()) { - default: - // Give up the moment we see something we can't handle. - return 0; - - case Instruction::GetElementPtr: - case Instruction::BitCast: - Worklist.push_back(I); +STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); + +/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to +/// some part of a constant global variable. This intentionally only accepts +/// constant expressions because we can't rewrite arbitrary instructions. +static bool pointsToConstantGlobal(Value *V) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return GV->isConstant(); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::GetElementPtr) + return pointsToConstantGlobal(CE->getOperand(0)); + return false; +} + +/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) +/// pointer to an alloca. Ignore any reads of the pointer, return false if we +/// see any stores or other unknown uses. If we see pointer arithmetic, keep +/// track of whether it moves the pointer (with IsOffset) but otherwise traverse +/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to +/// the alloca, and if the source pointer is a pointer to a constant global, we +/// can optimize this. 
+static bool +isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, + SmallVectorImpl<Instruction *> &ToDelete, + bool IsOffset = false) { + // We track lifetime intrinsics as we encounter them. If we decide to go + // ahead and replace the value with the global, this lets the caller quickly + // eliminate the markers. + + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { + User *U = cast<Instruction>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Ignore non-volatile loads, they are always ok. + if (!LI->isSimple()) return false; + continue; + } + + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + // If uses of the bitcast are ok, we are ok. + if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset)) + return false; + continue; + } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + // If the GEP has all zero indices, it doesn't offset the pointer. If it + // doesn't, it does. + if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete, + IsOffset || !GEP->hasAllZeroIndices())) + return false; + continue; + } + + if (CallSite CS = U) { + // If this is the function being called then we treat it like a load and + // ignore it. + if (CS.isCallee(UI)) continue; - case Instruction::Call: - // We can handle a limited subset of calls to no-op intrinsics. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - continue; - default: - return 0; - } - } - // Reject everything else. - return 0; - - case Instruction::Store: { - // Stores into the alloca are only live if the alloca is live. - StoreInst *SI = cast<StoreInst>(I); - // We can eliminate atomic stores, but not volatile. - if (SI->isVolatile()) - return 0; - // The store is only trivially safe if the poniter is the destination - // as opposed to the value. We're conservative here and don't check for - // the case where we store the address of a dead alloca into a dead - // alloca. - if (SI->getPointerOperand() != PI) - return 0; - DeadStores.push_back(I); + // If this is a readonly/readnone call site, then we know it is just a + // load (but one that potentially returns the value itself), so we can + // ignore it if we know that the value isn't captured. + unsigned ArgNo = CS.getArgumentNo(UI); + if (CS.onlyReadsMemory() && + (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) + continue; + + // If this is being passed as a byval argument, the caller is making a + // copy, so it is only a read of the alloca. + if (CS.isByValArgument(ArgNo)) + continue; + } + + // Lifetime intrinsics can be handled by the caller. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + assert(II->use_empty() && "Lifetime markers have no result to use!"); + ToDelete.push_back(II); continue; } - } } - } while (!Worklist.empty()); - // The alloca is dead. Kill off all the stores to it, and then replace it - // with undef. - while (!DeadStores.empty()) - IC.EraseInstFromFunction(*DeadStores.pop_back_val()); - return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType())); + // If this is isn't our memcpy/memmove, reject it as something we can't + // handle. 
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(U); + if (MI == 0) + return false; + + // If the transfer is using the alloca as a source of the transfer, then + // ignore it since it is a load (unless the transfer is volatile). + if (UI.getOperandNo() == 1) { + if (MI->isVolatile()) return false; + continue; + } + + // If we already have seen a copy, reject the second one. + if (TheCopy) return false; + + // If the pointer has been offset from the start of the alloca, we can't + // safely handle this. + if (IsOffset) return false; + + // If the memintrinsic isn't using the alloca as the dest, reject it. + if (UI.getOperandNo() != 0) return false; + + // If the source of the memcpy/move is not a constant global, reject it. + if (!pointsToConstantGlobal(MI->getSource())) + return false; + + // Otherwise, the transform is safe. Remember the copy instruction. + TheCopy = MI; + } + return true; +} + +/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only +/// modified by a copy from a constant global. If we can prove this, we can +/// replace any uses of the alloca with uses of the global directly. +static MemTransferInst * +isOnlyCopiedFromConstantGlobal(AllocaInst *AI, + SmallVectorImpl<Instruction *> &ToDelete) { + MemTransferInst *TheCopy = 0; + if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) + return TheCopy; + return 0; +} + +/// getPointeeAlignment - Compute the minimum alignment of the value pointed +/// to by the given pointer. +static unsigned getPointeeAlignment(Value *V, const TargetData &TD) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + (CE->getOpcode() == Instruction::GetElementPtr && + cast<GEPOperator>(CE)->hasAllZeroIndices())) + return getPointeeAlignment(CE->getOperand(0), TD); + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + if (!GV->isDeclaration()) + return TD.getPreferredAlignment(GV); + + if (PointerType *PT = dyn_cast<PointerType>(V->getType())) + return TD.getABITypeAlignment(PT->getElementType()); + + return 0; } Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { @@ -179,10 +260,32 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } - // Try to aggressively remove allocas which are only used for GEPs, lifetime - // markers, and stores. This happens when SROA iteratively promotes stores - // out of the alloca, and we need to cleanup after it. - return removeDeadAlloca(*this, AI); + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. 
+ SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + Instruction *NewI + = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, + AI.getType())); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } + } + + // At last, use the generic allocation site handler to aggressively remove + // unused allocas. + return visitAllocSite(AI); } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index eb9945b681..291e80019e 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -881,12 +881,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { if (TrueSI->getCondition() == CondVal) { + if (SI.getTrueValue() == TrueSI->getTrueValue()) + return 0; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } } if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { if (FalseSI->getCondition() == CondVal) { + if (SI.getFalseValue() == FalseSI->getFalseValue()) + return 0; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } @@ -899,5 +903,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } + if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + unsigned VWidth = VecTy->getNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) { + if (V != &SI) + return ReplaceInstUsesWith(SI, V); + return &SI; + } + } + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 125c74a89a..54be8ed3fa 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -989,6 +989,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } break; } + case Instruction::Select: { + APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts); + if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) { + for (unsigned i = 0; i < VWidth; i++) { + if (CV->getAggregateElement(i)->isNullValue()) + LeftDemanded.clearBit(i); + else + RightDemanded.clearBit(i); + } + } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; } + + // Output elements are undefined if both are undefined. + UndefElts &= UndefElts2; + break; + } case Instruction::BitCast: { // Vector->vector casts only. VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); @@ -1074,6 +1097,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // like undef&0. The result is known zero, not undef. 
UndefElts &= UndefElts2; break; + case Instruction::FPTrunc: + case Instruction::FPExt: + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + break; case Instruction::Call: { IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index c5124bf7b2..68ecd51604 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -207,7 +207,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. if (MaintainNoSignedWrap(I, B, C) && - (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) { + (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) { // Note: this is only valid because SimplifyBinOp doesn't look at // the operands to Op0. I.clearSubclassOptionalData(); @@ -1106,54 +1106,89 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { -static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl<WeakVH> &Users, - int Depth = 0) { - if (Depth == 8) - return false; +static bool +isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) { + SmallVector<Instruction*, 4> Worklist; + Worklist.push_back(AI); - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - User *U = *UI; - if (isFreeCall(U)) { - Users.push_back(U); - continue; - } - if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { - if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) { - Users.push_back(ICI); + do { + Instruction *PI = Worklist.pop_back_val(); + for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); UI != UE; + ++UI) { + Instruction *I = cast<Instruction>(*UI); + switch (I->getOpcode()) { + default: + // Give up the moment we see something we can't handle. + return false; + + case Instruction::BitCast: + case Instruction::GetElementPtr: + Users.push_back(I); + Worklist.push_back(I); continue; - } - } - if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) { - Users.push_back(BCI); + + case Instruction::ICmp: { + ICmpInst *ICI = cast<ICmpInst>(I); + // We can fold eq/ne comparisons with null to false/true, respectively. + if (!ICI->isEquality() || !isa<ConstantPointerNull>(ICI->getOperand(1))) + return false; + Users.push_back(I); continue; } - } - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { - if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) { - Users.push_back(GEPI); + + case Instruction::Call: + // Ignore no-op and store intrinsics. 
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: + return false; + + case Intrinsic::memmove: + case Intrinsic::memcpy: + case Intrinsic::memset: { + MemIntrinsic *MI = cast<MemIntrinsic>(II); + if (MI->isVolatile() || MI->getRawDest() != PI) + return false; + } + // fall through + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + Users.push_back(I); + continue; + } + } + + if (isFreeCall(I)) { + Users.push_back(I); + continue; + } + return false; + + case Instruction::Store: { + StoreInst *SI = cast<StoreInst>(I); + if (SI->isVolatile() || SI->getPointerOperand() != PI) + return false; + Users.push_back(I); continue; } - } - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - Users.push_back(II); - continue; } + llvm_unreachable("missing a return?"); } - return false; - } + } while (!Worklist.empty()); return true; } -Instruction *InstCombiner::visitMalloc(Instruction &MI) { +Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. SmallVector<WeakVH, 64> Users; - if (IsOnlyNullComparedAndFreed(&MI, Users)) { + if (isAllocSiteRemovable(&MI, Users)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { Instruction *I = cast_or_null<Instruction>(&*Users[i]); if (!I) continue; @@ -1164,6 +1199,12 @@ Instruction *InstCombiner::visitMalloc(Instruction &MI) { C->isFalseWhenEqual())); } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { ReplaceInstUsesWith(*I, UndefValue::get(I->getType())); + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::objectsize) { + ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1)); + uint64_t DontKnow = CI->isZero() ? 
-1ULL : 0; + ReplaceInstUsesWith(*I, ConstantInt::get(I->getType(), DontKnow)); + } } EraseInstFromFunction(*I); } diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 482ebef2a2..06f4d2fedd 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -18,6 +18,7 @@ #include "FunctionBlackList.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" +#include "llvm/InlineAsm.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" @@ -60,6 +61,8 @@ static const int kAsanCtorAndCtorPriority = 1; static const char *kAsanReportErrorTemplate = "__asan_report_"; static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; +static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; +static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *kAsanInitName = "__asan_init"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; @@ -72,6 +75,9 @@ static const int kAsanStackMidRedzoneMagic = 0xf2; static const int kAsanStackRightRedzoneMagic = 0xf3; static const int kAsanStackPartialRedzoneMagic = 0xf4; +// Accesses sizes are powers of two: 1, 2, 4, 8, 16. +static const size_t kNumberOfAccessSizes = 5; + // Command-line flags. // This flag may need to be replaced with -f[no-]asan-reads. @@ -82,7 +88,10 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes", static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); -// This flags limits the number of instructions to be instrumented +static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path", + cl::desc("use instrumentation with slow path for all accesses"), + cl::Hidden, cl::init(false)); +// This flag limits the number of instructions to be instrumented // in any given BB. Normally, this should be set to unlimited (INT_MAX), // but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary // set it to 10000. @@ -99,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", // This flag may need to be replaced with -f[no]asan-globals. static cl::opt<bool> ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInitializers("asan-initialization-order", + cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt<bool> ClMemIntrin("asan-memintrin", cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true)); // This flag may need to be replaced with -fasan-blacklist. @@ -138,21 +149,34 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), namespace { +/// An object of this type is created while instrumenting every function. +struct AsanFunctionContext { + AsanFunctionContext(Function &Function) : F(Function) { } + + Function &F; +}; + /// AddressSanitizer: instrument the code in module to find memory bugs. 
struct AddressSanitizer : public ModulePass { AddressSanitizer(); virtual const char *getPassName() const; - void instrumentMop(Instruction *I); - void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, + void instrumentMop(AsanFunctionContext &AFC, Instruction *I); + void instrumentAddress(AsanFunctionContext &AFC, + Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite); - Instruction *generateCrashCode(IRBuilder<> &IRB, Value *Addr, - bool IsWrite, uint32_t TypeSize); - bool instrumentMemIntrinsic(MemIntrinsic *MI); - void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr, - Value *Size, + Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, + Value *ShadowValue, uint32_t TypeSize); + Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, + bool IsWrite, size_t AccessSizeIndex); + bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI); + void instrumentMemIntrinsicParam(AsanFunctionContext &AFC, + Instruction *OrigIns, Value *Addr, + Value *Size, Instruction *InsertBefore, bool IsWrite); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool handleFunction(Module &M, Function &F); + void createInitializerPoisonCalls(Module &M, + Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool poisonStackInFunction(Module &M, Function &F); virtual bool runOnModule(Module &M); @@ -160,7 +184,6 @@ struct AddressSanitizer : public ModulePass { static char ID; // Pass identification, replacement for typeid private: - uint64_t getAllocaSizeInBytes(AllocaInst *AI) { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); @@ -176,11 +199,13 @@ struct AddressSanitizer : public ModulePass { } Function *checkInterfaceFunction(Constant *FuncOrBitcast); + bool ShouldInstrumentGlobal(GlobalVariable *G); void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, bool DoPoison); bool LooksLikeCodeInBug11395(Instruction *I); + void FindDynamicInitializers(Module &M); + bool HasDynamicInitializer(GlobalVariable *G); - Module *CurrentModule; LLVMContext *C; TargetData *TD; uint64_t MappingOffset; @@ -193,7 +218,12 @@ struct AddressSanitizer : public ModulePass { Function *AsanInitFunction; Instruction *CtorInsertBefore; OwningPtr<FunctionBlackList> BL; + // This array is indexed by AccessIsWrite and log2(AccessSize). + Function *AsanErrorCallback[2][kNumberOfAccessSizes]; + InlineAsm *EmptyAsm; + SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals; }; + } // namespace char AddressSanitizer::ID = 0; @@ -209,6 +239,12 @@ const char *AddressSanitizer::getPassName() const { return "AddressSanitizer"; } +static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { + size_t Res = CountTrailingZeros_32(TypeSize / 8); + assert(Res < kNumberOfAccessSizes); + return Res; +} + // Create a constant for Str so that we can pass it to the run-time lib. static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); @@ -227,19 +263,24 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { // ThenBlock // Tail // -// Returns the ThenBlock's terminator. -static BranchInst *splitBlockAndInsertIfThen(Value *Cmp) { +// ThenBlock block is created and its terminator is returned. +// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise +// it is terminated with BranchInst to Tail. 
+static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) { Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode(); BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); TerminatorInst *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getParent()->getParent()->getContext(); - BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent()); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + TerminatorInst *CheckTerm; + if (Unreachable) + CheckTerm = new UnreachableInst(C, ThenBlock); + else + CheckTerm = BranchInst::Create(Tail, ThenBlock); BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); - - BranchInst *CheckTerm = BranchInst::Create(Tail, ThenBlock); return CheckTerm; } @@ -253,12 +294,13 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { MappingOffset)); } -void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns, +void AddressSanitizer::instrumentMemIntrinsicParam( + AsanFunctionContext &AFC, Instruction *OrigIns, Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) { // Check the first byte. { IRBuilder<> IRB(InsertBefore); - instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite); + instrumentAddress(AFC, OrigIns, IRB, Addr, 8, IsWrite); } // Check the last byte. { @@ -268,15 +310,16 @@ void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns, SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne); - instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); + instrumentAddress(AFC, OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); } } // Instrument memset/memmove/memcpy -bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { +bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC, + MemIntrinsic *MI) { Value *Dst = MI->getDest(); MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI); - Value *Src = MemTran ? MemTran->getSource() : NULL; + Value *Src = MemTran ? MemTran->getSource() : 0; Value *Length = MI->getLength(); Constant *ConstLength = dyn_cast<Constant>(Length); @@ -289,12 +332,12 @@ bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { Value *Cmp = IRB.CreateICmpNE(Length, Constant::getNullValue(Length->getType())); - InsertBefore = splitBlockAndInsertIfThen(Cmp); + InsertBefore = splitBlockAndInsertIfThen(Cmp, false); } - instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true); + instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true); if (Src) - instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false); + instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false); return true; } @@ -324,14 +367,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) { return NULL; } -void AddressSanitizer::instrumentMop(Instruction *I) { +void AddressSanitizer::FindDynamicInitializers(Module& M) { + // Clang generates metadata identifying all dynamically initialized globals. 
+ NamedMDNode *DynamicGlobals = + M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); + if (!DynamicGlobals) + return; + for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) { + MDNode *MDN = DynamicGlobals->getOperand(i); + assert(MDN->getNumOperands() == 1); + Value *VG = MDN->getOperand(0); + // The optimizer may optimize away a global entirely, in which case we + // cannot instrument access to it. + if (!VG) + continue; + + GlobalVariable *G = cast<GlobalVariable>(VG); + DynamicallyInitializedGlobals.insert(G); + } +} +// Returns true if a global variable is initialized dynamically in this TU. +bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) { + return DynamicallyInitializedGlobals.count(G); +} + +void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) { bool IsWrite; Value *Addr = isInterestingMemoryAccess(I, &IsWrite); assert(Addr); - if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) { - // We are accessing a global scalar variable. Nothing to catch here. - return; + if (ClOpt && ClOptGlobals) { + if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { + // If initialization order checking is disabled, a simple access to a + // dynamically initialized global is always valid. + if (!ClInitializers) + return; + // If a global variable does not have dynamic initialization we don't + // have to instrument it. However, if a global has external linkage, we + // assume it has dynamic initialization, as it may have an initializer + // in a different TU. + if (G->getLinkage() != GlobalVariable::ExternalLinkage && + !HasDynamicInitializer(G)) + return; + } } + Type *OrigPtrTy = Addr->getType(); Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); @@ -345,7 +424,7 @@ void AddressSanitizer::instrumentMop(Instruction *I) { } IRBuilder<> IRB(I); - instrumentAddress(I, IRB, Addr, TypeSize, IsWrite); + instrumentAddress(AFC, I, IRB, Addr, TypeSize, IsWrite); } // Validate the result of Module::getOrInsertFunction called for an interface @@ -360,18 +439,38 @@ Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) { } Instruction *AddressSanitizer::generateCrashCode( - IRBuilder<> &IRB, Value *Addr, bool IsWrite, uint32_t TypeSize) { - // IsWrite and TypeSize are encoded in the function name. - std::string FunctionName = std::string(kAsanReportErrorTemplate) + - (IsWrite ? "store" : "load") + itostr(TypeSize / 8); - Value *ReportWarningFunc = CurrentModule->getOrInsertFunction( - FunctionName, IRB.getVoidTy(), IntptrTy, NULL); - CallInst *Call = IRB.CreateCall(ReportWarningFunc, Addr); - Call->setDoesNotReturn(); + Instruction *InsertBefore, Value *Addr, + bool IsWrite, size_t AccessSizeIndex) { + IRBuilder<> IRB(InsertBefore); + CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], + Addr); + // We don't do Call->setDoesNotReturn() because the BB already has + // UnreachableInst at the end. + // This EmptyAsm is required to avoid callback merge. 
+ IRB.CreateCall(EmptyAsm); return Call; } -void AddressSanitizer::instrumentAddress(Instruction *OrigIns, +Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, + Value *ShadowValue, + uint32_t TypeSize) { + size_t Granularity = 1 << MappingScale; + // Addr & (Granularity - 1) + Value *LastAccessedByte = IRB.CreateAnd( + AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); + // (Addr & (Granularity - 1)) + size - 1 + if (TypeSize / 8 > 1) + LastAccessedByte = IRB.CreateAdd( + LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); + // (uint8_t) ((Addr & (Granularity-1)) + size - 1) + LastAccessedByte = IRB.CreateIntCast( + LastAccessedByte, ShadowValue->getType(), false); + // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue + return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); +} + +void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, + Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite) { Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); @@ -385,32 +484,117 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); - - Instruction *CheckTerm = splitBlockAndInsertIfThen(Cmp); - IRBuilder<> IRB2(CheckTerm); - + size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); size_t Granularity = 1 << MappingScale; - if (TypeSize < 8 * Granularity) { - // Addr & (Granularity - 1) - Value *LastAccessedByte = IRB2.CreateAnd( - AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); - // (Addr & (Granularity - 1)) + size - 1 - if (TypeSize / 8 > 1) - LastAccessedByte = IRB2.CreateAdd( - LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); - // (uint8_t) ((Addr & (Granularity-1)) + size - 1) - LastAccessedByte = IRB2.CreateIntCast( - LastAccessedByte, IRB.getInt8Ty(), false); - // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue - Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue); - - CheckTerm = splitBlockAndInsertIfThen(Cmp2); - } - - IRBuilder<> IRB1(CheckTerm); - Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize); + TerminatorInst *CrashTerm = 0; + + if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { + TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false); + assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); + BasicBlock *NextBB = CheckTerm->getSuccessor(0); + IRB.SetInsertPoint(CheckTerm); + Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); + BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB); + CrashTerm = new UnreachableInst(*C, CrashBlock); + BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); + ReplaceInstWithInst(CheckTerm, NewTerm); + } else { + CrashTerm = splitBlockAndInsertIfThen(Cmp, true); + } + + Instruction *Crash = + generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex); Crash->setDebugLoc(OrigIns->getDebugLoc()); - ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C)); +} + +void AddressSanitizer::createInitializerPoisonCalls(Module &M, + Value *FirstAddr, + Value *LastAddr) { + // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. + Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); + // If that function is not present, this TU contains no globals, or they have + // all been optimized away + if (!GlobalInit) + return; + + // Set up the arguments to our poison/unpoison functions. 
+ IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); + + // Declare our poisoning and unpoisoning functions. + Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); + Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL)); + AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); + + // Add a call to poison all external globals before the given function starts. + IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr); + + // Add calls to unpoison all globals before each return instruction. + for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); + I != E; ++I) { + if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) { + CallInst::Create(AsanUnpoisonGlobals, "", RI); + } + } +} + +bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { + Type *Ty = cast<PointerType>(G->getType())->getElementType(); + DEBUG(dbgs() << "GLOBAL: " << *G); + + if (!Ty->isSized()) return false; + if (!G->hasInitializer()) return false; + // Touch only those globals that will not be defined in other modules. + // Don't handle ODR type linkages since other modules may be built w/o asan. + if (G->getLinkage() != GlobalVariable::ExternalLinkage && + G->getLinkage() != GlobalVariable::PrivateLinkage && + G->getLinkage() != GlobalVariable::InternalLinkage) + return false; + // Two problems with thread-locals: + // - The address of the main thread's copy can't be computed at link-time. + // - Need to poison all copies, not just the main thread's one. + if (G->isThreadLocal()) + return false; + // For now, just ignore this Alloca if the alignment is large. + if (G->getAlignment() > RedzoneSize) return false; + + // Ignore all the globals with the names starting with "\01L_OBJC_". + // Many of those are put into the .cstring section. The linker compresses + // that section by removing the spare \0s after the string terminator, so + // our redzones get broken. + if ((G->getName().find("\01L_OBJC_") == 0) || + (G->getName().find("\01l_OBJC_") == 0)) { + DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); + return false; + } + + if (G->hasSection()) { + StringRef Section(G->getSection()); + // Ignore the globals from the __OBJC section. The ObjC runtime assumes + // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to + // them. + if ((Section.find("__OBJC,") == 0) || + (Section.find("__DATA, __objc_") == 0)) { + DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); + return false; + } + // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 + // Constant CFString instances are compiled in the following way: + // -- the string buffer is emitted into + // __TEXT,__cstring,cstring_literals + // -- the constant NSConstantString structure referencing that buffer + // is placed into __DATA,__cfstring + // Therefore there's no point in placing redzones into __DATA,__cfstring. 
+ // Moreover, it causes the linker to crash on OS X 10.7 + if (Section.find("__DATA,__cfstring") == 0) { + DEBUG(dbgs() << "Ignoring CFString: " << *G); + return false; + } + } + + return true; } // This function replaces all global variables with new variables that have @@ -419,62 +603,10 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, bool AddressSanitizer::insertGlobalRedzones(Module &M) { SmallVector<GlobalVariable *, 16> GlobalsToChange; - for (Module::GlobalListType::iterator G = M.getGlobalList().begin(), - E = M.getGlobalList().end(); G != E; ++G) { - Type *Ty = cast<PointerType>(G->getType())->getElementType(); - DEBUG(dbgs() << "GLOBAL: " << *G); - - if (!Ty->isSized()) continue; - if (!G->hasInitializer()) continue; - // Touch only those globals that will not be defined in other modules. - // Don't handle ODR type linkages since other modules may be built w/o asan. - if (G->getLinkage() != GlobalVariable::ExternalLinkage && - G->getLinkage() != GlobalVariable::PrivateLinkage && - G->getLinkage() != GlobalVariable::InternalLinkage) - continue; - // Two problems with thread-locals: - // - The address of the main thread's copy can't be computed at link-time. - // - Need to poison all copies, not just the main thread's one. - if (G->isThreadLocal()) - continue; - // For now, just ignore this Alloca if the alignment is large. - if (G->getAlignment() > RedzoneSize) continue; - - // Ignore all the globals with the names starting with "\01L_OBJC_". - // Many of those are put into the .cstring section. The linker compresses - // that section by removing the spare \0s after the string terminator, so - // our redzones get broken. - if ((G->getName().find("\01L_OBJC_") == 0) || - (G->getName().find("\01l_OBJC_") == 0)) { - DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); - continue; - } - - if (G->hasSection()) { - StringRef Section(G->getSection()); - // Ignore the globals from the __OBJC section. The ObjC runtime assumes - // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to - // them. - if ((Section.find("__OBJC,") == 0) || - (Section.find("__DATA, __objc_") == 0)) { - DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); - continue; - } - // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 - // Constant CFString instances are compiled in the following way: - // -- the string buffer is emitted into - // __TEXT,__cstring,cstring_literals - // -- the constant NSConstantString structure referencing that buffer - // is placed into __DATA,__cfstring - // Therefore there's no point in placing redzones into __DATA,__cfstring. - // Moreover, it causes the linker to crash on OS X 10.7 - if (Section.find("__DATA,__cfstring") == 0) { - DEBUG(dbgs() << "Ignoring CFString: " << *G); - continue; - } - } - - GlobalsToChange.push_back(G); + for (Module::GlobalListType::iterator G = M.global_begin(), + E = M.global_end(); G != E; ++G) { + if (ShouldInstrumentGlobal(G)) + GlobalsToChange.push_back(G); } size_t n = GlobalsToChange.size(); @@ -485,13 +617,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. 
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, - IntptrTy, IntptrTy, NULL); - SmallVector<Constant *, 16> Initializers(n); + IntptrTy, IntptrTy, + IntptrTy, NULL); + SmallVector<Constant *, 16> Initializers(n), DynamicInit; IRBuilder<> IRB(CtorInsertBefore); + if (ClInitializers) + FindDynamicInitializers(M); + + // The addresses of the first and last dynamically initialized globals in + // this TU. Used in initialization order checking. + Value *FirstDynamic = 0, *LastDynamic = 0; + for (size_t i = 0; i < n; i++) { GlobalVariable *G = GlobalsToChange[i]; PointerType *PtrTy = cast<PointerType>(G->getType()); @@ -500,6 +641,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { uint64_t RightRedzoneSize = RedzoneSize + (RedzoneSize - (SizeInBytes % RedzoneSize)); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); + // Determine whether this global should be poisoned in initialization. + bool GlobalHasDynamicInitializer = HasDynamicInitializer(G); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( @@ -534,7 +677,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); + + // Populate the first and last globals declared in this TU. + if (ClInitializers && GlobalHasDynamicInitializer) { + LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); + if (FirstDynamic == 0) + FirstDynamic = LastDynamic; + } + DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal); } @@ -543,8 +695,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage, ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); + // Create calls for poisoning before initializers run and unpoisoning after. + if (ClInitializers && FirstDynamic && LastDynamic) + createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); + Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + kAsanRegisterGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, NULL)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); IRB.CreateCall2(AsanRegisterGlobals, @@ -581,7 +738,6 @@ bool AddressSanitizer::runOnModule(Module &M) { return false; BL.reset(new FunctionBlackList(ClBlackListFile)); - CurrentModule = &M; C = &(M.getContext()); LongSize = TD->getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); @@ -600,6 +756,23 @@ bool AddressSanitizer::runOnModule(Module &M) { AsanInitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(AsanInitFunction); + // Create __asan_report* callbacks. + for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { + for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; + AccessSizeIndex++) { + // IsWrite and TypeSize are encoded in the function name. + std::string FunctionName = std::string(kAsanReportErrorTemplate) + + (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex); + // If we are merging crash callbacks, they have two parameters. + AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( + M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); + } + } + // We insert an empty inline asm after __asan_report* to avoid callback merge. 
+ EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), + StringRef(""), StringRef(""), + /*hasSideEffects=*/true); + llvm::Triple targetTriple(M.getTargetTriple()); bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::ANDROIDEABI; @@ -721,6 +894,8 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { } } + AsanFunctionContext AFC(F); + // Instrument. int NumInstrumented = 0; for (size_t i = 0, n = ToInstrument.size(); i != n; i++) { @@ -728,9 +903,9 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { if (isInterestingMemoryAccess(Inst, &IsWrite)) - instrumentMop(Inst); + instrumentMop(AFC, Inst); else - instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); + instrumentMemIntrinsic(AFC, cast<MemIntrinsic>(Inst)); } NumInstrumented++; } diff --git a/lib/Transforms/Scalar/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 0690d76e7b..09e0f14451 100644 --- a/lib/Transforms/Scalar/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "bounds-checking" -#include "llvm/Transforms/Scalar.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" #include "llvm/Pass.h" @@ -25,6 +24,7 @@ #include "llvm/Support/TargetFolder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Instrumentation.h" using namespace llvm; static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap", @@ -67,11 +67,8 @@ namespace { } char BoundsChecking::ID = 0; -INITIALIZE_PASS_BEGIN(BoundsChecking, "bounds-checking", - "Run-time bounds checking", false, false) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_PASS_END(BoundsChecking, "bounds-checking", - "Run-time bounds checking", false, false) +INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking", + false, false) /// getTrapBB - create a basic block that traps. All overflowing conditions @@ -141,6 +138,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { Value *Size = SizeOffset.first; Value *Offset = SizeOffset.second; + ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); IntegerType *IntTy = TD->getIntPtrType(Inst->getContext()); Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); @@ -149,12 +147,17 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { // . Offset >= 0 (since the offset is given from the base ptr) // . Size >= Offset (unsigned) // . Size - Offset >= NeededSize (unsigned) + // + // optimization: if Size >= 0 (signed), skip 1st check // FIXME: add NSW/NUW here? 
-- we dont care if the subtraction overflows Value *ObjSize = Builder->CreateSub(Size, Offset); - Value *Cmp1 = Builder->CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); Value *Cmp2 = Builder->CreateICmpULT(Size, Offset); Value *Cmp3 = Builder->CreateICmpULT(ObjSize, NeededSizeVal); - Value *Or = Builder->CreateOr(Cmp1, Builder->CreateOr(Cmp2, Cmp3)); + Value *Or = Builder->CreateOr(Cmp2, Cmp3); + if (!SizeCI || SizeCI->getValue().slt(0)) { + Value *Cmp1 = Builder->CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); + Or = Builder->CreateOr(Cmp1, Or); + } emitBranchToTrap(Or); ++ChecksAdded; diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index eaa3a4000f..00de882f17 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMInstrumentation AddressSanitizer.cpp + BoundsChecking.cpp EdgeProfiling.cpp FunctionBlackList.cpp GCOVProfiling.cpp diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index c7266e2f8d..1e0b4a348a 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -20,11 +20,12 @@ using namespace llvm; /// initializeInstrumentation - Initialize all passes in the TransformUtils /// library. void llvm::initializeInstrumentation(PassRegistry &Registry) { + initializeAddressSanitizerPass(Registry); + initializeBoundsCheckingPass(Registry); initializeEdgeProfilerPass(Registry); + initializeGCOVProfilerPass(Registry); initializeOptimalEdgeProfilerPass(Registry); initializePathProfilerPass(Registry); - initializeGCOVProfilerPass(Registry); - initializeAddressSanitizerPass(Registry); initializeThreadSanitizerPass(Registry); } diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index f76c77e1bd..a4bb5a66af 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -26,30 +26,6 @@ namespace llvm { /// The type parameter T determines the type of the nodes of the graph. template <typename T> class MaximumSpanningTree { - - // A comparing class for comparing weighted edges. - template <typename CT> - struct EdgeWeightCompare { - bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X, - typename MaximumSpanningTree<CT>::EdgeWeight Y) const { - if (X.second > Y.second) return true; - if (X.second < Y.second) return false; - if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) { - if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) { - if (BBX->size() > BBY->size()) return true; - if (BBX->size() < BBY->size()) return false; - } - } - if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) { - if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) { - if (BBX->size() > BBY->size()) return true; - if (BBX->size() < BBY->size()) return false; - } - } - return false; - } - }; - public: typedef std::pair<const T*, const T*> Edge; typedef std::pair<Edge, double> EdgeWeight; @@ -59,6 +35,33 @@ namespace llvm { MaxSpanTree MST; + private: + // A comparing class for comparing weighted edges. + struct EdgeWeightCompare { + static bool getBlockSize(const T *X) { + const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X); + return BB ? 
BB->size() : 0;
+    }
+
+    bool operator()(EdgeWeight X, EdgeWeight Y) const {
+      if (X.second > Y.second) return true;
+      if (X.second < Y.second) return false;
+
+      // Equal edge weights: break ties by comparing block sizes.
+      size_t XSizeA = getBlockSize(X.first.first);
+      size_t YSizeA = getBlockSize(Y.first.first);
+      if (XSizeA > YSizeA) return true;
+      if (XSizeA < YSizeA) return false;
+
+      size_t XSizeB = getBlockSize(X.first.second);
+      size_t YSizeB = getBlockSize(Y.first.second);
+      if (XSizeB > YSizeB) return true;
+      if (XSizeB < YSizeB) return false;
+
+      return false;
+    }
+  };
+
   public:
     static char ID; // Class identification, replacement for typeinfo
@@ -66,7 +69,7 @@ namespace llvm {
     /// spanning tree.
     MaximumSpanningTree(EdgeWeights &EdgeVector) {
-      std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+      std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare());
 
       // Create spanning tree, Forest contains a special data structure
       // that makes checking if two nodes are already in a common (sub-)tree
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index b2147968df..cc27146ebc 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -55,11 +55,11 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/TypeBuilder.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/TypeBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Instrumentation.h"
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 4c12a9b624..dc0fa7175d 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -319,7 +319,12 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
   if (Idx < 0)
     return false;
   if (IsWrite && isVtableAccess(I)) {
+    DEBUG(dbgs() << "  VPTR : " << *I << "\n");
     Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
+    // StoredValue does not necessarily have a pointer type.
+    if (isa<IntegerType>(StoredValue->getType()))
+      StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy());
+    // Call TsanVptrUpdate.
     IRB.CreateCall2(TsanVptrUpdate,
                     IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                     IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy()));
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index ba214d1a33..b344952cc5 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -9,7 +9,7 @@
 //
 // This file implements the Aggressive Dead Code Elimination pass.  This pass
 // optimistically assumes that all instructions are dead until proven otherwise,
-// allowing it to eliminate dead computations that other DCE passes do not 
+// allowing it to eliminate dead computations that other DCE passes do not
 // catch, particularly involving loop computations.
// //===----------------------------------------------------------------------===// @@ -36,13 +36,13 @@ namespace { ADCE() : FunctionPass(ID) { initializeADCEPass(*PassRegistry::getPassRegistry()); } - + virtual bool runOnFunction(Function& F); - + virtual void getAnalysisUsage(AnalysisUsage& AU) const { AU.setPreservesCFG(); } - + }; } @@ -52,7 +52,7 @@ INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false) bool ADCE::runOnFunction(Function& F) { SmallPtrSet<Instruction*, 128> alive; SmallVector<Instruction*, 128> worklist; - + // Collect the set of "root" instructions that are known live. for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) if (isa<TerminatorInst>(I.getInstructionIterator()) || @@ -62,7 +62,7 @@ bool ADCE::runOnFunction(Function& F) { alive.insert(I.getInstructionIterator()); worklist.push_back(I.getInstructionIterator()); } - + // Propagate liveness backwards to operands. while (!worklist.empty()) { Instruction* curr = worklist.pop_back_val(); @@ -72,7 +72,7 @@ bool ADCE::runOnFunction(Function& F) { if (alive.insert(Inst)) worklist.push_back(Inst); } - + // The inverse of the live set is the dead set. These are those instructions // which have no side effects and do not influence the control flow or return // value of the function, and may therefore be deleted safely. @@ -82,7 +82,7 @@ bool ADCE::runOnFunction(Function& F) { worklist.push_back(I.getInstructionIterator()); I->dropAllReferences(); } - + for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(), E = worklist.end(); I != E; ++I) { ++NumRemoved; diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index bf9cc66392..a01e0661b1 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -1,7 +1,6 @@ add_llvm_library(LLVMScalarOpts ADCE.cpp BasicBlockPlacement.cpp - BoundsChecking.cpp CodeGenPrepare.cpp ConstantProp.cpp CorrelatedValuePropagation.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index cbc089ab78..a8deda8b74 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -66,11 +66,6 @@ static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); -// FIXME: Remove this abomination once all of the tests pass without it! -static cl::opt<bool> DisableDeleteDeadBlocks( - "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false), - cl::desc("Disable deleting dead blocks in CodeGenPrepare")); - static cl::opt<bool> DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); @@ -83,7 +78,7 @@ namespace { const TargetLibraryInfo *TLInfo; DominatorTree *DT; ProfileInfo *PFI; - + /// CurInstIterator - As we scan instructions optimizing them, this is the /// next instruction to optimize. Xforms that can invalidate this should /// update it. @@ -116,6 +111,7 @@ namespace { } private: + bool EliminateFallThrough(Function &F); bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void EliminateMostlyEmptyBlock(BasicBlock *BB); @@ -157,7 +153,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { EverMadeChange |= EliminateMostlyEmptyBlocks(F); // llvm.dbg.value is far away from the value then iSel may not be able - // handle it properly. 
iSel will drop llvm.dbg.value if it can not + // handle it properly. iSel will drop llvm.dbg.value if it can not // find a node corresponding to the value. EverMadeChange |= PlaceDbgValues(F); @@ -187,10 +183,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { WorkList.insert(*II); } - if (!DisableDeleteDeadBlocks) - for (SmallPtrSet<BasicBlock*, 8>::iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) - DeleteDeadBlock(*I); + for (SmallPtrSet<BasicBlock*, 8>::iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) + DeleteDeadBlock(*I); + + // Merge pairs of basic blocks with unconditional branches, connected by + // a single edge. + if (EverMadeChange || MadeChange) + MadeChange |= EliminateFallThrough(F); if (MadeChange) ModifiedDT = true; @@ -203,6 +203,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } +/// EliminateFallThrough - Merge basic blocks which are connected +/// by a single edge, where one of the basic blocks has a single successor +/// pointing to the other basic block, which has a single predecessor. +bool CodeGenPrepare::EliminateFallThrough(Function &F) { + bool Changed = false; + // Scan all of the blocks in the function, except for the entry block. + for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + // If the destination block has a single pred, then this is a trivial + // edge, just collapse it. + BasicBlock *SinglePred = BB->getSinglePredecessor(); + + if (!SinglePred || SinglePred == BB) continue; + + BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); + if (Term && !Term->isConditional()) { + Changed = true; + DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); + // Remember if SinglePred was the entry block of the function. + // If so, we will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(BB, this); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + // We have erased a block. Update the iterator. + I = BB; + } + } + return Changed; +} + /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, /// debug info directives, and an unconditional branch. Passes before isel /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for @@ -336,7 +369,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); - + DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } @@ -547,7 +580,7 @@ protected: bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { BasicBlock *BB = CI->getParent(); - + // Lower inline assembly if we can. // If we found an inline asm expession, and if the target knows how to // lower it to normal LLVM code, do so now. @@ -564,19 +597,19 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { if (OptimizeInlineAsmInst(CI)) return true; } - + // Lower all uses of llvm.objectsize.* IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II && II->getIntrinsicID() == Intrinsic::objectsize) { bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); Type *ReturnTy = CI->getType(); - Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); - + Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 
0 : -1ULL); + // Substituting this can cause recursive simplifications, which can // invalidate our iterator. Use a WeakVH to hold onto it in case this // happens. WeakVH IterHandle(CurInstIterator); - + replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0, TLInfo, ModifiedDT ? 0 : DT); @@ -604,13 +637,13 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // We'll need TargetData from here on out. const TargetData *TD = TLI ? TLI->getTargetData() : 0; if (!TD) return false; - + // Lower all default uses of _chk calls. This is very similar // to what InstCombineCalls does, but here we are only lowering calls // that have the default "don't know" as the objectsize. Anything else // should be left alone. CodeGenPrepareFortifiedLibCalls Simplifier; - return Simplifier.fold(CI, TD); + return Simplifier.fold(CI, TD, TLInfo); } /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return @@ -645,10 +678,18 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (!TLI) return false; + PHINode *PN = 0; + BitCastInst *BCI = 0; Value *V = RI->getReturnValue(); - PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL; - if (V && !PN) - return false; + if (V) { + BCI = dyn_cast<BitCastInst>(V); + if (BCI) + V = BCI->getOperand(0); + + PN = dyn_cast<PHINode>(V); + if (!PN) + return false; + } BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) @@ -666,6 +707,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (PN) { BasicBlock::iterator BI = BB->begin(); do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI == BCI) + // Also skip over the bitcast. + ++BI; if (&*BI != RI) return false; } else { @@ -760,13 +804,13 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy) { Value *Repl = Addr; - - // Try to collapse single-value PHI nodes. This is necessary to undo + + // Try to collapse single-value PHI nodes. This is necessary to undo // unprofitable PRE transformations. SmallVector<Value*, 8> worklist; SmallPtrSet<Value*, 16> Visited; worklist.push_back(Addr); - + // Use a worklist to iteratively look through PHI nodes, and ensure that // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. @@ -778,20 +822,20 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, while (!worklist.empty()) { Value *V = worklist.back(); worklist.pop_back(); - + // Break use-def graph loops. if (!Visited.insert(V)) { Consensus = 0; break; } - + // For a PHI node, push all of its incoming values. if (PHINode *P = dyn_cast<PHINode>(V)) { for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) worklist.push_back(P->getIncomingValue(i)); continue; } - + // For non-PHIs, determine the addressing mode being computed. SmallVector<Instruction*, 16> NewAddrModeInsts; ExtAddrMode NewAddrMode = @@ -826,15 +870,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } continue; } - + Consensus = 0; break; } - + // If the addressing mode couldn't be determined, or if multiple different // ones were determined, bail out now. if (!Consensus) return false; - + // Check to see if any of the instructions supersumed by this addr mode are // non-local to I's BB. bool AnyNonLocal = false; @@ -943,7 +987,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Use a WeakVH to hold onto it in case this happens. 
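Editorial sketch: a plain-C++ analogy for the WeakVH guard used here, with std::weak_ptr as a stand-in. The only point illustrated is that the handle observes deletion, so the pass can notice that its "current instruction" died during recursive simplification; this is not LLVM's actual mechanism.

#include <cassert>
#include <memory>

struct Instr { int id; }; // stand-in for an Instruction

int main() {
  auto I = std::make_shared<Instr>(Instr{42});
  std::weak_ptr<Instr> Handle = I; // analogous to WeakVH IterHandle
  I.reset();                       // instruction erased underneath us
  assert(Handle.expired());        // detected; re-establish the iterator
}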
WeakVH IterHandle(CurInstIterator); BasicBlock *BB = CurInstIterator->getParent(); - + RecursivelyDeleteTriviallyDeadInstructions(Repl); if (IterHandle != CurInstIterator) { @@ -955,7 +999,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // This address is now available for reassignment, so erase the table // entry; we don't want to match some completely different instruction. SunkAddrs[Addr] = 0; - } + } } ++NumMemoryInsts; return true; @@ -967,12 +1011,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; - TargetLowering::AsmOperandInfoVector + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI->ParseConstraints(CS); unsigned ArgNo = 0; for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; - + // Compute the constraint code and ConstraintType to use. TLI->ComputeConstraintToUse(OpInfo, SDValue()); @@ -1187,7 +1231,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { } return false; } - + if (CastInst *CI = dyn_cast<CastInst>(I)) { // If the source of the cast is a constant, then this should have // already been constant folded. The only reason NOT to constant fold @@ -1207,23 +1251,23 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { } return false; } - + if (CmpInst *CI = dyn_cast<CmpInst>(I)) return OptimizeCmpExpression(CI); - + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { if (TLI) return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); return false; } - + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { if (TLI) return OptimizeMemoryInst(I, SI->getOperand(1), SI->getOperand(0)->getType()); return false; } - + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { if (GEPI->hasAllZeroIndices()) { /// The GEP operand must be a pointer, so must its result -> BitCast @@ -1237,7 +1281,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { } return false; } - + if (CallInst *CI = dyn_cast<CallInst>(I)) return OptimizeCallInst(CI); @@ -1265,7 +1309,7 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { } // llvm.dbg.value is far away from the value then iSel may not be able -// handle it properly. iSel will drop llvm.dbg.value if it can not +// handle it properly. iSel will drop llvm.dbg.value if it can not // find a node corresponding to the value. bool CodeGenPrepare::PlaceDbgValues(Function &F) { bool MadeChange = false; diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c8448fa6c1..8b1283ff25 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -248,7 +248,7 @@ static bool isShortenable(Instruction *I) { // Don't shorten stores for now if (isa<StoreInst>(I)) return false; - + IntrinsicInst *II = cast<IntrinsicInst>(I); switch (II->getIntrinsicID()) { default: return false; @@ -292,7 +292,7 @@ namespace { /// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location /// completely overwrites a store to the 'Earlier' location. 
-/// 'OverwriteEnd' if the end of the 'Earlier' location is completely +/// 'OverwriteEnd' if the end of the 'Earlier' location is completely /// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, const AliasAnalysis::Location &Earlier, @@ -315,7 +315,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, if (AA.getTargetData() == 0 && Later.Ptr->getType() == Earlier.Ptr->getType()) return OverwriteComplete; - + return OverwriteUnknown; } @@ -378,10 +378,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // // We have to be careful here as *Off is signed while *.Size is unsigned. if (EarlierOff >= LaterOff && - Later.Size > Earlier.Size && + Later.Size >= Earlier.Size && uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) return OverwriteComplete; - + // The other interesting case is if the later store overwrites the end of // the earlier store // @@ -520,11 +520,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // If we find a write that is a) removable (i.e., non-volatile), b) is // completely obliterated by the store to 'Loc', and c) which we know that // 'Inst' doesn't load from, then we can remove it. - if (isRemovable(DepWrite) && + if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { - int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, - DepWriteOffset, InstWriteOffset); + int64_t InstWriteOffset, DepWriteOffset; + OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, + DepWriteOffset, InstWriteOffset); if (OR == OverwriteComplete) { DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite << "\n KILLER: " << *Inst << '\n'); @@ -533,7 +533,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { DeleteDeadInstruction(DepWrite, *MD); ++NumFastStores; MadeChange = true; - + // DeleteDeadInstruction can delete the current instruction in loop // cases, reset BBI. BBI = Inst; @@ -551,16 +551,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { unsigned DepWriteAlign = DepIntrinsic->getAlignment(); if (llvm::isPowerOf2_64(InstWriteOffset) || ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { - + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " - << *DepWrite << "\n KILLER (offset " - << InstWriteOffset << ", " + << *DepWrite << "\n KILLER (offset " + << InstWriteOffset << ", " << DepLoc.Size << ")" << *Inst << '\n'); - + Value* DepWriteLength = DepIntrinsic->getLength(); Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(), - InstWriteOffset - + InstWriteOffset - DepWriteOffset); DepIntrinsic->setLength(TrimmedLength); MadeChange = true; @@ -740,12 +740,19 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } - if (isa<AllocaInst>(BBI) || isAllocLikeFn(BBI)) { + if (isa<AllocaInst>(BBI)) { + // Remove allocas from the list of dead stack objects; there can't be + // any references before the definition. DeadStackObjects.remove(BBI); continue; } if (CallSite CS = cast<Value>(BBI)) { + // Remove allocation function calls from the list of dead stack objects; + // there can't be any references before the definition. + if (isAllocLikeFn(BBI)) + DeadStackObjects.remove(BBI); + // If this call does not access memory, it can't be loading any of our // pointers. 
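Editorial sketch: the isOverwrite relaxation earlier in this hunk (Later.Size >= Earlier.Size instead of >) is just a byte-interval containment test. A small self-checking model of that condition:

#include <cassert>
#include <cstdint>

// The earlier store is dead if [EarlierOff, EarlierOff+EarlierSize)
// fits inside [LaterOff, LaterOff+LaterSize). With >=, equal-sized
// stores to the same offset now qualify as complete overwrites.
bool overwriteComplete(int64_t EarlierOff, uint64_t EarlierSize,
                       int64_t LaterOff, uint64_t LaterSize) {
  return EarlierOff >= LaterOff && LaterSize >= EarlierSize &&
         uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize;
}

int main() {
  assert(overwriteComplete(0, 4, 0, 4));  // same 4-byte slot: now dead
  assert(overwriteComplete(4, 4, 0, 8));  // tail of a wider store
  assert(!overwriteComplete(0, 8, 0, 4)); // later store too narrow
}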
if (AA->doesNotAccessMemory(CS)) @@ -771,7 +778,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. if (DeadStackObjects.empty()) - return MadeChange; + break; continue; } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index f3c92d64c2..975954953b 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -39,7 +39,7 @@ static unsigned getHash(const void *V) { } //===----------------------------------------------------------------------===// -// SimpleValue +// SimpleValue //===----------------------------------------------------------------------===// namespace { @@ -47,16 +47,16 @@ namespace { /// scoped hash table. struct SimpleValue { Instruction *Inst; - + SimpleValue(Instruction *I) : Inst(I) { assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } - + bool isSentinel() const { return Inst == DenseMapInfo<Instruction*>::getEmptyKey() || Inst == DenseMapInfo<Instruction*>::getTombstoneKey(); } - + static bool canHandle(Instruction *Inst) { // This can only handle non-void readnone functions. if (CallInst *CI = dyn_cast<CallInst>(Inst)) @@ -90,7 +90,7 @@ template<> struct DenseMapInfo<SimpleValue> { unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) { Instruction *Inst = Val.Inst; - + // Hash in all of the operands as pointers. unsigned Res = 0; for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) @@ -126,13 +126,13 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) { if (LHS.isSentinel() || RHS.isSentinel()) return LHSI == RHSI; - + if (LHSI->getOpcode() != RHSI->getOpcode()) return false; return LHSI->isIdenticalTo(RHSI); } //===----------------------------------------------------------------------===// -// CallValue +// CallValue //===----------------------------------------------------------------------===// namespace { @@ -140,21 +140,21 @@ namespace { /// the scoped hash table. struct CallValue { Instruction *Inst; - + CallValue(Instruction *I) : Inst(I) { assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } - + bool isSentinel() const { return Inst == DenseMapInfo<Instruction*>::getEmptyKey() || Inst == DenseMapInfo<Instruction*>::getTombstoneKey(); } - + static bool canHandle(Instruction *Inst) { // Don't value number anything that returns void. if (Inst->getType()->isVoidTy()) return false; - + CallInst *CI = dyn_cast<CallInst>(Inst); if (CI == 0 || !CI->onlyReadsMemory()) return false; @@ -168,7 +168,7 @@ namespace llvm { template<> struct isPodLike<CallValue> { static const bool value = true; }; - + template<> struct DenseMapInfo<CallValue> { static inline CallValue getEmptyKey() { return DenseMapInfo<Instruction*>::getEmptyKey(); @@ -189,7 +189,7 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) { "Cannot value number calls with metadata operands"); Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); } - + // Mix in the opcode. return (Res << 1) ^ Inst->getOpcode(); } @@ -203,11 +203,11 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) { //===----------------------------------------------------------------------===// -// EarlyCSE pass. +// EarlyCSE pass. 
//===----------------------------------------------------------------------===// namespace { - + /// EarlyCSE - This pass does a simple depth-first walk over the dominator /// tree, eliminating trivially redundant instructions and using instsimplify /// to canonicalize things as it goes. It is intended to be fast and catch @@ -223,14 +223,14 @@ public: ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy; typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>, AllocatorTy> ScopedHTType; - + /// AvailableValues - This scoped hash table contains the current values of /// all of our simple scalar expressions. As we walk down the domtree, we /// look to see if instructions are in this: if so, we replace them with what /// we find, otherwise we insert them so that dominated values can succeed in /// their lookup. ScopedHTType *AvailableValues; - + /// AvailableLoads - This scoped hash table contains the current values /// of loads. This allows us to get efficient access to dominating loads when /// we have a fully redundant load. In addition to the most recent load, we @@ -243,15 +243,15 @@ public: typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>, DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType; LoadHTType *AvailableLoads; - + /// AvailableCalls - This scoped hash table contains the current values /// of read-only call values. It uses the same generation count as loads. typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType; CallHTType *AvailableCalls; - + /// CurrentGeneration - This is the current generation of the memory value. unsigned CurrentGeneration; - + static char ID; explicit EarlyCSE() : FunctionPass(ID) { initializeEarlyCSEPass(*PassRegistry::getPassRegistry()); @@ -326,7 +326,7 @@ private: }; bool processNode(DomTreeNode *Node); - + // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); @@ -350,7 +350,7 @@ INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false) bool EarlyCSE::processNode(DomTreeNode *Node) { BasicBlock *BB = Node->getBlock(); - + // If this block has a single predecessor, then the predecessor is the parent // of the domtree node and all of the live out memory values are still current // in this block. If this block has multiple predecessors, then they could @@ -359,20 +359,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // predecessors. if (BB->getSinglePredecessor() == 0) ++CurrentGeneration; - + /// LastStore - Keep track of the last non-volatile store that we saw... for /// as long as there in no instruction that reads memory. If we see a store /// to the same location, we delete the dead store. This zaps trivial dead /// stores which can occur in bitfield code among other things. StoreInst *LastStore = 0; - + bool Changed = false; // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = I++; - + // Dead instructions should just be removed. if (isInstructionTriviallyDead(Inst)) { DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); @@ -381,7 +381,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ++NumSimplify; continue; } - + // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. 
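Editorial sketch: a toy stand-in for the SimplifyInstruction step invoked below. Rewriting algebraic identities such as X+0 before value numbering lets syntactically different instructions land on the same table key; the node representation here is purely illustrative.

#include <cassert>

struct Expr { char op; const Expr *a, *b; int cst; }; // tiny IR stand-in

// X + 0 ==> X ; 0 + X ==> X (a minimal instsimplify analog).
const Expr *simplifyAdd(const Expr *E) {
  if (E->op != '+') return E;
  if (E->b->op == 'c' && E->b->cst == 0) return E->a;
  if (E->a->op == 'c' && E->a->cst == 0) return E->b;
  return E;
}

int main() {
  Expr X{'x', nullptr, nullptr, 0}, Zero{'c', nullptr, nullptr, 0};
  Expr Add{'+', &X, &Zero, 0};
  assert(simplifyAdd(&Add) == &X); // replaced with its simpler value
}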
if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) { @@ -392,7 +392,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ++NumSimplify; continue; } - + // If this is a simple instruction that we can value number, process it. if (SimpleValue::canHandle(Inst)) { // See if the instruction has an available value. If so, use it. @@ -404,12 +404,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ++NumCSE; continue; } - + // Otherwise, just remember that this value is available. AvailableValues->insert(Inst, Inst); continue; } - + // If this is a non-volatile load, process it. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { // Ignore volatile loads. @@ -417,7 +417,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { LastStore = 0; continue; } - + // If we have an available version of this load, and if it is the right // generation, replace this instruction. std::pair<Value*, unsigned> InVal = @@ -431,18 +431,18 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ++NumCSELoad; continue; } - + // Otherwise, remember that we have this instruction. AvailableLoads->insert(Inst->getOperand(0), std::pair<Value*, unsigned>(Inst, CurrentGeneration)); LastStore = 0; continue; } - + // If this instruction may read from memory, forget LastStore. if (Inst->mayReadFromMemory()) LastStore = 0; - + // If this is a read-only call, process it. if (CallValue::canHandle(Inst)) { // If we have an available version of this call, and if it is the right @@ -457,19 +457,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ++NumCSECall; continue; } - + // Otherwise, remember that we have this instruction. AvailableCalls->insert(Inst, std::pair<Value*, unsigned>(Inst, CurrentGeneration)); continue; } - + // Okay, this isn't something we can CSE at all. Check to see if it is // something that could modify memory. If so, our available memory values // cannot be used so bump the generation count. if (Inst->mayWriteToMemory()) { ++CurrentGeneration; - + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // We do a trivial form of DSE if there are two stores to the same // location with no intervening loads. Delete the earlier store. @@ -483,7 +483,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { LastStore = 0; continue; } - + // Okay, we just invalidated anything we knew about loaded values. Try // to salvage *something* by remembering that the stored value is a live // version of the pointer. It is safe to forward from volatile stores @@ -491,7 +491,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // the store. AvailableLoads->insert(SI->getPointerOperand(), std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration)); - + // Remember that this was the last store we saw for DSE. if (SI->isSimple()) LastStore = SI; @@ -509,7 +509,7 @@ bool EarlyCSE::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); TLI = &getAnalysis<TargetLibraryInfo>(); DT = &getAnalysis<DominatorTree>(); - + // Tables that the pass uses when walking the domtree. 
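Editorial sketch: the three tables below are scoped so that values published while visiting one dominator-tree node disappear when the walk leaves it. A plain-C++ model of that discipline; LLVM's ScopedHashTable achieves the same effect without per-scope maps.

#include <cassert>
#include <map>
#include <vector>

struct ScopedTable {
  std::vector<std::map<int, int>> Scopes;
  void push() { Scopes.emplace_back(); }
  void pop() { Scopes.pop_back(); }
  void insert(int K, int V) { Scopes.back()[K] = V; }
  const int *lookup(int K) const {
    // Innermost scope wins, mirroring dominance.
    for (auto S = Scopes.rbegin(); S != Scopes.rend(); ++S)
      if (auto It = S->find(K); It != S->end()) return &It->second;
    return nullptr;
  }
};

int main() {
  ScopedTable T;
  T.push(); T.insert(1, 10);
  T.push(); T.insert(2, 20);           // enter a child domtree node
  assert(T.lookup(1) && T.lookup(2));
  T.pop();                             // leave the child node
  assert(T.lookup(1) && !T.lookup(2)); // its entries are gone
}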
ScopedHTType AVTable; AvailableValues = &AVTable; @@ -517,7 +517,7 @@ bool EarlyCSE::runOnFunction(Function &F) { AvailableLoads = &LoadTable; CallHTType CallTable; AvailableCalls = &CallTable; - + CurrentGeneration = 0; bool Changed = false; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 476ec383e6..4822fd0944 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -173,7 +173,7 @@ Expression ValueTable::create_expression(Instruction *I) { if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); } - + if (CmpInst *C = dyn_cast<CmpInst>(I)) { // Sort the operand value numbers so x<y and y>x get the same value number. CmpInst::Predicate Predicate = C->getPredicate(); @@ -187,7 +187,7 @@ Expression ValueTable::create_expression(Instruction *I) { II != IE; ++II) e.varargs.push_back(*II); } - + return e; } @@ -391,7 +391,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) { valueNumbering[V] = nextValueNumber; return nextValueNumber++; } - + Instruction* I = cast<Instruction>(V); Expression exp; switch (I->getOpcode()) { @@ -507,17 +507,17 @@ namespace { const TargetLibraryInfo *TLI; ValueTable VN; - + /// LeaderTable - A mapping from value numbers to lists of Value*'s that /// have that value number. Use findLeader to query it. struct LeaderTableEntry { Value *Val; - BasicBlock *BB; + const BasicBlock *BB; LeaderTableEntry *Next; }; DenseMap<uint32_t, LeaderTableEntry> LeaderTable; BumpPtrAllocator TableAllocator; - + SmallVector<Instruction*, 8> InstrsToErase; public: static char ID; // Pass identification, replacement for typeid @@ -527,14 +527,14 @@ namespace { } bool runOnFunction(Function &F); - + /// markInstructionForDeletion - This removes the specified instruction from /// our various maps and marks it for deletion. void markInstructionForDeletion(Instruction *I) { VN.erase(I); InstrsToErase.push_back(I); } - + const TargetData *getTargetData() const { return TD; } DominatorTree &getDominatorTree() const { return *DT; } AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } @@ -542,21 +542,21 @@ namespace { private: /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for /// its value number. - void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { Curr.Val = V; Curr.BB = BB; return; } - + LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>(); Node->Val = V; Node->BB = BB; Node->Next = Curr.Next; Curr.Next = Node; } - + /// removeFromLeaderTable - Scan the list of values corresponding to a given /// value number, and remove the given instruction if encountered. 
void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) { @@ -567,7 +567,7 @@ namespace { Prev = Curr; Curr = Curr->Next; } - + if (Prev) { Prev->Next = Curr->Next; } else { @@ -597,7 +597,7 @@ namespace { AU.addPreserved<DominatorTree>(); AU.addPreserved<AliasAnalysis>(); } - + // Helper fuctions // FIXME: eliminate or document these better @@ -608,13 +608,13 @@ namespace { void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); bool performPRE(Function &F); - Value *findLeader(BasicBlock *BB, uint32_t num); + Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); unsigned replaceAllDominatedUsesWith(Value *From, Value *To, - BasicBlock *Root); - bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root); + const BasicBlockEdge &Root); + bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); }; char GVN::ID = 0; @@ -735,15 +735,15 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy()) return false; - + // The store has to be at least as big as the load. if (TD.getTypeSizeInBits(StoredVal->getType()) < TD.getTypeSizeInBits(LoadTy)) return false; - + return true; } - + /// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and /// then a load from a must-aliased pointer of a different type, try to coerce @@ -751,80 +751,80 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, /// InsertPt is the place to insert new instructions. /// /// If we can't do it, return null. -static Value *CoerceAvailableValueToLoadType(Value *StoredVal, +static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, Instruction *InsertPt, const TargetData &TD) { if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) return 0; - + // If this is already the right type, just return it. Type *StoredValTy = StoredVal->getType(); - + uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); - + // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { // Pointer to Pointer -> use bitcast. if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); - + // Convert source pointers to integers, which can be bitcast. if (StoredValTy->isPointerTy()) { StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } - + Type *TypeToCastTo = LoadedTy; if (TypeToCastTo->isPointerTy()) TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); - + if (StoredValTy != TypeToCastTo) StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); - + // Cast to pointer if the load needs a pointer type. if (LoadedTy->isPointerTy()) StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); - + return StoredVal; } - + // If the loaded value is smaller than the available value, then we can // extract out a piece from it. If the available value is too small, then we // can't do anything. assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); - + // Convert source pointers to integers, which can be manipulated. 
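Editorial sketch: for the same-size path of CoerceAvailableValueToLoadType above, "coercion" is purely a reinterpretation of bits. A host-side analogy of the float-to-i32 bitcast the pass emits:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float Stored = 1.0f;
  uint32_t Loaded;
  static_assert(sizeof Stored == sizeof Loaded, "same-size reuse only");
  std::memcpy(&Loaded, &Stored, sizeof Loaded); // "bitcast float to i32"
  assert(Loaded == 0x3F800000u);                // IEEE-754 bits of 1.0f
}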
if (StoredValTy->isPointerTy()) { StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } - + // Convert vectors and fp to integer, which can be manipulated. if (!StoredValTy->isIntegerTy()) { StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); } - + // If this is a big-endian system, we need to shift the value down to the low // bits so that a truncate will work. if (TD.isBigEndian()) { Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize); StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt); } - + // Truncate the integer to the right size now. Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); - + if (LoadedTy == NewIntTy) return StoredVal; - + // If the result is a pointer, inttoptr. if (LoadedTy->isPointerTy()) return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); - + // Otherwise, bitcast. return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); } @@ -845,13 +845,13 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, // to transform them. We need to be able to bitcast to integer. if (LoadTy->isStructTy() || LoadTy->isArrayTy()) return -1; - + int64_t StoreOffset = 0, LoadOffset = 0; Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD); Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD); if (StoreBase != LoadBase) return -1; - + // If the load and store are to the exact same address, they should have been // a must alias. AA must have gotten confused. // FIXME: Study to see if/when this happens. One case is forwarding a memset @@ -866,18 +866,18 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, abort(); } #endif - + // If the load and store don't overlap at all, the store doesn't provide // anything to the load. In this case, they really don't alias at all, AA // must have gotten confused. uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy); - + if ((WriteSizeInBits & 7) | (LoadSize & 7)) return -1; uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes. LoadSize >>= 3; - - + + bool isAAFailure = false; if (StoreOffset < LoadOffset) isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; @@ -895,7 +895,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, #endif return -1; } - + // If the Load isn't completely contained within the stored bits, we don't // have all the bits to feed it. We could do something crazy in the future // (issue a smaller load then merge the bits in) but this seems unlikely to be @@ -903,11 +903,11 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, if (StoreOffset > LoadOffset || StoreOffset+StoreSize < LoadOffset+LoadSize) return -1; - + // Okay, we can do this transformation. Return the number of bytes into the // store that the load is. return LoadOffset-StoreOffset; -} +} /// AnalyzeLoadFromClobberingStore - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. @@ -933,23 +933,23 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, // Cannot handle reading from store of first-class aggregate yet. 
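Editorial sketch: the containment arithmetic in AnalyzeLoadFromClobberingWrite above reduces to a byte-interval check, assuming both offsets are already relative to a common base pointer. A self-checking model:

#include <cassert>
#include <cstdint>

// Once the load's bits lie inside the stored bits, the answer is the
// load's byte offset within the store; -1 means not fully contained.
int clobberOffset(int64_t StoreOff, uint64_t StoreSize,
                  int64_t LoadOff, uint64_t LoadSize) {
  if (StoreOff > LoadOff ||
      StoreOff + (int64_t)StoreSize < LoadOff + (int64_t)LoadSize)
    return -1;                      // load not fully covered by the store
  return (int)(LoadOff - StoreOff); // bytes into the stored value
}

int main() {
  assert(clobberOffset(0, 8, 2, 4) == 2);  // i32 load at byte 2 of an i64
  assert(clobberOffset(0, 4, 2, 4) == -1); // load spills past the store
}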
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) return -1; - + Value *DepPtr = DepLI->getPointerOperand(); uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType()); int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD); if (R != -1) return R; - + // If we have a load/load clobber an DepLI can be widened to cover this load, // then we should widen it! int64_t LoadOffs = 0; const Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); unsigned LoadSize = TD.getTypeStoreSize(LoadTy); - + unsigned Size = MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD); if (Size == 0) return -1; - + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD); } @@ -968,29 +968,29 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, if (MI->getIntrinsicID() == Intrinsic::memset) return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), MemSizeInBits, TD); - + // If we have a memcpy/memmove, the only case we can handle is if this is a // copy from constant memory. In that case, we can read directly from the // constant memory. MemTransferInst *MTI = cast<MemTransferInst>(MI); - + Constant *Src = dyn_cast<Constant>(MTI->getSource()); if (Src == 0) return -1; - + GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD)); if (GV == 0 || !GV->isConstant()) return -1; - + // See if the access is within the bounds of the transfer. int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), MemSizeInBits, TD); if (Offset == -1) return Offset; - + // Otherwise, see if we can constant fold a load from the constant with the // offset applied as appropriate. Src = ConstantExpr::getBitCast(Src, llvm::Type::getInt8PtrTy(Src->getContext())); - Constant *OffsetCst = + Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); @@ -998,7 +998,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, return Offset; return -1; } - + /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means @@ -1009,32 +1009,32 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = SrcVal->getType()->getContext(); - + uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8; - + IRBuilder<> Builder(InsertPt->getParent(), InsertPt); - + // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (SrcVal->getType()->isPointerTy()) SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx)); if (!SrcVal->getType()->isIntegerTy()) SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8)); - + // Shift the bits to the least significant depending on endianness. 
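Editorial sketch: the endianness-dependent shift chosen below moves the loaded bytes to the least significant end of the stored value. A worked model, with all sizes and the offset in bytes:

#include <cassert>
#include <cstdint>

unsigned shiftAmt(bool LittleEndian, uint64_t StoreSize,
                  uint64_t LoadSize, unsigned Offset) {
  return unsigned(LittleEndian ? Offset * 8
                               : (StoreSize - LoadSize - Offset) * 8);
}

int main() {
  // A 4-byte load at byte offset 2 of an 8-byte store:
  assert(shiftAmt(true, 8, 4, 2) == 16);  // little-endian: lshr by 16
  assert(shiftAmt(false, 8, 4, 2) == 16); // big-endian: also 16 here
  assert(shiftAmt(false, 8, 4, 0) == 32); // big-endian, offset 0
}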
unsigned ShiftAmt; if (TD.isLittleEndian()) ShiftAmt = Offset*8; else ShiftAmt = (StoreSize-LoadSize-Offset)*8; - + if (ShiftAmt) SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt); - + if (LoadSize != StoreSize) SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8)); - + return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } @@ -1061,14 +1061,14 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, NewLoadSize = NextPowerOf2(NewLoadSize); Value *PtrVal = SrcVal->getPointerOperand(); - + // Insert the new load after the old load. This ensures that subsequent // memdep queries will find the new load. We can't easily remove the old // load completely because it is already in the value numbering table. IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); - Type *DestPTy = + Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize*8); - DestPTy = PointerType::get(DestPTy, + DestPTy = PointerType::get(DestPTy, cast<PointerType>(PtrVal->getType())->getAddressSpace()); Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); @@ -1078,7 +1078,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); - + // Replace uses of the original load with the wider load. On a big endian // system, we need to shift down to get the relevant bits. Value *RV = NewLoad; @@ -1087,7 +1087,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); RV = Builder.CreateTrunc(RV, SrcVal->getType()); SrcVal->replaceAllUsesWith(RV); - + // We would like to use gvn.markInstructionForDeletion here, but we can't // because the load is already memoized into the leader map table that GVN // tracks. It is potentially possible to remove the load from the table, @@ -1096,7 +1096,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, gvn.getMemDep().removeInstruction(SrcVal); SrcVal = NewLoad; } - + return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD); } @@ -1110,7 +1110,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; IRBuilder<> Builder(InsertPt->getParent(), InsertPt); - + // We know that this method is only called when the mem transfer fully // provides the bits for the load. if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { @@ -1119,9 +1119,9 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Value *Val = MSI->getValue(); if (LoadSize != 1) Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8)); - + Value *OneElt = Val; - + // Splat the value out to the right number of bits. for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) { // If we can double the number of bytes set, do it. @@ -1131,16 +1131,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, NumBytesSet <<= 1; continue; } - + // Otherwise insert one byte at a time. Value *ShVal = Builder.CreateShl(Val, 1*8); Val = Builder.CreateOr(OneElt, ShVal); ++NumBytesSet; } - + return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD); } - + // Otherwise, this is a memcpy/memmove from a constant global. 
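Editorial sketch: the memset splat loop just above doubles the replicated bytes while it can and appends single bytes otherwise. A plain-integer model of the emitted shl/or sequence:

#include <cassert>
#include <cstdint>

uint64_t splat(uint8_t Byte, unsigned LoadSize) {
  uint64_t Val = Byte, OneElt = Byte;
  for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
    if (NumBytesSet * 2 <= LoadSize) { // double the bytes set
      Val |= Val << (NumBytesSet * 8);
      NumBytesSet <<= 1;
      continue;
    }
    Val = OneElt | (Val << 8);         // otherwise add one byte
    ++NumBytesSet;
  }
  return Val;
}

int main() {
  assert(splat(0xAB, 4) == 0xABABABABull);
  assert(splat(0xAB, 3) == 0xABABABull); // 2 bytes doubled, then +1
}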
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); Constant *Src = cast<Constant>(MTI->getSource()); @@ -1149,7 +1149,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, // offset applied as appropriate. Src = ConstantExpr::getBitCast(Src, llvm::Type::getInt8PtrTy(Src->getContext())); - Constant *OffsetCst = + Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); @@ -1166,13 +1166,13 @@ struct AvailableValueInBlock { LoadVal, // A value produced by a load. MemIntrin // A memory intrinsic which is loaded from. }; - + /// V - The value that is live out of the block. PointerIntPair<Value *, 2, ValType> Val; - + /// Offset - The byte offset in Val that is interesting for the load query. unsigned Offset; - + static AvailableValueInBlock get(BasicBlock *BB, Value *V, unsigned Offset = 0) { AvailableValueInBlock Res; @@ -1192,7 +1192,7 @@ struct AvailableValueInBlock { Res.Offset = Offset; return Res; } - + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, unsigned Offset = 0) { AvailableValueInBlock Res; @@ -1211,17 +1211,17 @@ struct AvailableValueInBlock { assert(isSimpleValue() && "Wrong accessor"); return Val.getPointer(); } - + LoadInst *getCoercedLoadValue() const { assert(isCoercedLoadValue() && "Wrong accessor"); return cast<LoadInst>(Val.getPointer()); } - + MemIntrinsic *getMemIntrinValue() const { assert(isMemIntrinValue() && "Wrong accessor"); return cast<MemIntrinsic>(Val.getPointer()); } - + /// MaterializeAdjustedValue - Emit code into this block to adjust the value /// defined here to the specified type. This handles various coercion cases. Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { @@ -1233,7 +1233,7 @@ struct AvailableValueInBlock { assert(TD && "Need target data to handle type mismatch case"); Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), *TD); - + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); @@ -1245,7 +1245,7 @@ struct AvailableValueInBlock { } else { Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), gvn); - + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " << *getCoercedLoadValue() << '\n' << *Res << '\n' << "\n\n\n"); @@ -1268,12 +1268,12 @@ struct AvailableValueInBlock { /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. -static Value *ConstructSSAForLoadSet(LoadInst *LI, +static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, GVN &gvn) { // Check for the fully redundant, dominating load case. In this case, we can // just use the dominating value directly. 
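Editorial sketch: AvailableValueInBlock above packs its kind tag into the value pointer with PointerIntPair. A plain-C++ model of that trick: alignment guarantees free low bits, so a 2-bit discriminator (SimpleVal/LoadVal/MemIntrin) rides inside the pointer itself.

#include <cassert>
#include <cstdint>

struct TaggedPtr {
  uintptr_t Bits;
  static TaggedPtr make(void *P, unsigned Kind) {
    uintptr_t Raw = (uintptr_t)P;
    assert((Raw & 3u) == 0 && Kind < 4 && "need 4-byte alignment");
    return TaggedPtr{Raw | Kind};
  }
  void *ptr() const { return (void *)(Bits & ~(uintptr_t)3); }
  unsigned kind() const { return (unsigned)(Bits & 3); }
};

int main() {
  alignas(4) static int Dummy;
  TaggedPtr T = TaggedPtr::make(&Dummy, 2); // kind 2 standing in for MemIntrin
  assert(T.ptr() == &Dummy && T.kind() == 2);
}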
- if (ValuesPerBlock.size() == 1 && + if (ValuesPerBlock.size() == 1 && gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); @@ -1282,29 +1282,29 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSAUpdate(&NewPHIs); SSAUpdate.Initialize(LI->getType(), LI->getName()); - + Type *LoadTy = LI->getType(); - + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { const AvailableValueInBlock &AV = ValuesPerBlock[i]; BasicBlock *BB = AV.BB; - + if (SSAUpdate.HasValueForBlock(BB)) continue; SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); } - + // Perform PHI construction. Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); - + // If new PHI nodes were created, notify alias analysis. if (V->getType()->isPointerTy()) { AliasAnalysis *AA = gvn.getAliasAnalysis(); - + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); - + // Now that we've copied information to the new PHIs, scan through // them again and inform alias analysis that we've added potentially // escaping uses to any values that are operands to these PHIs. @@ -1376,7 +1376,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // the pointer operand of the load if PHI translation occurs. Make sure // to consider the right address. Value *Address = Deps[i].getAddress(); - + // If the dependence is to a store that writes to a superset of the bits // read by the load, we can extract the bits we need for the load from the // stored value. @@ -1392,7 +1392,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { } } } - + // Check to see if we have something like this: // load i32* P // load i8* (P+1) @@ -1404,7 +1404,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), LI->getPointerOperand(), DepLI, *TD); - + if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, Offset)); @@ -1423,10 +1423,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, Offset)); continue; - } + } } } - + UnavailableBlocks.push_back(DepBB); continue; } @@ -1443,7 +1443,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { UndefValue::get(LI->getType()))); continue; } - + if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { // Reject loads and stores that are to the same address but are of // different types if we have to. @@ -1461,7 +1461,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { S->getValueOperand())); continue; } - + if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { // If the types mismatch and we can't handle it, reject reuse of the load. if (LD->getType() != LI->getType()) { @@ -1470,12 +1470,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ UnavailableBlocks.push_back(DepBB); continue; - } + } } ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); continue; } - + UnavailableBlocks.push_back(DepBB); continue; } @@ -1489,7 +1489,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // its value. Insert PHIs and remove the fully redundant value now. if (UnavailableBlocks.empty()) { DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); - + // Perform PHI construction. 
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); @@ -1532,10 +1532,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return false; if (Blockers.count(TmpBB)) return false; - + // If any of these blocks has more than one successor (i.e. if the edge we - // just traversed was critical), then there are other paths through this - // block along which the load may not be anticipated. Hoisting the load + // just traversed was critical), then there are other paths through this + // block along which the load may not be anticipated. Hoisting the load // above this block would be adding the load to execution paths along // which it was not previously executed. if (TmpBB->getTerminator()->getNumSuccessors() != 1) @@ -1613,7 +1613,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { unsigned NumUnavailablePreds = PredLoads.size(); assert(NumUnavailablePreds != 0 && "Fully available value should be eliminated above!"); - + // If this load is unavailable in multiple predecessors, reject it. // FIXME: If we could restructure the CFG, we could make a common pred with // all the preds that don't have an available LI and insert a new load into @@ -1690,10 +1690,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " << *NewInsts.back() << '\n'); - + // Assign value numbers to the new instructions. for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { - // FIXME: We really _ought_ to insert these value numbers into their + // FIXME: We really _ought_ to insert these value numbers into their // parent's availability map. However, in doing so, we risk getting into // ordering issues. If a block hasn't been processed yet, we would be // marking a value as AVAIL-IN, which isn't what we intend. @@ -1795,7 +1795,7 @@ bool GVN::processLoad(LoadInst *L) { markInstructionForDeletion(L); return true; } - + // ... to a pointer that has been loaded from before... MemDepResult Dep = MD->getDependency(L); @@ -1821,7 +1821,7 @@ bool GVN::processLoad(LoadInst *L) { AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, L->getType(), L, *TD); } - + // Check to see if we have something like this: // load i32* P // load i8* (P+1) @@ -1831,14 +1831,14 @@ bool GVN::processLoad(LoadInst *L) { // we have the first instruction in the entry block. if (DepLI == L) return false; - + int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), L->getPointerOperand(), DepLI, *TD); if (Offset != -1) AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); } - + // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { @@ -1848,11 +1848,11 @@ bool GVN::processLoad(LoadInst *L) { if (Offset != -1) AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD); } - + if (AvailVal) { DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' << *AvailVal << '\n' << *L << "\n\n\n"); - + // Replace the load! L->replaceAllUsesWith(AvailVal); if (AvailVal->getType()->isPointerTy()) @@ -1862,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L) { return true; } } - + // If the value isn't available, don't do anything! 
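The clobbering-store coercion above targets overlapping accesses of mismatched width, roughly as in this sketch; names are illustrative and a little-endian layout is assumed.

#include <cstdint>
#include <cstring>

// Illustrative only: the 1-byte load overlaps the 4-byte store, so GVN can
// extract the needed byte from the stored constant (byte offset 1) rather
// than reloading it.
std::uint8_t coerce_store(std::uint32_t *p) {
  *p = 0x11223344u;
  std::uint8_t b;
  std::memcpy(&b, reinterpret_cast<std::uint8_t *>(p) + 1, 1);
  return b;                       // may fold to: return 0x33;
}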
if (Dep.isClobber()) { DEBUG( @@ -1892,7 +1892,7 @@ bool GVN::processLoad(LoadInst *L) { Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { Value *StoredVal = DepSI->getValueOperand(); - + // The store and load are to a must-aliased pointer, but they may not // actually have the same type. See if we know how to reuse the stored // value (depending on its type). @@ -1902,11 +1902,11 @@ bool GVN::processLoad(LoadInst *L) { L, *TD); if (StoredVal == 0) return false; - + DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal << '\n' << *L << "\n\n\n"); } - else + else return false; } @@ -1921,7 +1921,7 @@ bool GVN::processLoad(LoadInst *L) { if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { Value *AvailableVal = DepLI; - + // The loads are of a must-aliased pointer, but they may not actually have // the same type. See if we know how to reuse the previously loaded value // (depending on its type). @@ -1931,14 +1931,14 @@ bool GVN::processLoad(LoadInst *L) { L, *TD); if (AvailableVal == 0) return false; - + DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal << "\n" << *L << "\n\n\n"); } - else + else return false; } - + // Remove it! patchAndReplaceAllUsesWith(AvailableVal, L); if (DepLI->getType()->isPointerTy()) @@ -1957,7 +1957,7 @@ bool GVN::processLoad(LoadInst *L) { ++NumGVNLoad; return true; } - + // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) { @@ -1972,28 +1972,28 @@ bool GVN::processLoad(LoadInst *L) { return false; } -// findLeader - In order to find a leader for a given value number at a +// findLeader - In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, -// and then scan the list to find one whose block dominates the block in +// and then scan the list to find one whose block dominates the block in // question. This is fast because dominator tree queries consist of only // a few comparisons of DFS numbers. -Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { +Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { LeaderTableEntry Vals = LeaderTable[num]; if (!Vals.Val) return 0; - + Value *Val = 0; if (DT->dominates(Vals.BB, BB)) { Val = Vals.Val; if (isa<Constant>(Val)) return Val; } - + LeaderTableEntry* Next = Vals.Next; while (Next) { if (DT->dominates(Next->BB, BB)) { if (isa<Constant>(Next->Val)) return Next->Val; if (!Val) Val = Next->Val; } - + Next = Next->Next; } @@ -2004,22 +2004,13 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { /// use is dominated by the given basic block. Returns the number of uses that /// were replaced. unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, - BasicBlock *Root) { + const BasicBlockEdge &Root) { unsigned Count = 0; for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); UI != UE; ) { Use &U = (UI++).getUse(); - // If From occurs as a phi node operand then the use implicitly lives in the - // corresponding incoming block. Otherwise it is the block containing the - // user that must be dominated by Root. 
- BasicBlock *UsingBlock; - if (PHINode *PN = dyn_cast<PHINode>(U.getUser())) - UsingBlock = PN->getIncomingBlock(U); - else - UsingBlock = cast<Instruction>(U.getUser())->getParent(); - - if (DT->dominates(Root, UsingBlock)) { + if (DT->dominates(Root, U)) { U.set(To); ++Count; } @@ -2027,13 +2018,34 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, return Count; } +/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return +/// true if every path from the entry block to 'Dst' passes via this edge. In +/// particular 'Dst' must not be reachable via another edge from 'Src'. +static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, + DominatorTree *DT) { + // While in theory it is interesting to consider the case in which Dst has + // more than one predecessor, because Dst might be part of a loop which is + // only reachable from Src, in practice it is pointless since at the time + // GVN runs all such loops have preheaders, which means that Dst will have + // been changed to have only one predecessor, namely Src. + const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); + const BasicBlock *Src = E.getStart(); + assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); + (void)Src; + return Pred != 0; +} + /// propagateEquality - The given values are known to be equal in every block /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. -bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { +bool GVN::propagateEquality(Value *LHS, Value *RHS, + const BasicBlockEdge &Root) { SmallVector<std::pair<Value*, Value*>, 4> Worklist; Worklist.push_back(std::make_pair(LHS, RHS)); bool Changed = false; + // For speed, compute a conservative fast approximation to + // DT->dominates(Root, Root.getEnd()); + bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); while (!Worklist.empty()) { std::pair<Value*, Value*> Item = Worklist.pop_back_val(); @@ -2065,9 +2077,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { LVN = RVN; } } - assert((!isa<Instruction>(RHS) || - DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) && - "Instruction doesn't dominate scope!"); // If value numbering later sees that an instruction in the scope is equal // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve @@ -2076,8 +2085,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { // if RHS is an instruction (if an instruction in the scope is morphed into // LHS then it will be turned into RHS by the next GVN iteration anyway, so // using the leader table is about compiling faster, not optimizing better). - if (!isa<Instruction>(RHS)) - addToLeaderTable(LVN, RHS, Root); + // The leader table only tracks basic blocks, not edges. Only add to if we + // have the simple case where the edge dominates the end. + if (RootDominatesEnd && !isa<Instruction>(RHS)) + addToLeaderTable(LVN, RHS, Root.getEnd()); // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As // LHS always has at least one use that is not dominated by Root, this will @@ -2136,7 +2147,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { // If the number we were assigned was brand new then there is no point in // looking for an instruction realizing it: there cannot be one! 
if (Num < NextNum) { - Value *NotCmp = findLeader(Root, Num); + Value *NotCmp = findLeader(Root.getEnd(), Num); if (NotCmp && isa<Instruction>(NotCmp)) { unsigned NumReplacements = replaceAllDominatedUsesWith(NotCmp, NotVal, Root); @@ -2146,7 +2157,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { } // Ensure that any instruction in scope that gets the "A < B" value number // is replaced with false. - addToLeaderTable(Num, NotVal, Root); + // The leader table only tracks basic blocks, not edges. Only add to if we + // have the simple case where the edge dominates the end. + if (RootDominatesEnd) + addToLeaderTable(Num, NotVal, Root.getEnd()); continue; } @@ -2155,22 +2169,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { return Changed; } -/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return -/// true if every path from the entry block to 'Dst' passes via this edge. In -/// particular 'Dst' must not be reachable via another edge from 'Src'. -static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst, - DominatorTree *DT) { - // While in theory it is interesting to consider the case in which Dst has - // more than one predecessor, because Dst might be part of a loop which is - // only reachable from Src, in practice it is pointless since at the time - // GVN runs all such loops have preheaders, which means that Dst will have - // been changed to have only one predecessor, namely Src. - BasicBlock *Pred = Dst->getSinglePredecessor(); - assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); - (void)Src; - return Pred != 0; -} - /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I) { @@ -2210,18 +2208,20 @@ bool GVN::processInstruction(Instruction *I) { BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); + // Avoid multiple edges early. + if (TrueSucc == FalseSucc) + return false; + BasicBlock *Parent = BI->getParent(); bool Changed = false; - if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT)) - Changed |= propagateEquality(BranchCond, - ConstantInt::getTrue(TrueSucc->getContext()), - TrueSucc); + Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); + BasicBlockEdge TrueE(Parent, TrueSucc); + Changed |= propagateEquality(BranchCond, TrueVal, TrueE); - if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT)) - Changed |= propagateEquality(BranchCond, - ConstantInt::getFalse(FalseSucc->getContext()), - FalseSucc); + Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); + BasicBlockEdge FalseE(Parent, FalseSucc); + Changed |= propagateEquality(BranchCond, FalseVal, FalseE); return Changed; } @@ -2234,8 +2234,9 @@ bool GVN::processInstruction(Instruction *I) { for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { BasicBlock *Dst = i.getCaseSuccessor(); - if (isOnlyReachableViaThisEdge(Parent, Dst, DT)) - Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst); + BasicBlockEdge E(Parent, Dst); + if (E.isSingleEdge()) + Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); } return Changed; } @@ -2243,7 +2244,7 @@ bool GVN::processInstruction(Instruction *I) { // Instructions with void type don't return a value, so there's // no point in trying to find redundancies in them. 
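The switch from a dominating block to a dominating BasicBlockEdge matters for equality propagation along branch edges; a minimal sketch of the intended effect, with an illustrative function name.

// Illustrative only: on the true edge of the branch, x is known to equal 42,
// so uses dominated by that edge can be rewritten, even though the successor
// block could in principle be reached along other edges as well.
int edge_equality(int x) {
  if (x == 42)
    return x + 1;                 // may fold to: return 43;
  return x;
}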
if (I->getType()->isVoidTy()) return false; - + uint32_t NextNum = VN.getNextUnusedValueNumber(); unsigned Num = VN.lookup_or_add(I); @@ -2261,7 +2262,7 @@ bool GVN::processInstruction(Instruction *I) { addToLeaderTable(Num, I, I->getParent()); return false; } - + // Perform fast-path value-number based elimination of values inherited from // dominators. Value *repl = findLeader(I->getParent(), Num); @@ -2270,7 +2271,7 @@ bool GVN::processInstruction(Instruction *I) { addToLeaderTable(Num, I, I->getParent()); return false; } - + // Remove it! patchAndReplaceAllUsesWith(repl, I); if (MD && repl->getType()->isPointerTy()) @@ -2297,7 +2298,7 @@ bool GVN::runOnFunction(Function& F) { // optimization opportunities. for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { BasicBlock *BB = FI++; - + bool removedBlock = MergeBlockIntoPredecessor(BB, this); if (removedBlock) ++NumGVNBlocks; @@ -2454,7 +2455,7 @@ bool GVN::performPRE(Function &F) { // we would need to insert instructions in more than one pred. if (NumWithout != 1 || NumWith == 0) continue; - + // Don't do PRE across indirect branch. if (isa<IndirectBrInst>(PREPred->getTerminator())) continue; @@ -2530,7 +2531,7 @@ bool GVN::performPRE(Function &F) { unsigned jj = PHINode::getOperandNumForIncomingValue(ii); VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); } - + if (MD) MD->invalidateCachedPointerInfo(Phi); } @@ -2567,7 +2568,7 @@ bool GVN::splitCriticalEdges() { /// iterateOnFunction - Executes one iteration of GVN bool GVN::iterateOnFunction(Function &F) { cleanupGlobalSets(); - + // Top-down walk of the dominator tree bool Changed = false; #if 0 @@ -2602,7 +2603,7 @@ void GVN::verifyRemoved(const Instruction *Inst) const { I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) { const LeaderTableEntry *Node = &I->second; assert(Node->Val != Inst && "Inst still in value numbering scope!"); - + while (Node->Next) { Node = Node->Next; assert(Node->Val != Inst && "Inst still in value numbering scope!"); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index c2bd6e69ee..b36a3cb776 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -12,7 +12,7 @@ // global). Such a transformation can significantly reduce the register pressure // when many globals are involved. // -// For example, consider the code which touches several global variables at +// For example, consider the code which touches several global variables at // once: // // static int foo[N], bar[N], baz[N]; @@ -208,8 +208,8 @@ bool GlobalMerge::doInitialization(Module &M) { if (BSSGlobals.size() > 1) Changed |= doMerge(BSSGlobals, M, false); - // FIXME: This currently breaks the EH processing due to way how the - // typeinfo detection works. We might want to detect the TIs and ignore + // FIXME: This currently breaks the EH processing due to way how the + // typeinfo detection works. We might want to detect the TIs and ignore // them in the future. // if (ConstGlobals.size() > 1) // Changed |= doMerge(ConstGlobals, M, true); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index a9ba6579db..37f8bdfbff 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1215,21 +1215,26 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { return 0; } -/// needsLFTR - LinearFunctionTestReplace policy. 
Return true unless we can show -/// that the current exit test is already sufficiently canonical. -static bool needsLFTR(Loop *L, DominatorTree *DT) { +/// Return the compare guarding the loop latch, or NULL for unrecognized tests. +static ICmpInst *getLoopTest(Loop *L) { assert(L->getExitingBlock() && "expected loop exit"); BasicBlock *LatchBlock = L->getLoopLatch(); // Don't bother with LFTR if the loop is not properly simplified. if (!LatchBlock) - return false; + return 0; BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); assert(BI && "expected exit branch"); + return dyn_cast<ICmpInst>(BI->getCondition()); +} + +/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show +/// that the current exit test is already sufficiently canonical. +static bool needsLFTR(Loop *L, DominatorTree *DT) { // Do LFTR to simplify the exit condition to an ICMP. - ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + ICmpInst *Cond = getLoopTest(L); if (!Cond) return true; @@ -1259,6 +1264,48 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) { return Phi != getLoopPhiForCounter(IncV, L, DT); } +/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils +/// down to checking that all operands are constant and listing instructions +/// that may hide undef. +static bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited, + unsigned Depth) { + if (isa<Constant>(V)) + return !isa<UndefValue>(V); + + if (Depth >= 6) + return false; + + // Conservatively handle non-constant non-instructions. For example, Arguments + // may be undef. + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + // Load and return values may be undef. + if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) + return false; + + // Optimistically handle other instructions. + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { + if (!Visited.insert(*OI)) + continue; + if (!hasConcreteDefImpl(*OI, Visited, Depth+1)) + return false; + } + return true; +} + +/// Return true if the given value is concrete. We must prove that undef can +/// never reach it. +/// +/// TODO: If we decide that this is a good approach to checking for undef, we +/// may factor it into a common location. +static bool hasConcreteDef(Value *V) { + SmallPtrSet<Value*, 8> Visited; + Visited.insert(V); + return hasConcreteDefImpl(V, Visited, 0); +} + /// AlmostDeadIV - Return true if this IV has any uses other than the (soon to /// be rewritten) loop exit test. static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { @@ -1283,6 +1330,8 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { /// valid count without scaling the address stride, so it remains a pointer /// expression as far as SCEV is concerned. /// +/// Currently only valid for LFTR. See the comments on hasConcreteDef below. +/// /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount /// /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. @@ -1331,6 +1380,19 @@ FindLoopCounter(Loop *L, const SCEV *BECount, if (getLoopPhiForCounter(IncV, L, DT) != Phi) continue; + // Avoid reusing a potentially undef value to compute other values that may + // have originally had a concrete definition. + if (!hasConcreteDef(Phi)) { + // We explicitly allow unknown phis as long as they are already used by + // the loop test. 
In this case we assume that performing LFTR could not + // increase the number of undef users. + if (ICmpInst *Cond = getLoopTest(L)) { + if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) + && Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) { + continue; + } + } + } const SCEV *Init = AR->getStart(); if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { @@ -1347,7 +1409,7 @@ FindLoopCounter(Loop *L, const SCEV *BECount, // If two IVs both count from zero or both count from nonzero then the // narrower is likely a dead phi that has been widened. Use the wider phi // to allow the other to be eliminated. - if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) + else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) continue; } BestPhi = Phi; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 429b61b6e5..dd42c59059 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -670,6 +670,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) { Condition = SI->getCondition(); } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) { + // Can't thread indirect branch with no successors. + if (IB->getNumSuccessors() == 0) return false; Condition = IB->getAddress()->stripPointerCasts(); Preference = WantBlockAddress; } else { @@ -859,7 +861,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If all of the loads and stores that feed the value have the same TBAA tag, // then we can propagate it onto any newly inserted loads. - MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); + MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); SmallPtrSet<BasicBlock*, 8> PredsScanned; typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy; @@ -885,7 +887,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { OneUnavailablePred = PredBB; continue; } - + // If tbaa tags disagree or are not present, forget about them. if (TBAATag != ThisTBAATag) TBAATag = 0; @@ -949,7 +951,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { NewVal->setDebugLoc(LI->getDebugLoc()); if (TBAATag) NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag); - + AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 582948ea14..0192e928fe 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -175,7 +175,9 @@ namespace { bool canSinkOrHoistInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - void PromoteAliasSet(AliasSet &AS); + void PromoteAliasSet(AliasSet &AS, + SmallVectorImpl<BasicBlock*> &ExitBlocks, + SmallVectorImpl<Instruction*> &InsertPts); }; } @@ -256,10 +258,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + SmallVector<BasicBlock *, 8> ExitBlocks; + SmallVector<Instruction *, 8> InsertPts; + // Loop over all of the alias sets in the tracker object. 
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); I != E; ++I) - PromoteAliasSet(*I); + PromoteAliasSet(*I, ExitBlocks, InsertPts); } // Clear out loops state information for the next iteration @@ -631,6 +636,7 @@ namespace { Value *SomePtr; // Designated pointer to store to. SmallPtrSet<Value*, 4> &PointerMustAliases; SmallVectorImpl<BasicBlock*> &LoopExitBlocks; + SmallVectorImpl<Instruction*> &LoopInsertPts; AliasSetTracker &AST; DebugLoc DL; int Alignment; @@ -638,11 +644,12 @@ namespace { LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, - SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, - DebugLoc dl, int alignment) + SmallVectorImpl<BasicBlock*> &LEB, + SmallVectorImpl<Instruction*> &LIP, + AliasSetTracker &ast, DebugLoc dl, int alignment) : LoadAndStorePromoter(Insts, S), SomePtr(SP), - PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl), - Alignment(alignment) {} + PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), + AST(ast), DL(dl), Alignment(alignment) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { @@ -662,7 +669,7 @@ namespace { for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); - Instruction *InsertPos = ExitBlock->getFirstInsertionPt(); + Instruction *InsertPos = LoopInsertPts[i]; StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); @@ -684,7 +691,9 @@ namespace { /// looping over the stores in the loop, looking for stores to Must pointers /// which are loop invariant. /// -void LICM::PromoteAliasSet(AliasSet &AS) { +void LICM::PromoteAliasSet(AliasSet &AS, + SmallVectorImpl<BasicBlock*> &ExitBlocks, + SmallVectorImpl<Instruction*> &InsertPts) { // We can promote this alias set if it has a store, if it is a "Must" alias // set, if the pointer is loop invariant, and if we are not eliminating any // volatile loads or stores. @@ -794,14 +803,20 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); + // Figure out the loop exits and their insertion points, if this is the + // first promotion. + if (ExitBlocks.empty()) { + CurLoop->getUniqueExitBlocks(ExitBlocks); + InsertPts.resize(ExitBlocks.size()); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt(); + } // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST, DL, Alignment); + InsertPts, *CurAST, DL, Alignment); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index f7f32981ba..3771f5aa97 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -32,10 +32,10 @@ namespace { LoopDeletion() : LoopPass(ID) { initializeLoopDeletionPass(*PassRegistry::getPassRegistry()); } - + // Possibly eliminate loop L if it is dead. 
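At the source level, the scalar promotion performed by PromoteAliasSet above, whose exit-block insertion points are now computed once and shared across alias sets, corresponds roughly to this sketch; the names are illustrative, not from the patch.

// Illustrative only: *sum is a must-aliased, loop-invariant location, so it
// can be kept in a register for the duration of the loop, with a single
// store emitted at the cached insertion point in the loop's exit block.
void promote_sum(const int *a, int n, int *sum) {
  for (int i = 0; i < n; ++i)
    *sum += a[i];
}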
bool runOnLoop(Loop* L, LPPassManager& LPM); - + bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks, SmallVector<BasicBlock*, 4>& exitBlocks, bool &Changed, BasicBlock *Preheader); @@ -46,7 +46,7 @@ namespace { AU.addRequired<ScalarEvolution>(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - + AU.addPreserved<ScalarEvolution>(); AU.addPreserved<DominatorTree>(); AU.addPreserved<LoopInfo>(); @@ -55,7 +55,7 @@ namespace { } }; } - + char LoopDeletion::ID = 0; INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion", "Delete dead loops", false, false) @@ -79,7 +79,7 @@ bool LoopDeletion::IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitBlocks, bool &Changed, BasicBlock *Preheader) { BasicBlock* exitBlock = exitBlocks[0]; - + // Make sure that all PHI entries coming from the loop are loop invariant. // Because the code is in LCSSA form, any values used outside of the loop // must pass through a PHI in the exit block, meaning that this check is @@ -97,14 +97,14 @@ bool LoopDeletion::IsLoopDead(Loop* L, if (incoming != P->getIncomingValueForBlock(exitingBlocks[i])) return false; } - + if (Instruction* I = dyn_cast<Instruction>(incoming)) if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; ++BI; } - + // Make sure that no instructions in the block have potential side-effects. // This includes instructions that could write to memory, and loads that are // marked volatile. This could be made more aggressive by using aliasing @@ -117,23 +117,23 @@ bool LoopDeletion::IsLoopDead(Loop* L, return false; } } - + return true; } /// runOnLoop - Remove dead loops, by which we mean loops that do not impact the -/// observable behavior of the program other than finite running time. Note +/// observable behavior of the program other than finite running time. Note /// we do ensure that this never remove a loop that might be infinite, as doing /// so could change the halting/non-halting nature of a program. /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA /// in order to make various safety checks work. bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { - // We can only remove the loop if there is a preheader that we can + // We can only remove the loop if there is a preheader that we can // branch from after removing it. BasicBlock* preheader = L->getLoopPreheader(); if (!preheader) return false; - + // If LoopSimplify form is not available, stay out of trouble. if (!L->hasDedicatedExits()) return false; @@ -142,36 +142,36 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // they would already have been removed in earlier executions of this pass. if (L->begin() != L->end()) return false; - + SmallVector<BasicBlock*, 4> exitingBlocks; L->getExitingBlocks(exitingBlocks); - + SmallVector<BasicBlock*, 4> exitBlocks; L->getUniqueExitBlocks(exitBlocks); - + // We require that the loop only have a single exit block. Otherwise, we'd // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. if (exitBlocks.size() != 1) return false; - + // Finally, we have to check that the loop really is dead. bool Changed = false; if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) return Changed; - + // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. 
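A hedged illustration of the trip-count requirement above; the names are made up and only the contrast between the two loops matters.

// Illustrative only: the first loop is side-effect free and ScalarEvolution
// can bound its trip count, so LoopDeletion may remove it; the second is also
// side-effect free, but its trip count is not computable, so deleting it
// could turn a non-terminating program into a terminating one.
void deletable(int n) {
  for (int i = 0; i < n; ++i) { }
}

unsigned kept(unsigned x) {
  while (x != 1)
    x = (x & 1u) ? 3u * x + 1u : x / 2u;
  return x;
}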
ScalarEvolution& SE = getAnalysis<ScalarEvolution>(); const SCEV *S = SE.getMaxBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(S)) return Changed; - + // Now that we know the removal is safe, remove the loop by changing the - // branch from the preheader to go to the single exit block. + // branch from the preheader to go to the single exit block. BasicBlock* exitBlock = exitBlocks[0]; - + // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't // mess with this unless you have good reason and know what you're doing. @@ -197,7 +197,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { P->removeIncomingValue(exitingBlocks[i]); ++BI; } - + // Update the dominator tree and remove the instructions and blocks that will // be deleted from the reference counting scheme. DominatorTree& DT = getAnalysis<DominatorTree>(); @@ -211,7 +211,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { DE = ChildNodes.end(); DI != DE; ++DI) { DT.changeImmediateDominator(*DI, DT[preheader]); } - + ChildNodes.clear(); DT.eraseNode(*LI); @@ -219,7 +219,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // delete it freely later. (*LI)->dropAllReferences(); } - + // Erase the instructions and the blocks without having to worry // about ordering because we already dropped the references. // NOTE: This iteration is safe because erasing the block does not remove its @@ -236,13 +236,13 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(), E = blocks.end(); I != E; ++I) loopInfo.removeBlock(*I); - + // The last step is to inform the loop pass manager that we've // eliminated this loop. LPM.deleteLoopFromQueue(L); Changed = true; - + ++NumDeleted; - + return Changed; } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 5fe9462159..ac1082cbfb 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -173,7 +173,7 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; - // Disable loop idiom recognition if the function's name is a common idiom. + // Disable loop idiom recognition if the function's name is a common idiom. StringRef Name = L->getHeader()->getParent()->getName(); if (Name == "memset" || Name == "memcpy") return false; diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index f0f05e6f50..982400c5a3 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -48,7 +48,7 @@ namespace { } }; } - + char LoopInstSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify", "Simplify instructions in loops", false, false) diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 59aace9e36..7eeb1527ad 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -418,12 +418,13 @@ bool LoopRotate::rotateLoop(Loop *L) { } // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and - // thus is not a preheader anymore. Split the edge to form a real preheader. + // thus is not a preheader anymore. + // Split the edge to form a real preheader. 
BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this); NewPH->setName(NewHeader->getName() + ".lr.ph"); - // Preserve canonical loop form, which means that 'Exit' should have only one - // predecessor. + // Preserve canonical loop form, which means that 'Exit' should have only + // one predecessor. BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this); ExitSplit->moveBefore(Exit); } else { diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4ba969e675..0ae7a5151e 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -738,7 +738,8 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()); + Value *V = DeadInsts.pop_back_val(); + Instruction *I = dyn_cast_or_null<Instruction>(V); if (I == 0 || !isInstructionTriviallyDead(I)) continue; @@ -2836,7 +2837,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); - if (SE.isLoopInvariant(N, L)) { + if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) { // S is normalized, so normalize N before folding it into S // to keep the result normalized. N = TransformForPostIncUse(Normalize, N, CI, 0, @@ -3006,42 +3007,64 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { /// CollectSubexprs - Split S into subexpressions which can be pulled out into /// separate registers. If C is non-null, multiply each subexpression by C. -static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, - SmallVectorImpl<const SCEV *> &Ops, - const Loop *L, - ScalarEvolution &SE) { +/// +/// Return remainder expression after factoring the subexpressions captured by +/// Ops. If Ops is complete, return NULL. +static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, + SmallVectorImpl<const SCEV *> &Ops, + const Loop *L, + ScalarEvolution &SE, + unsigned Depth = 0) { + // Arbitrarily cap recursion to protect compile time. + if (Depth >= 3) + return S; + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { // Break out add operands. for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) - CollectSubexprs(*I, C, Ops, L, SE); - return; + I != E; ++I) { + const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1); + if (Remainder) + Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); + } + return NULL; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. - if (!AR->getStart()->isZero()) { - CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), - AR->getStepRecurrence(SE), - AR->getLoop(), - //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) - SCEV::FlagAnyWrap), - C, Ops, L, SE); - CollectSubexprs(AR->getStart(), C, Ops, L, SE); - return; + if (AR->getStart()->isZero()) + return S; + + const SCEV *Remainder = CollectSubexprs(AR->getStart(), + C, Ops, L, SE, Depth+1); + // Split the non-zero AddRec unless it is part of a nested recurrence that + // does not pertain to this loop. + if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) { + Ops.push_back(C ? 
SE.getMulExpr(C, Remainder) : Remainder); + Remainder = NULL; + } + if (Remainder != AR->getStart()) { + if (!Remainder) + Remainder = SE.getConstant(AR->getType(), 0); + return SE.getAddRecExpr(Remainder, + AR->getStepRecurrence(SE), + AR->getLoop(), + //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); } } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { // Break (C * (a + b + c)) into C*a + C*b + C*c. - if (Mul->getNumOperands() == 2) - if (const SCEVConstant *Op0 = - dyn_cast<SCEVConstant>(Mul->getOperand(0))) { - CollectSubexprs(Mul->getOperand(1), - C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, - Ops, L, SE); - return; - } + if (Mul->getNumOperands() != 2) + return S; + if (const SCEVConstant *Op0 = + dyn_cast<SCEVConstant>(Mul->getOperand(0))) { + C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0; + const SCEV *Remainder = + CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1); + if (Remainder) + Ops.push_back(SE.getMulExpr(C, Remainder)); + return NULL; + } } - - // Otherwise use the value itself, optionally with a scale applied. - Ops.push_back(C ? SE.getMulExpr(C, S) : S); + return S; } /// GenerateReassociations - Split out subexpressions from adds and the bases of @@ -3056,7 +3079,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, const SCEV *BaseReg = Base.BaseRegs[i]; SmallVector<const SCEV *, 8> AddOps; - CollectSubexprs(BaseReg, 0, AddOps, L, SE); + const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE); + if (Remainder) + AddOps.push_back(Remainder); if (AddOps.size() == 1) continue; diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 221911866c..7419a6543e 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -25,12 +25,12 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { Value *Ptr = CXI->getPointerOperand(); Value *Cmp = CXI->getCompareOperand(); Value *Val = CXI->getNewValOperand(); - + LoadInst *Orig = Builder.CreateLoad(Ptr); Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); Value *Res = Builder.CreateSelect(Equal, Val, Orig); Builder.CreateStore(Res, Ptr); - + CXI->replaceAllUsesWith(Orig); CXI->eraseFromParent(); return true; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 052cc3dac0..2a5ee33eb1 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -44,7 +44,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, gep_type_iterator GTI = gep_type_begin(GEP); for (unsigned i = 1; i != Idx; ++i, ++GTI) /*skip along*/; - + // Compute the offset implied by the rest of the indices. int64_t Offset = 0; for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { @@ -58,7 +58,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); continue; } - + // Otherwise, we have a sequential type like an array or vector. Multiply // the index by the ElementSize. 
uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); @@ -77,7 +77,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, Ptr2 = Ptr2->stripPointerCasts(); GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1); GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2); - + bool VariableIdxFound = false; // If one pointer is a GEP and the other isn't, then see if the GEP is a @@ -91,7 +91,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); return !VariableIdxFound; } - + // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical // base. After that base, they may have some number of common (and // potentially variable) indices. After that they handle some constant @@ -99,7 +99,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, // handle no other case. if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) return false; - + // Skip any common indices and track the GEP types. unsigned Idx = 1; for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) @@ -109,7 +109,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); if (VariableIdxFound) return false; - + Offset = Offset2-Offset1; return true; } @@ -128,19 +128,19 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, namespace { struct MemsetRange { // Start/End - A semi range that describes the span that this range covers. - // The range is closed at the start and open at the end: [Start, End). + // The range is closed at the start and open at the end: [Start, End). int64_t Start, End; /// StartPtr - The getelementptr instruction that points to the start of the /// range. Value *StartPtr; - + /// Alignment - The known alignment of the first store. unsigned Alignment; - + /// TheStores - The actual stores that make up this range. SmallVector<Instruction*, 16> TheStores; - + bool isProfitableToUseMemset(const TargetData &TD) const; }; @@ -152,17 +152,17 @@ bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { // If there is nothing to merge, don't do anything. if (TheStores.size() < 2) return false; - + // If any of the stores are a memset, then it is always good to extend the // memset. for (unsigned i = 0, e = TheStores.size(); i != e; ++i) if (!isa<StoreInst>(TheStores[i])) return true; - + // Assume that the code generator is capable of merging pairs of stores // together if it wants to. if (TheStores.size() == 2) return false; - + // If we have fewer than 8 stores, it can still be worthwhile to do this. // For example, merging 4 i8 stores into an i32 store is useful almost always. // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the @@ -175,15 +175,15 @@ bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { // actually reducing the number of stores used. unsigned Bytes = unsigned(End-Start); unsigned NumPointerStores = Bytes/TD.getPointerSize(); - + // Assume the remaining bytes if any are done a byte at a time. unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(); - + // If we will reduce the # stores (according to this heuristic), do the // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 // etc. 
return TheStores.size() > NumPointerStores+NumByteStores; -} +} namespace { @@ -195,12 +195,12 @@ class MemsetRanges { const TargetData &TD; public: MemsetRanges(const TargetData &td) : TD(td) {} - + typedef std::list<MemsetRange>::const_iterator const_iterator; const_iterator begin() const { return Ranges.begin(); } const_iterator end() const { return Ranges.end(); } bool empty() const { return Ranges.empty(); } - + void addInst(int64_t OffsetFromFirst, Instruction *Inst) { if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) addStore(OffsetFromFirst, SI); @@ -210,21 +210,21 @@ public: void addStore(int64_t OffsetFromFirst, StoreInst *SI) { int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType()); - + addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(), SI->getAlignment(), SI); } - + void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue(); addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); } - + void addRange(int64_t Start, int64_t Size, Value *Ptr, unsigned Alignment, Instruction *Inst); }; - + } // end anon namespace @@ -240,10 +240,10 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, unsigned Alignment, Instruction *Inst) { int64_t End = Start+Size; range_iterator I = Ranges.begin(), E = Ranges.end(); - + while (I != E && Start > I->End) ++I; - + // We now know that I == E, in which case we didn't find anything to merge // with, or that Start <= I->End. If End < I->Start or I == E, then we need // to insert a new range. Handle this now. @@ -256,18 +256,18 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, R.TheStores.push_back(Inst); return; } - + // This store overlaps with I, add it. I->TheStores.push_back(Inst); - + // At this point, we may have an interval that completely contains our store. // If so, just add it to the interval and return. if (I->Start <= Start && I->End >= End) return; - + // Now we know that Start <= I->End and End >= I->Start so the range overlaps // but is not entirely contained within the range. - + // See if the range extends the start of the range. In this case, it couldn't // possibly cause it to join the prior range, because otherwise we would have // stopped on *it*. @@ -276,7 +276,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, I->StartPtr = Ptr; I->Alignment = Alignment; } - + // Now we know that Start <= I->End and Start >= I->Start (so the startpoint // is in or right at the end of I), and that End >= I->Start. Extend I out to // End. @@ -325,7 +325,7 @@ namespace { AU.addPreserved<AliasAnalysis>(); AU.addPreserved<MemoryDependenceAnalysis>(); } - + // Helper fuctions bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); @@ -341,7 +341,7 @@ namespace { bool iterateOnFunction(Function &F); }; - + char MemCpyOpt::ID = 0; } @@ -361,16 +361,16 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", /// some other patterns to fold away. In particular, this looks for stores to /// neighboring locations of memory. If it sees enough consecutive ones, it /// attempts to merge them together into a memcpy/memset. -Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, +Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Value *StartPtr, Value *ByteVal) { if (TD == 0) return 0; - + // Okay, so we now have a single store that can be splatable. 
Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemsetRanges Ranges(*TD); - + BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { @@ -381,43 +381,43 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, break; continue; } - + if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; - + // Check to see if this stored value is of the same byte-splattable value. if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) break; - + // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; - + Ranges.addStore(Offset, NextStore); } else { MemSetInst *MSI = cast<MemSetInst>(BI); - + if (MSI->isVolatile() || ByteVal != MSI->getValue() || !isa<ConstantInt>(MSI->getLength())) break; - + // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD)) break; - + Ranges.addMemSet(Offset, MSI); } } - + // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (Ranges.empty()) return 0; - + // If we had at least one store that could be merged in, add the starting // store as well. We try to avoid this unless there is at least something // interesting as a small compile-time optimization. @@ -434,28 +434,28 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemsetRange &Range = *I; - + if (Range.TheStores.size() == 1) continue; - + // If it is profitable to lower this range to memset, do so now. if (!Range.isProfitableToUseMemset(*TD)) continue; - + // Otherwise, we do want to transform this! Create a new memset. // Get the starting pointer of the block. 
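The range-merging machinery above is ultimately aimed at store sequences such as the following sketch; the function name is illustrative.

// Illustrative only: four adjacent byte stores of the same splattable value
// land in a single MemsetRange and can be rewritten as one memset call.
void zero4(unsigned char *p) {
  p[0] = 0;
  p[1] = 0;
  p[2] = 0;
  p[3] = 0;                       // may become: memset(p, 0, 4);
}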
StartPtr = Range.StartPtr; - + // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { - Type *EltType = + Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } - - AMemSet = + + AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); - + DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; @@ -473,14 +473,14 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, } ++NumMemSetInfer; } - + return AMemSet; } bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!SI->isSimple()) return false; - + if (TD == 0) return false; // Detect cases where we're performing call slot forwarding, but @@ -510,7 +510,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (C) { bool changed = performCallSlotOptzn(LI, - SI->getPointerOperand()->stripPointerCasts(), + SI->getPointerOperand()->stripPointerCasts(), LI->getPointerOperand()->stripPointerCasts(), TD->getTypeStoreSize(SI->getOperand(0)->getType()), C); if (changed) { @@ -524,10 +524,10 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { } } } - + // There are two cases that are interesting for this code to handle: memcpy // and memset. Right now we only handle memset. - + // Ensure that the value being stored is something that can be memset'able a // byte at a time like "0" or "-1" or any width, as well as things like // 0xA0A0A0A0 and 0.0. @@ -537,7 +537,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { BBI = I; // Don't invalidate iterator. return true; } - + return false; } @@ -680,7 +680,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (CS.getArgument(i)->getType() == cpyDest->getType()) CS.setArgument(i, cpyDest); else - CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, + CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, CS.getArgument(i)->getType(), cpyDest->getName(), C)); } @@ -701,14 +701,14 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, /// processMemCpyMemCpyDependence - We've found that the (upward scanning) /// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to /// copy from MDep's input if we can. MSize is the size of M's copy. -/// +/// bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, uint64_t MSize) { // We can only transforms memcpy's where the dest of one is the source of the // other. if (M->getSource() != MDep->getDest() || MDep->isVolatile()) return false; - + // If dep instruction is reading from our current input, then it is a noop // transfer and substituting the input won't change this instruction. Just // ignore the input and let someone else zap MDep. This handles cases like: @@ -716,14 +716,14 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, // memcpy(b <- a) if (M->getSource() == MDep->getSource()) return false; - + // Second, the length of the memcpy's must be the same, or the preceding one // must be larger than the following one. 
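The memcpy-to-memcpy dependence being checked here corresponds to the following source pattern; a hedged sketch with illustrative names, assuming the three buffers do not overlap.

#include <cstring>

// Illustrative only: the second memcpy reads exactly the bytes the first one
// wrote, so it can copy from the original source instead, which may let the
// intermediate buffer and the first copy be removed later.
void copy_chain(char *a, char *b, char *c) {
  std::memcpy(b, a, 64);
  std::memcpy(c, b, 64);          // may become: memcpy(c, a, 64);
}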
ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) return false; - + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); // Verify that the copied-from memory doesn't change in between the two @@ -743,23 +743,23 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, false, M, M->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; - + // If the dest of the second might alias the source of the first, then the // source and dest might overlap. We still want to eliminate the intermediate // value, but we have to generate a memmove instead of memcpy. bool UseMemMove = false; if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep))) UseMemMove = true; - + // If all checks passed, then we can transform M. - + // Make sure to use the lesser of the alignment of the source and the dest // since we're changing where we're reading from, but don't want to increase // the alignment past what can be read from or written to. // TODO: Is this worth it if we're creating a less aligned memcpy? For // example we could be moving from movaps -> movq on x86. unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); - + IRBuilder<> Builder(M); if (UseMemMove) Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), @@ -839,13 +839,13 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { if (!TLI->has(LibFunc::memmove)) return false; - + // See if the pointers alias. if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) return false; - + DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); - + // If not, then we know we can transform this. Module *Mod = M->getParent()->getParent()->getParent(); Type *ArgTys[3] = { M->getRawDest()->getType(), @@ -861,7 +861,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { ++NumMoveToCpy; return true; } - + /// processByValArgument - This is called on every byval argument in call sites. bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { if (TD == 0) return false; @@ -884,7 +884,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { if (MDep == 0 || MDep->isVolatile() || ByValArg->stripPointerCasts() != MDep->getDest()) return false; - + // The length of the memcpy must be larger or equal to the size of the byval. ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) @@ -894,13 +894,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { // then it is some target specific value that we can't know. unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); if (ByValAlign == 0) return false; - + // If it is greater than the memcpy, then we check to see if we can force the // source of the memcpy to the alignment we need. If we fail, we bail out. if (MDep->getAlignment() < ByValAlign && getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign) return false; - + // Verify that the copied-from memory doesn't change in between the memcpy and // the byval call. 
// memcpy(a <- b) @@ -915,16 +915,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { false, CS.getInstruction(), MDep->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; - + Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), "tmpcast", CS.getInstruction()); - + DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n" << " " << *MDep << "\n" << " " << *CS.getInstruction() << "\n"); - + // Otherwise we're good! Update the byval argument. CS.setArgument(ArgNo, TmpCast); ++NumMemCpyInstr; @@ -940,9 +940,9 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { // Avoid invalidating the iterator. Instruction *I = BI++; - + bool RepeatInstruction = false; - + if (StoreInst *SI = dyn_cast<StoreInst>(I)) MadeChange |= processStore(SI, BI); else if (MemSetInst *M = dyn_cast<MemSetInst>(I)) @@ -964,7 +964,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { } } } - + return MadeChange; } @@ -976,19 +976,19 @@ bool MemCpyOpt::runOnFunction(Function &F) { MD = &getAnalysis<MemoryDependenceAnalysis>(); TD = getAnalysisIfAvailable<TargetData>(); TLI = &getAnalysis<TargetLibraryInfo>(); - + // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) return false; - + while (1) { if (!iterateOnFunction(F)) break; MadeChange = true; } - + MD = 0; return MadeChange; } diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 87e17c7f90..3222f2083b 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -380,14 +380,14 @@ static InstructionClass GetBasicInstructionClass(const Value *V) { return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User; } -/// IsRetain - Test if the the given class is objc_retain or +/// IsRetain - Test if the given class is objc_retain or /// equivalent. static bool IsRetain(InstructionClass Class) { return Class == IC_Retain || Class == IC_RetainRV; } -/// IsAutorelease - Test if the the given class is objc_autorelease or +/// IsAutorelease - Test if the given class is objc_autorelease or /// equivalent. static bool IsAutorelease(InstructionClass Class) { return Class == IC_Autorelease || @@ -2829,7 +2829,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, } S.IncrementNestCount(); - return NestingDetected; + + // A retain can be a potential use; procede to the generic checking + // code below. + break; } case IC_Release: { Arg = GetObjCArg(Inst); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index bcf34b5256..09687d8909 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -375,7 +375,7 @@ typedef std::pair<Value*, APInt> RepeatedValue; /// nodes in Ops along with their weights (how many times the leaf occurs). The /// original expression is the same as /// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times -/// op +/// op /// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times /// op /// ... @@ -543,6 +543,7 @@ static bool LinearizeExprTree(BinaryOperator *I, // Update the number of paths to the leaf. 
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 98b0d5f6d5..ea1de63de7 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -59,7 +59,7 @@ namespace {
     virtual bool runOnFunction(Function &F);
   };
 }
-  
+
 char RegToMem::ID = 0;
 INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
                 false, false)
@@ -68,25 +68,25 @@ INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
                 false, false)

 bool RegToMem::runOnFunction(Function &F) {
-  if (F.isDeclaration()) 
+  if (F.isDeclaration())
     return false;
-  
+
   // Insert all new allocas into entry block.
   BasicBlock *BBEntry = &F.getEntryBlock();
   assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
          "Entry block to function must not have predecessors!");
-  
+
   // Find first non-alloca instruction and create insertion point.  This is
   // safe if the block is well-formed: it always has a terminator; otherwise
   // we'll get an assertion.
   BasicBlock::iterator I = BBEntry->begin();
   while (isa<AllocaInst>(I)) ++I;
-  
+
   CastInst *AllocaInsertionPoint =
     new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
                     Type::getInt32Ty(F.getContext()),
                     "reg2mem alloca point", I);
-  
+
   // Find the escaped instructions.  But don't create stack slots for
   // allocas in entry block.
   std::list<Instruction*> WorkList;
@@ -99,15 +99,15 @@ bool RegToMem::runOnFunction(Function &F) {
         WorkList.push_front(&*iib);
       }
     }
-  
+
   // Demote escaped instructions
   NumRegsDemoted += WorkList.size();
-  for (std::list<Instruction*>::iterator ilb = WorkList.begin(), 
+  for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
        ile = WorkList.end(); ilb != ile; ++ilb)
     DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
-  
+
   WorkList.clear();
-  
+
   // Find all phi's
   for (Function::iterator ibb = F.begin(), ibe = F.end();
        ibb != ibe; ++ibb)
@@ -115,19 +115,18 @@ bool RegToMem::runOnFunction(Function &F) {
          iib != iie; ++iib)
       if (isa<PHINode>(iib))
         WorkList.push_front(&*iib);
-  
+
   // Demote phi nodes
   NumPhisDemoted += WorkList.size();
-  for (std::list<Instruction*>::iterator ilb = WorkList.begin(), 
+  for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
        ile = WorkList.end(); ilb != ile; ++ilb)
     DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
-  
+
   return true;
 }

 // createDemoteRegisterToMemory - Provide an entry point to create this pass.
-//
 char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
 FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
   return new RegToMem();
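For context, a sketch of how this entry point is typically scheduled, assuming the legacy pass-manager API of this era (the wrapper function below is mine):

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"

    // Demote SSA values and phi nodes to stack slots; a later -mem2reg or
    // SROA run can rebuild SSA form on the simplified code.
    void scheduleReg2Mem(llvm::FunctionPassManager &FPM) {
      FPM.add(llvm::createDemoteRegisterToMemoryPass());
    }

The same pass is reachable from the command line as "opt -reg2mem".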
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 16b64a500b..2c39aab5cd 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -409,7 +409,7 @@ private:

     if (Constant *C = dyn_cast<Constant>(V)) {
       Constant *Elt = C->getAggregateElement(i);
-      
+
       if (Elt == 0)
         LV.markOverdefined();      // Unknown sort of constant.
       else if (isa<UndefValue>(Elt))
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 49970d4576..48318c8a55 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements common infrastructure for libLLVMScalarOpts.a, which 
+// This file implements common infrastructure for libLLVMScalarOpts.a, which
 // implements several scalar transformations over the LLVM intermediate
 // representation, including the C bindings for that library.
 //
@@ -24,12 +24,11 @@

 using namespace llvm;

-/// initializeScalarOptsPasses - Initialize all passes linked into the 
+/// initializeScalarOptsPasses - Initialize all passes linked into the
 /// ScalarOpts library.
 void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeADCEPass(Registry);
   initializeBlockPlacementPass(Registry);
-  initializeBoundsCheckingPass(Registry);
   initializeCodeGenPreparePass(Registry);
   initializeConstantPropagationPass(Registry);
   initializeCorrelatedValuePropagationPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e3e3c9eb17..8090fdf178 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -56,7 +56,6 @@ STATISTIC(NumReplaced,  "Number of allocas broken up");
 STATISTIC(NumPromoted,  "Number of allocas promoted");
 STATISTIC(NumAdjusted,  "Number of scalar allocas adjusted to allow promotion");
 STATISTIC(NumConverted, "Number of aggregates converted to scalar");
-STATISTIC(NumGlobals,   "Number of allocas copied from constant global");

 namespace {
   struct SROA : public FunctionPass {
@@ -104,7 +103,7 @@ namespace {
     /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
     /// looping and avoid redundant work.
     SmallPtrSet<PHINode*, 8> CheckedPHIs;
-    
+
     /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
     bool isUnsafe : 1;

@@ -118,12 +117,12 @@ namespace {
     /// ever accessed, or false if the alloca is only accessed with mem
     /// intrinsics or load/store that only access the entire alloca at once.
     bool hasSubelementAccess : 1;
-    
+
     /// hasALoadOrStore - This is true if there are any loads or stores to it.
     /// The alloca may just be accessed with memcpy, for example, which would
     /// not set this.
     bool hasALoadOrStore : 1;
-    
+
     explicit AllocaInfo(AllocaInst *ai)
       : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
         hasSubelementAccess(false), hasALoadOrStore(false) {}
@@ -183,11 +182,8 @@ namespace {
     void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
     bool ShouldAttemptScalarRepl(AllocaInst *AI);
-
-    static MemTransferInst *isOnlyCopiedFromConstantGlobal(
-        AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
   };
-  
+
   // SROA_DT - SROA that uses DominatorTree.
  struct SROA_DT : public SROA {
    static char ID;
  public:
    SROA_DT(int T = -1, bool ST = false, int AT = -1, int SLT = -1) :
@@ -196,7 +192,7 @@ namespace {
        SROA(T, true, ID, ST, AT, SLT) {
      initializeSROA_DTPass(*PassRegistry::getPassRegistry());
    }
-    
+
    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -204,7 +200,7 @@ namespace {
      AU.addRequired<DominatorTree>();
      AU.setPreservesCFG();
    }
  };
-  
+
  // SROA_SSAUp - SROA that uses SSAUpdater.
  struct SROA_SSAUp : public SROA {
    static char ID;
  public:
    SROA_SSAUp(int T = -1, bool ST = false, int AT = -1, int SLT = -1) :
@@ -213,14 +209,14 @@ namespace {
        SROA(T, false, ID, ST, AT, SLT) {
      initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
    }
-    
+
    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
    }
  };
-  
+
 }

 char SROA_DT::ID = 0;
@@ -294,7 +290,7 @@ class ConvertToScalarInfo {
   /// isn't possible to turn into a vector type, it gets set to VoidTy.
   VectorType *VectorTy;

-  /// HadNonMemTransferAccess - True if there is at least one access to the 
+  /// HadNonMemTransferAccess - True if there is at least one access to the
   /// alloca that is not a MemTransferInst.  We don't want to turn structs into
   /// large integers unless there is some potential for optimization.
   bool HadNonMemTransferAccess;
@@ -612,11 +608,16 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
       // Compute the offset that this GEP adds to the pointer.
       SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
-      if (!GEP->hasAllConstantIndices())
-        NonConstantIdx = Indices.pop_back_val();
+      Value* GEPNonConstantIdx = 0;
+      if (!GEP->hasAllConstantIndices()) {
+        assert(!NonConstantIdx &&
+               "Dynamic GEP reading from dynamic GEP unsupported");
+        GEPNonConstantIdx = Indices.pop_back_val();
+      } else
+        GEPNonConstantIdx = NonConstantIdx;
       uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                                Indices);
-      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx);
+      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
       GEP->eraseFromParent();
       continue;
     }
@@ -1050,7 +1051,7 @@ public:
   AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
                  DIBuilder *DB)
     : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
-  
+
   void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
     // Remember which alloca we're promoting (for isInstInList).
    this->AI = AI;
@@ -1065,18 +1066,18 @@ public:
    LoadAndStorePromoter::run(Insts);
    AI->eraseFromParent();
-    for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(), 
+    for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      DDI->eraseFromParent();
    }
-    for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(), 
+    for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      DVI->eraseFromParent();
    }
  }
-  
+
  virtual bool isInstInList(Instruction *I,
                            const SmallVectorImpl<Instruction*> &Insts) const {
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
@@ -1085,7 +1086,7 @@ public:
  }

  virtual void updateDebugInfo(Instruction *Inst) const {
-    for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(), 
+    for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
@@ -1093,7 +1094,7 @@ public:
      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
    }
-    for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(), 
+    for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      Value *Arg = NULL;
@@ -1136,12 +1137,12 @@ public:
 static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
   bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
   bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
-  
+
   for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
        UI != UE; ++UI) {
     LoadInst *LI = dyn_cast<LoadInst>(*UI);
     if (LI == 0 || !LI->isSimple()) return false;
-    
+
     // Both operands to the select need to be dereferenceable, either absolutely
     // (e.g. allocas) or at this point because we can see other accesses to it.
     if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
@@ -1151,7 +1152,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
                                                     LI->getAlignment(), TD))
       return false;
   }
-  
+
   return true;
 }

@@ -1182,20 +1183,20 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
        UI != UE; ++UI) {
     LoadInst *LI = dyn_cast<LoadInst>(*UI);
     if (LI == 0 || !LI->isSimple()) return false;
-    
+
     // For now we only allow loads in the same block as the PHI.  This is a
     // common case that happens when instcombine merges two loads through a PHI.
     if (LI->getParent() != BB) return false;
-    
+
     // Ensure that there are no instructions between the PHI and the load that
     // could store.
     for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
       if (BBI->mayWriteToMemory())
         return false;
-    
+
     MaxAlign = std::max(MaxAlign, LI->getAlignment());
   }
-  
+
   // Okay, we know that we have one or more loads in the same block as the PHI.
   // We can transform this if it is safe to push the loads into the predecessor
   // blocks.  The only thing to watch out for is that we can't put a possibly
@@ -1223,10 +1224,10 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
     if (InVal->isDereferenceablePointer() ||
         isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
       continue;
-    
+
     return false;
   }
-  
+
   return true;
 }

@@ -1238,7 +1239,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
 static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
   SetVector<Instruction*, SmallVector<Instruction*, 4>,
             SmallPtrSet<Instruction*, 4> > InstsToRewrite;
-  
+
   for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
        UI != UE; ++UI) {
     User *U = *UI;
@@ -1247,7 +1248,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
         return false;
      continue;
    }
-    
+
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == AI || !SI->isSimple())
        return false;   // Don't allow a store OF the AI, only INTO the AI.
@@ -1261,7 +1262,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
        Value *Result = SI->getOperand(1+CI->isZero());
        SI->replaceAllUsesWith(Result);
        SI->eraseFromParent();
-        
+
        // This is very rare and we just scrambled the use list of AI, start
        // over completely.
        return tryToMakeAllocaBePromotable(AI, TD);
@@ -1271,33 +1272,33 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
      // loads, then we can transform this by rewriting the select.
      if (!isSafeSelectToSpeculate(SI, TD))
        return false;
-      
+
      InstsToRewrite.insert(SI);
      continue;
    }
-    
+
    if (PHINode *PN = dyn_cast<PHINode>(U)) {
      if (PN->use_empty()) {  // Dead PHIs can be stripped.
        InstsToRewrite.insert(PN);
        continue;
      }
-      
+
      // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
      // in the pred blocks, then we can transform this by rewriting the PHI.
      if (!isSafePHIToSpeculate(PN, TD))
        return false;
-      
+
      InstsToRewrite.insert(PN);
      continue;
    }
-    
+
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      if (onlyUsedByLifetimeMarkers(BCI)) {
        InstsToRewrite.insert(BCI);
        continue;
      }
    }
-    
+
    return false;
  }

@@ -1305,7 +1306,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
  // we're done!
  if (InstsToRewrite.empty())
    return true;
-  
+
  // If we have instructions that need to be rewritten for this to be promotable
  // take care of it now.
  for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
@@ -1326,13 +1327,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
      // loads with a new select.
      while (!SI->use_empty()) {
        LoadInst *LI = cast<LoadInst>(SI->use_back());
-        
+
        IRBuilder<> Builder(LI);
-        LoadInst *TrueLoad = 
+        LoadInst *TrueLoad =
          Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
-        LoadInst *FalseLoad = 
+        LoadInst *FalseLoad =
          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
-        
+
        // Transfer alignment and TBAA info if present.
        TrueLoad->setAlignment(LI->getAlignment());
        FalseLoad->setAlignment(LI->getAlignment());
@@ -1340,18 +1341,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
          TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
          FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
        }
-        
+
        Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
        V->takeName(LI);
        LI->replaceAllUsesWith(V);
        LI->eraseFromParent();
      }
-      
+
      // Now that all the loads are gone, the select is gone too.
      SI->eraseFromParent();
      continue;
    }
-    
+
    // Otherwise, we have a PHI node which allows us to push the loads into the
    // predecessors.
    PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
@@ -1359,7 +1360,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
      PN->eraseFromParent();
      continue;
    }
-    
+
    Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
    PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
                                     PN->getName()+".ld", PN);
@@ -1369,18 +1370,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
    LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
    unsigned Align = SomeLoad->getAlignment();
-    
+
    // Rewrite all loads of the PN to use the new PHI.
    while (!PN->use_empty()) {
      LoadInst *LI = cast<LoadInst>(PN->use_back());
      LI->replaceAllUsesWith(NewPN);
      LI->eraseFromParent();
    }
-    
+
    // Inject loads into all of the pred blocks.  Keep track of which blocks we
    // insert them into in case we have multiple edges from the same block.
    DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
-    
+
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      BasicBlock *Pred = PN->getIncomingBlock(i);
      LoadInst *&Load = InsertedLoads[Pred];
@@ -1391,13 +1392,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
        Load->setAlignment(Align);
        if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
      }
-      
+
      NewPN->addIncoming(Load, Pred);
    }
-    
+
    PN->eraseFromParent();
  }
-    
+
  ++NumAdjusted;
  return true;
 }
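The select and PHI rewrites that tryToMakeAllocaBePromotable performs are easier to see at the source level. A hand-written illustration (mine, not from the patch) of the select case:

    // Before the rewrite, the alloca is only reachable through a select,
    // which blocks promotion.  Speculating both loads turns the select of
    // pointers into a select of values, leaving plain loads of each slot
    // that mem2reg can then promote.
    int select_of_slots(bool c, int a, int b) {
      int x = a, y = b;        // two promotable stack slots
      int *p = c ? &x : &y;    // select of their addresses
      return *p;               // rewritten to: c ? x : y
    }

The PHI case is the same idea with the loads pushed into the predecessor blocks, which is why isSafePHIToSpeculate above guards against possibly trapping loads in predecessors.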
@@ -1430,7 +1431,7 @@ bool SROA::performPromotion(Function &F) {
       SSAUpdater SSA;
       for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
         AllocaInst *AI = Allocas[i];
-        
+
         // Build list of instructions to promote.
         for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
              UI != E; ++UI)
@@ -1460,26 +1461,6 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
   return false;
 }

-/// getPointeeAlignment - Compute the minimum alignment of the value pointed
-/// to by the given pointer.
-static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-    if (CE->getOpcode() == Instruction::BitCast ||
-        (CE->getOpcode() == Instruction::GetElementPtr &&
-         cast<GEPOperator>(CE)->hasAllZeroIndices()))
-      return getPointeeAlignment(CE->getOperand(0), TD);
-
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
-    if (!GV->isDeclaration())
-      return TD.getPreferredAlignment(GV);
-
-  if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
-    return TD.getABITypeAlignment(PT->getElementType());
-
-  return 0;
-}
-
-
 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
 // which runs on all of the alloca instructions in the function, removing them
 // if they are only used by getelementptr instructions.
@@ -1511,29 +1492,6 @@ bool SROA::performScalarRepl(Function &F) {
     if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
       continue;

-    // Check to see if this allocation is only modified by a memcpy/memmove from
-    // a constant global whose alignment is equal to or exceeds that of the
-    // allocation.  If this is the case, we can change all users to use
-    // the constant global instead.  This is commonly produced by the CFE by
-    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
-    // is only subsequently read.
-    SmallVector<Instruction *, 4> ToDelete;
-    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
-      if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
-        DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
-        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
-        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
-          ToDelete[i]->eraseFromParent();
-        Constant *TheSrc = cast<Constant>(Copy->getSource());
-        AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
-        Copy->eraseFromParent();  // Don't mutate the global.
-        AI->eraseFromParent();
-        ++NumGlobals;
-        Changed = true;
-        continue;
-      }
-    }
-
     // Check to see if we can perform the core SROA transformation.  We cannot
     // transform the allocation instruction if it is an array allocation
     // (allocations OF arrays are ok though), and an allocation of a scalar
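The block deleted in this hunk matched a pattern frontends commonly emit for local arrays initialized from constant data; a sketch of the shape involved (identifiers are mine, purely illustrative):

    #include <cstring>
    static const int Init[5] = {1, 2, 3, 4, 5};    // constant global
    int read_only_copy(unsigned i) {
      int A[5];
      std::memcpy(A, Init, sizeof(A));   // the only write to A
      return A[i % 5];                   // reads could be served by Init itself
    }

If the upstream history is any guide, the optimization itself was not dropped: it appears to have been ported from SROA into instcombine around this time, which is what this removal reflects.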
@@ -1667,12 +1625,12 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
       isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
                       LIType, false, Info, LI, true /*AllowWholeAccess*/);
       Info.hasALoadOrStore = true;
-        
+
     } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       // Store is ok if storing INTO the pointer, not storing the pointer
       if (!SI->isSimple() || SI->getOperand(0) == I)
         return MarkUnsafe(Info, User);
-        
+
       Type *SIType = SI->getOperand(0)->getType();
       isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
                       SIType, true, Info, SI, true /*AllowWholeAccess*/);
@@ -1689,7 +1647,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
     if (Info.isUnsafe) return;
   }
 }
-  
+

 /// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer
 /// derived from the alloca, we can often still split the alloca into elements.
@@ -1706,10 +1664,10 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
   if (PHINode *PN = dyn_cast<PHINode>(I))
     if (!Info.CheckedPHIs.insert(PN))
       return;
-  
+
   for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
     Instruction *User = cast<Instruction>(*UI);
-    
+
     if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
       isSafePHISelectUseForScalarRepl(BC, Offset, Info);
     } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
@@ -1726,12 +1684,12 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
       isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
                       LIType, false, Info, LI, false /*AllowWholeAccess*/);
       Info.hasALoadOrStore = true;
-        
+
     } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       // Store is ok if storing INTO the pointer, not storing the pointer
       if (!SI->isSimple() || SI->getOperand(0) == I)
         return MarkUnsafe(Info, User);
-      
+
       Type *SIType = SI->getOperand(0)->getType();
       isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
                       SIType, true, Info, SI, false /*AllowWholeAccess*/);
@@ -1925,12 +1883,12 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
       RewriteBitCast(BC, AI, Offset, NewElts);
       continue;
     }
-    
+
     if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
       RewriteGEP(GEPI, AI, Offset, NewElts);
       continue;
     }
-    
+
     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
       ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
       uint64_t MemSize = Length->getZExtValue();
@@ -1949,10 +1907,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
       }
       continue;
     }
-    
+
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
       Type *LIType = LI->getType();
-      
+
       if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
         // Replace:
         //   %res = load { i32, i32 }* %alloc
@@ -1978,7 +1936,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
       }
       continue;
     }
-    
+
     if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       Value *Val = SI->getOperand(0);
       Type *SIType = Val->getType();
@@ -2005,16 +1963,16 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
       }
       continue;
     }
-    
+
     if (isa<SelectInst>(User) || isa<PHINode>(User)) {
-      // If we have a PHI user of the alloca itself (as opposed to a GEP or 
+      // If we have a PHI user of the alloca itself (as opposed to a GEP or
       // bitcast) we have to rewrite it.  GEP and bitcast uses will be RAUW'd to
       // the new pointer.
       if (!isa<AllocaInst>(I)) continue;
-      
+
       assert(Offset == 0 && NewElts[0] &&
              "Direct alloca use should have a zero offset");
-      
+
       // If we have a use of the alloca, we know the derived uses will be
       // utilizing just the first element of the scalarized result.  Insert a
       // bitcast of the first alloca before the user as required.
@@ -2386,7 +2344,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
   uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

   IRBuilder<> Builder(SI);
-  
+
   // Handle tail padding by extending the operand
   if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
     SrcVal = Builder.CreateZExt(SrcVal,
@@ -2648,137 +2606,6 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
       return false;
     }
   }
-
-  return true;
-}
-
-
-
-/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
-/// some part of a constant global variable.  This intentionally only accepts
-/// constant expressions because we don't can't rewrite arbitrary instructions.
-static bool PointsToConstantGlobal(Value *V) {
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
-    return GV->isConstant();
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-    if (CE->getOpcode() == Instruction::BitCast ||
-        CE->getOpcode() == Instruction::GetElementPtr)
-      return PointsToConstantGlobal(CE->getOperand(0));
-  return false;
-}
-
-/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
-/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
-/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
-/// track of whether it moves the pointer (with isOffset) but otherwise traverse
-/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
-/// the alloca, and if the source pointer is a pointer to a constant global, we
-/// can optimize this.
-static bool
-isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
-                               bool isOffset,
-                               SmallVector<Instruction *, 4> &LifetimeMarkers) {
-  // We track lifetime intrinsics as we encounter them.  If we decide to go
-  // ahead and replace the value with the global, this lets the caller quickly
-  // eliminate the markers.
-
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
-    User *U = cast<Instruction>(*UI);
-
-    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
-      // Ignore non-volatile loads, they are always ok.
-      if (!LI->isSimple()) return false;
-      continue;
-    }
-
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
-      // If uses of the bitcast are ok, we are ok.
-      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
-                                          LifetimeMarkers))
-        return false;
-      continue;
-    }
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
-      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
-      // doesn't, it does.
-      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
-                                          isOffset || !GEP->hasAllZeroIndices(),
-                                          LifetimeMarkers))
-        return false;
-      continue;
-    }
-
-    if (CallSite CS = U) {
-      // If this is the function being called then we treat it like a load and
-      // ignore it.
-      if (CS.isCallee(UI))
-        continue;
-
-      // If this is a readonly/readnone call site, then we know it is just a
-      // load (but one that potentially returns the value itself), so we can
-      // ignore it if we know that the value isn't captured.
-      unsigned ArgNo = CS.getArgumentNo(UI);
-      if (CS.onlyReadsMemory() &&
-          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
-        continue;
-
-      // If this is being passed as a byval argument, the caller is making a
-      // copy, so it is only a read of the alloca.
-      if (CS.isByValArgument(ArgNo))
-        continue;
-    }
-
-    // Lifetime intrinsics can be handled by the caller.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
-          II->getIntrinsicID() == Intrinsic::lifetime_end) {
-        assert(II->use_empty() && "Lifetime markers have no result to use!");
-        LifetimeMarkers.push_back(II);
-        continue;
-      }
-    }
-
-    // If this is isn't our memcpy/memmove, reject it as something we can't
-    // handle.
-    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
-    if (MI == 0)
-      return false;
-
-    // If the transfer is using the alloca as a source of the transfer, then
-    // ignore it since it is a load (unless the transfer is volatile).
-    if (UI.getOperandNo() == 1) {
-      if (MI->isVolatile()) return false;
-      continue;
-    }
-
-    // If we already have seen a copy, reject the second one.
-    if (TheCopy) return false;
-
-    // If the pointer has been offset from the start of the alloca, we can't
-    // safely handle this.
-    if (isOffset) return false;
-
-    // If the memintrinsic isn't using the alloca as the dest, reject it.
-    if (UI.getOperandNo() != 0) return false;
-
-    // If the source of the memcpy/move is not a constant global, reject it.
-    if (!PointsToConstantGlobal(MI->getSource()))
-      return false;
-
-    // Otherwise, the transform is safe.  Remember the copy instruction.
-    TheCopy = MI;
-  }

   return true;
 }
-
-/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
-/// modified by a copy from a constant global.  If we can prove this, we can
-/// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *
-SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
-                                     SmallVector<Instruction*, 4> &ToDelete) {
-  MemTransferInst *TheCopy = 0;
-  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
-    return TheCopy;
-  return 0;
-}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 91158b429e..d13e4abff9 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -67,7 +67,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
   // nodes.
   for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
     (*SI)->removePredecessor(BB);
-  
+
   // Insert a call to llvm.trap right before this.  This turns the undefined
   // behavior into a hard fail instead of falling through into random code.
   if (UseLLVMTrap) {
@@ -77,7 +77,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
     CallTrap->setDebugLoc(I->getDebugLoc());
   }
   new UnreachableInst(I->getContext(), I);
-  
+
   // All instructions after this are dead.
   BasicBlock::iterator BBI = I, BBE = BB->end();
   while (BBI != BBE) {
@@ -107,13 +107,13 @@ static void ChangeToCall(InvokeInst *II) {

 static bool MarkAliveBlocks(BasicBlock *BB,
                             SmallPtrSet<BasicBlock*, 128> &Reachable) {
-  
+
   SmallVector<BasicBlock*, 128> Worklist;
   Worklist.push_back(BB);
   bool Changed = false;
   do {
     BB = Worklist.pop_back_val();
-    
+
     if (!Reachable.insert(BB))
       continue;

@@ -135,7 +135,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
           break;
         }
       }
-      
+
       // Store to undef and store to null are undefined and used to signal that
       // they should be changed to unreachable by passes that can't modify the
      // CFG.
@@ -144,7 +144,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
        if (SI->isVolatile()) continue;

        Value *Ptr = SI->getOperand(1);
-        
+
        if (isa<UndefValue>(Ptr) ||
            (isa<ConstantPointerNull>(Ptr) &&
             SI->getPointerAddressSpace() == 0)) {
@@ -165,6 +165,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
      if (II->use_empty() && II->onlyReadsMemory()) {
        // jump to the normal destination branch.
        BranchInst::Create(II->getNormalDest(), II);
+        II->getUnwindDest()->removePredecessor(II->getParent());
        II->eraseFromParent();
      } else
        ChangeToCall(II);
@@ -179,38 +180,38 @@ static bool MarkAliveBlocks(BasicBlock *BB,
   return Changed;
 }

-/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even 
-/// if they are in a dead cycle.  Return true if a change was made, false 
+/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// if they are in a dead cycle.  Return true if a change was made, false
 /// otherwise.
 static bool RemoveUnreachableBlocksFromFn(Function &F) {
   SmallPtrSet<BasicBlock*, 128> Reachable;
   bool Changed = MarkAliveBlocks(F.begin(), Reachable);
-  
+
   // If there are unreachable blocks in the CFG...
   if (Reachable.size() == F.size())
     return Changed;
-  
+
   assert(Reachable.size() < F.size());
   NumSimpl += F.size()-Reachable.size();
-  
+
   // Loop over all of the basic blocks that are not reachable, dropping all of
   // their internal references...
   for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
     if (Reachable.count(BB))
       continue;
-    
+
     for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
       if (Reachable.count(*SI))
         (*SI)->removePredecessor(BB);
     BB->dropAllReferences();
   }
-  
+
   for (Function::iterator I = ++F.begin(); I != F.end();)
     if (!Reachable.count(I))
       I = F.getBasicBlockList().erase(I);
     else
       ++I;
-  
+
   return true;
 }

@@ -218,17 +219,17 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
 /// node) return blocks, merge them together to promote recursive block merging.
 static bool MergeEmptyReturnBlocks(Function &F) {
   bool Changed = false;
-  
+
   BasicBlock *RetBlock = 0;
-  
+
   // Scan all the blocks in the function, looking for empty return blocks.
   for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
     BasicBlock &BB = *BBI++;
-    
+
     // Only look at return blocks.
     ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
     if (Ret == 0) continue;
-    
+
     // Only look at the block if it is empty or the only other thing in it is a
     // single PHI node that is the operand to the return.
     if (Ret != &BB.front()) {
@@ -250,21 +251,21 @@ static bool MergeEmptyReturnBlocks(Function &F) {
       RetBlock = &BB;
       continue;
     }
-    
+
     // Otherwise, we found a duplicate return block.  Merge the two.
     Changed = true;
-    
+
     // Case when there is no input to the return or when the returned values
     // agree is trivial.  Note that they can't agree if there are phis in the
     // blocks.
     if (Ret->getNumOperands() == 0 ||
-        Ret->getOperand(0) == 
+        Ret->getOperand(0) ==
           cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
       BB.replaceAllUsesWith(RetBlock);
       BB.eraseFromParent();
       continue;
     }
-    
+
     // If the canonical return block has no PHI node, create one now.
     PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
     if (RetBlockPHI == 0) {
@@ -273,12 +274,12 @@ static bool MergeEmptyReturnBlocks(Function &F) {
       RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
                                     std::distance(PB, PE), "merge",
                                     &RetBlock->front());
-      
+
       for (pred_iterator PI = PB; PI != PE; ++PI)
         RetBlockPHI->addIncoming(InVal, *PI);
       RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
     }
-    
+
     // Turn BB into a block that just unconditionally branches to the return
     // block.  This handles the case when the two return blocks have a common
     // predecessor but that return different things.
@@ -286,7 +287,7 @@ static bool MergeEmptyReturnBlocks(Function &F) {
     BB.getTerminator()->eraseFromParent();
     BranchInst::Create(RetBlock, &BB);
   }
-  
+
   return Changed;
 }

@@ -297,7 +298,7 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
   bool LocalChange = true;
   while (LocalChange) {
     LocalChange = false;
-    
+
     // Loop over all of the basic blocks and remove them if they are unneeded...
     //
     for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
@@ -326,7 +327,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
   // IterativeSimplifyCFG can (rarely) make some loops dead.  If this happens,
   // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
   // iterate between the two optimizations.  We structure the code like this to
-  // avoid reruning IterativeSimplifyCFG if the second pass of 
+  // avoid rerunning IterativeSimplifyCFG if the second pass of
   // RemoveUnreachableBlocksFromFn doesn't do anything.
   if (!RemoveUnreachableBlocksFromFn(F))
     return true;
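A source-level caricature of what MergeEmptyReturnBlocks above buys (the function is mine, purely illustrative):

    // Two return blocks whose operands agree; after the pass both
    // predecessors branch to one shared return block, which in turn
    // exposes further block merging.
    int merged(int c) {
      if (c > 0)
        return 0;   // return block #1
      return 0;     // return block #2 -- folded into #1
    }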
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 39647c7fc6..3904419012 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -100,7 +100,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
   }
   return true;
 }
-  
+
 static bool CallHasFloatingPointArgument(const CallInst *CI) {
   for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
        it != e; ++it) {
@@ -157,14 +157,15 @@ struct StrCatOpt : public LibCallOptimization {
     // These optimizations require TargetData.
     if (!TD) return 0;

-    EmitStrLenMemCpy(Src, Dst, Len, B);
-    return Dst;
+    return EmitStrLenMemCpy(Src, Dst, Len, B);
   }

-  void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
+  Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
     // We need to find the end of the destination string.  That's where the
     // memory is to be moved to.  We just generate a call to strlen.
-    Value *DstLen = EmitStrLen(Dst, B, TD);
+    Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+    if (!DstLen)
+      return 0;

     // Now that we have the destination's length, we must index into the
     // destination's pointer to get the actual memcpy destination (end of
@@ -175,6 +176,7 @@ struct StrCatOpt : public LibCallOptimization {
     // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
     B.CreateMemCpy(CpyDst, Src,
                    ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+    return Dst;
   }
 };

@@ -221,8 +223,7 @@ struct StrNCatOpt : public StrCatOpt {
     // strncat(x, s, c) -> strcat(x, s)
     // s is constant so the strcat can be optimized further
-    EmitStrLenMemCpy(Src, Dst, SrcLen, B);
-    return Dst;
+    return EmitStrLenMemCpy(Src, Dst, SrcLen, B);
   }
 };

@@ -254,9 +255,9 @@ struct StrChrOpt : public LibCallOptimization {
       return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
                         ConstantInt::get(TD->getIntPtrType(*Context), Len),
-                        B, TD);
+                        B, TD, TLI);
     }
-    
+
     // Otherwise, the character is a constant, see if the first argument is
     // a string literal.  If so, we can constant fold.
     StringRef Str;
@@ -299,7 +300,7 @@ struct StrRChrOpt : public LibCallOptimization {
     if (!getConstantStringInfo(SrcStr, Str)) {
       // strrchr(s, 0) -> strchr(s, 0)
       if (TD && CharC->isZero())
-        return EmitStrChr(SrcStr, '\0', B, TD);
+        return EmitStrChr(SrcStr, '\0', B, TD, TLI);
       return 0;
     }

@@ -355,7 +356,7 @@ struct StrCmpOpt : public LibCallOptimization {

       return EmitMemCmp(Str1P, Str2P,
                         ConstantInt::get(TD->getIntPtrType(*Context),
-                        std::min(Len1, Len2)), B, TD);
+                        std::min(Len1, Len2)), B, TD, TLI);
     }

     return 0;
@@ -391,7 +392,7 @@ struct StrNCmpOpt : public LibCallOptimization {
       return ConstantInt::get(CI->getType(), 0);

     if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
-      return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
+      return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);

     StringRef Str1, Str2;
     bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -447,11 +448,10 @@ struct StrCpyOpt : public LibCallOptimization {

     // We have enough information to now generate the memcpy call to do the
     // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
-    if (OptChkCall)
-      EmitMemCpyChk(Dst, Src,
-                    ConstantInt::get(TD->getIntPtrType(*Context), Len),
-                    CI->getArgOperand(2), B, TD);
-    else
+    if (!OptChkCall ||
+        !EmitMemCpyChk(Dst, Src,
+                       ConstantInt::get(TD->getIntPtrType(*Context), Len),
+                       CI->getArgOperand(2), B, TD, TLI))
       B.CreateMemCpy(Dst, Src,
                      ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
     return Dst;
@@ -480,8 +480,10 @@ struct StpCpyOpt: public LibCallOptimization {
     if (!TD) return 0;

     Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
-    if (Dst == Src)  // stpcpy(x,x)  -> x+strlen(x)
-      return B.CreateInBoundsGEP(Dst, EmitStrLen(Src, B, TD));
+    if (Dst == Src) {  // stpcpy(x,x)  -> x+strlen(x)
+      Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+      return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+    }

     // See if we can get the length of the input string.
     uint64_t Len = GetStringLength(Src);
@@ -494,9 +496,8 @@ struct StpCpyOpt: public LibCallOptimization {

     // We have enough information to now generate the memcpy call to do the
     // copy for us.  Make a memcpy to copy the nul byte with align = 1.
-    if (OptChkCall)
-      EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD);
-    else
+    if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B,
+                                      TD, TLI))
       B.CreateMemCpy(Dst, Src, LenV, 1);
     return DstEnd;
   }
@@ -609,7 +610,7 @@ struct StrPBrkOpt : public LibCallOptimization {

     // strpbrk(s, "a") -> strchr(s, 'a')
     if (TD && HasS2 && S2.size() == 1)
-      return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+      return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);

     return 0;
   }
@@ -698,7 +699,7 @@ struct StrCSpnOpt : public LibCallOptimization {

     // strcspn(s, "") -> strlen(s)
     if (TD && HasS2 && S2.empty())
-      return EmitStrLen(CI->getArgOperand(0), B, TD);
+      return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);

     return 0;
   }
@@ -722,9 +723,13 @@ struct StrStrOpt : public LibCallOptimization {

     // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
     if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
-      Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+      Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
+      if (!StrLen)
+        return 0;
       Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
-                                   StrLen, B, TD);
+                                   StrLen, B, TD, TLI);
+      if (!StrNCmp)
+        return 0;
       for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
            UI != UE; ) {
         ICmpInst *Old = cast<ICmpInst>(*UI++);
@@ -760,9 +765,10 @@ struct StrStrOpt : public LibCallOptimization {
     }

     // fold strstr(x, "y") -> strchr(x, 'y').
-    if (HasStr2 && ToFindStr.size() == 1)
-      return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
-                                        ToFindStr[0], B, TD), CI->getType());
+    if (HasStr2 && ToFindStr.size() == 1) {
+      Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
+      return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
+    }
     return 0;
   }
 };
@@ -1179,8 +1185,8 @@ struct PrintFOpt : public LibCallOptimization {

     // printf("x") -> putchar('x'), even for '%'.
     if (FormatStr.size() == 1) {
-      Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
-      if (CI->use_empty()) return CI;
+      Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+      if (CI->use_empty() || !Res) return Res;
       return B.CreateIntCast(Res, CI->getType(), true);
     }

@@ -1191,26 +1197,26 @@ struct PrintFOpt : public LibCallOptimization {
       // pass to be run after this pass, to merge duplicate strings.
       FormatStr = FormatStr.drop_back();
       Value *GV = B.CreateGlobalString(FormatStr, "str");
-      EmitPutS(GV, B, TD);
-      return CI->use_empty() ? (Value*)CI :
-                    ConstantInt::get(CI->getType(), FormatStr.size()+1);
+      Value *NewCI = EmitPutS(GV, B, TD, TLI);
+      return (CI->use_empty() || !NewCI) ?
+              NewCI :
+              ConstantInt::get(CI->getType(), FormatStr.size()+1);
     }

     // Optimize specific format strings.
// printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD); + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); - if (CI->use_empty()) return CI; + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) { - EmitPutS(CI->getArgOperand(1), B, TD); - return CI; + return EmitPutS(CI->getArgOperand(1), B, TD, TLI); } return 0; } @@ -1297,7 +1303,9 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + if (!Len) + return 0; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); @@ -1364,8 +1372,8 @@ struct FWriteOpt : public LibCallOptimization { // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - EmitFPutC(Char, CI->getArgOperand(3), B, TD); - return ConstantInt::get(CI->getType(), 1); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; } return 0; @@ -1390,10 +1398,10 @@ struct FPutsOpt : public LibCallOptimization { // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; - EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getArgOperand(1), B, TD, TLI); - return CI; // Known to have no uses (see above). + // Known to have no uses (see above). + return EmitFWrite(CI->getArgOperand(0), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, TD, TLI); } }; @@ -1417,11 +1425,11 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return ConstantInt::get(CI->getType(), FormatStr.size()); + Value *NewCI = EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1434,16 +1442,16 @@ struct FPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); - return ConstantInt::get(CI->getType(), 1); + Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, + TD, TLI); + return NewCI ? 
+      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
     }

     if (FormatStr[1] == 's') {
       // fprintf(F, "%s", str) --> fputs(str, F)
       if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
         return 0;
-      EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
-      return CI;
+      return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
     }
     return 0;
   }
@@ -1494,8 +1502,8 @@ struct PutsOpt : public LibCallOptimization {

     if (Str.empty() && CI->use_empty()) {
       // puts("") -> putchar('\n')
-      Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
-      if (CI->use_empty()) return CI;
+      Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+      if (CI->use_empty() || !Res) return Res;
       return B.CreateIntCast(Res, CI->getType(), true);
     }

@@ -1514,7 +1522,7 @@ namespace {
   ///
   class SimplifyLibCalls : public FunctionPass {
     TargetLibraryInfo *TLI;
-    
+
     StringMap<LibCallOptimization*> Optimizations;
     // String and Memory LibCall Optimizations
     StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
@@ -1534,7 +1542,7 @@ namespace {
     SPrintFOpt SPrintF; PrintFOpt PrintF;
     FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
     PutsOpt Puts;
-    
+
     bool Modified;  // This is only used by doInitialization.
   public:
     static char ID; // Pass identification
@@ -1633,6 +1641,8 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["llvm.exp2.f64"] = &Exp2;
   Optimizations["llvm.exp2.f32"] = &Exp2;

+  if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf))
+    Optimizations["fabs"] = &UnaryDoubleFP;
   if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
     Optimizations["floor"] = &UnaryDoubleFP;
   if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
@@ -1643,6 +1653,8 @@ void SimplifyLibCalls::InitOptimizations() {
     Optimizations["rint"] = &UnaryDoubleFP;
   if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
     Optimizations["nearbyint"] = &UnaryDoubleFP;
+  if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf))
+    Optimizations["trunc"] = &UnaryDoubleFP;

   // Integer Optimizations
   Optimizations["ffs"] = &FFS;
@@ -1767,7 +1779,7 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {

 void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
   FunctionType *FTy = F.getFunctionType();
-  
+
   StringRef Name = F.getName();
   switch (Name[0]) {
   case 's':
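The thread running through the SimplifyLibCalls changes above is a calling-convention change: every Emit* helper now takes a TargetLibraryInfo and may return 0 when the corresponding C library routine is unavailable, so each simplifier must treat 0 as "decline the transform" rather than assuming success. A condensed sketch of the caller-side pattern (the wrapper function is mine; EmitStrLen is from the tree, and header paths shifted around this period, so adjust to your checkout):

    #include "llvm/Support/IRBuilder.h"
    #include "llvm/Target/TargetData.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    #include "llvm/Transforms/Utils/BuildLibCalls.h"

    // Returns the strlen() call, or 0 to tell the caller to back off.
    llvm::Value *tryEmitLen(llvm::Value *Ptr, llvm::IRBuilder<> &B,
                            const llvm::TargetData *TD,
                            const llvm::TargetLibraryInfo *TLI) {
      llvm::Value *Len = llvm::EmitStrLen(Ptr, B, TD, TLI);
      if (!Len)
        return 0;   // strlen unavailable on this target; do nothing
      return Len;   // safe to build further IR on top of this
    }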
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index d6ec65169d..34f1d6c622 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -40,9 +40,9 @@ namespace {
     Sinking() : FunctionPass(ID) {
       initializeSinkingPass(*PassRegistry::getPassRegistry());
     }
-    
+
     virtual bool runOnFunction(Function &F);
-    
+
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
       FunctionPass::getAnalysisUsage(AU);
@@ -59,7 +59,7 @@ namespace {
     bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const;
   };
 } // end anonymous namespace
-  
+
 char Sinking::ID = 0;
 INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -71,7 +71,7 @@ FunctionPass *llvm::createSinkingPass() { return new Sinking(); }

 /// AllUsesDominatedByBlock - Return true if all uses of the specified value
 /// occur in blocks dominated by the specified block.
-bool Sinking::AllUsesDominatedByBlock(Instruction *Inst, 
+bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
                                       BasicBlock *BB) const {
   // Ignoring debug uses is necessary so debug info doesn't affect the code.
   // This may leave a referencing dbg_value in the original block, before
@@ -101,18 +101,18 @@ bool Sinking::runOnFunction(Function &F) {
   AA = &getAnalysis<AliasAnalysis>();

   bool MadeChange, EverMadeChange = false;
-  
+
   do {
     MadeChange = false;
     DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n");
     // Process all basic blocks.
-    for (Function::iterator I = F.begin(), E = F.end(); 
+    for (Function::iterator I = F.begin(), E = F.end();
          I != E; ++I)
       MadeChange |= ProcessBlock(*I);
     EverMadeChange |= MadeChange;
     NumSinkIter++;
   } while (MadeChange);
-  
+
   return EverMadeChange;
 }

@@ -121,8 +121,8 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
   if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;

   // Don't bother sinking code out of unreachable blocks. In addition to being
-  // unprofitable, it can also lead to infinite looping, because in an unreachable
-  // loop there may be nowhere to stop.
+  // unprofitable, it can also lead to infinite looping, because in an
+  // unreachable loop there may be nowhere to stop.
   if (!DT->isReachableFromEntry(&BB)) return false;

   bool MadeChange = false;
@@ -134,7 +134,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
   SmallPtrSet<Instruction *, 8> Stores;
   do {
     Instruction *Inst = I;  // The instruction to sink.
-    
+
     // Predecrement I (if it's not begin) so that it isn't invalidated by
     // sinking.
     ProcessedBegin = I == BB.begin();
@@ -146,10 +146,10 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {

     if (SinkInstruction(Inst, Stores))
       ++NumSunk, MadeChange = true;
-    
+
     // If we just processed the first instruction in the block, we're done.
   } while (!ProcessedBegin);
-  
+
   return MadeChange;
 }

@@ -177,16 +177,17 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,

 /// IsAcceptableTarget - Return true if it is possible to sink the instruction
 /// in the specified basic block.
-bool Sinking::IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const {
+bool Sinking::IsAcceptableTarget(Instruction *Inst,
+                                 BasicBlock *SuccToSinkTo) const {
   assert(Inst && "Instruction to be sunk is null");
   assert(SuccToSinkTo && "Candidate sink target is null");
-  
+
   // It is not possible to sink an instruction into its own block.  This can
   // happen with loops.
   if (Inst->getParent() == SuccToSinkTo)
     return false;
-  
-  // If the block has multiple predecessors, this would introduce computation 
+
+  // If the block has multiple predecessors, this would introduce computation
   // on different code paths.  We could split the critical edge, but for now we
   // just punt.
   // FIXME: Split critical edges if not backedges.
@@ -195,18 +196,19 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
     // other code paths.
     if (!isSafeToSpeculativelyExecute(Inst))
       return false;
-    
+
     // We don't want to sink across a critical edge if we don't dominate the
     // successor. We could be introducing calculations to new code paths.
     if (!DT->dominates(Inst->getParent(), SuccToSinkTo))
       return false;
-    
+
     // Don't sink instructions into a loop.
-    Loop *succ = LI->getLoopFor(SuccToSinkTo), *cur = LI->getLoopFor(Inst->getParent());
+    Loop *succ = LI->getLoopFor(SuccToSinkTo);
+    Loop *cur = LI->getLoopFor(Inst->getParent());
    if (succ != 0 && succ != cur)
      return false;
  }
-  
+
  // Finally, check that all the uses of the instruction are actually
  // dominated by the candidate
  return AllUsesDominatedByBlock(Inst, SuccToSinkTo);
@@ -219,7 +221,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,

  // Check if it's safe to move the instruction.
  if (!isSafeToMove(Inst, AA, Stores))
    return false;
-  
+
  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges.  We often get
  // instructions sunk into the top of a large block, but it would be better to
@@ -227,41 +229,41 @@ bool Sinking::SinkInstruction(Instruction *Inst,
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.
-  
+
  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  BasicBlock *SuccToSinkTo = 0;
-  
+
  // Instructions can only be sunk if all their uses are in blocks
  // dominated by one of the successors.
  // Look at all the postdominators and see if we can sink it in one.
  DomTreeNode *DTN = DT->getNode(Inst->getParent());
-  for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end(); 
+  for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
       I != E && SuccToSinkTo == 0; ++I) {
    BasicBlock *Candidate = (*I)->getBlock();
-    if ((*I)->getIDom()->getBlock() == Inst->getParent() && 
+    if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
        IsAcceptableTarget(Inst, Candidate))
      SuccToSinkTo = Candidate;
  }

-  // If no suitable postdominator was found, look at all the successors and 
+  // If no suitable postdominator was found, look at all the successors and
  // decide which one we should sink to, if any.
-  for (succ_iterator I = succ_begin(Inst->getParent()), 
+  for (succ_iterator I = succ_begin(Inst->getParent()),
       E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) {
    if (IsAcceptableTarget(Inst, *I))
      SuccToSinkTo = *I;
  }
-  
+
  // If we couldn't find a block to sink to, ignore this instruction.
  if (SuccToSinkTo == 0)
    return false;
-  
+
  DEBUG(dbgs() << "Sink" << *Inst << " (";
-        WriteAsOperand(dbgs(), Inst->getParent(), false); 
+        WriteAsOperand(dbgs(), Inst->getParent(), false);
        dbgs() << " -> ";
-        WriteAsOperand(dbgs(), SuccToSinkTo, false); 
+        WriteAsOperand(dbgs(), SuccToSinkTo, false);
        dbgs() << ")\n");
-  
+
  // Move the instruction.
  Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt());
  return true;
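For orientation, the kind of motion the Sinking pass performs, written out at the source level (the function is mine, illustrative only):

    // 'y' is computed eagerly but used on only one path.  Sinking moves the
    // multiply into the block that returns it, so the early-return path no
    // longer pays for it and y's live range shrinks.
    int sink_example(int x, bool c) {
      int y = x * 37;
      if (c)
        return 0;
      return y;     // after sinking, the multiply is computed here
    }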
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 074d0325d9..6557d630a9 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -172,7 +172,7 @@ bool TailCallElim::runOnFunction(Function &F) {
       FunctionContainsEscapingAllocas |=
         CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
   }
-  
+
   /// FIXME: The code generator produces really bad code when an 'escaping
   /// alloca' is changed from being a static alloca to being a dynamic alloca.
   /// Until this is resolved, disable this transformation if that would ever
   /// happen.
@@ -234,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
     // call does not mod/ref the memory location being processed.
     if (I->mayHaveSideEffects())  // This also handles volatile loads.
       return false;
-    
+
     if (LoadInst *L = dyn_cast<LoadInst>(I)) {
       // Loads may always be moved above calls without side effects.
       if (CI->mayHaveSideEffects()) {
@@ -364,7 +364,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
   if (&BB->front() == TI) // Make sure there is something before the terminator.
     return 0;
-  
+
   // Scan backwards from the return, checking to see if there is a tail call in
   // this block.  If so, set CI to it.
   CallInst *CI = 0;
@@ -388,7 +388,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
   //   double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
   // and disable this xform in this case, because the code generator will
   // lower the call to fabs into inline code.
-  if (BB == &F->getEntryBlock() && 
+  if (BB == &F->getEntryBlock() &&
       FirstNonDbg(BB->front()) == CI &&
       FirstNonDbg(llvm::next(BB->begin())) == TI &&
       callIsSmall(CI)) {
@@ -432,7 +432,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
   BasicBlock::iterator BBI = CI;
   for (++BBI; &*BBI != Ret; ++BBI) {
     if (CanMoveAboveCall(BBI, CI)) continue;
-    
+
     // If we can't move the instruction above the call, it might be because it
     // is an associative and commutative operation that could be transformed
     // using accumulator recursion elimination.  Check to see if this is the
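The accumulator-recursion case that the last hunk's comment refers to is the classic one; a small illustration (mine, not from the patch):

    // 'n * fact(n - 1)' is not a tail call, but introducing an accumulator
    // makes it one, and TailCallElim can then turn the recursion into a loop.
    long fact(long n) { return n <= 1 ? 1 : n * fact(n - 1); }

    long fact_acc(long n, long acc) {                // call as fact_acc(n, 1)
      return n <= 1 ? acc : fact_acc(n - 1, acc * n); // true tail call
    }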
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 27f7724417..e13fd716fa 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -34,7 +34,11 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {

 /// EmitStrLen - Emit a call to the strlen function to the builder, for the
 /// specified pointer.  This always returns an integer value of size intptr_t.
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD,
+                        const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::strlen))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -53,11 +57,41 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
   return CI;
 }

+/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the
+/// specified pointer.  Ptr is required to be some pointer type, MaxLen must
+/// be of size_t type, and the return value has 'intptr_t' type.
+Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
+                         const TargetData *TD, const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::strnlen))
+    return 0;
+
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  AttributeWithIndex AWI[2];
+  AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+  AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+                                   Attribute::NoUnwind);
+
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
+                                             TD->getIntPtrType(Context),
+                                             B.getInt8PtrTy(),
+                                             TD->getIntPtrType(Context),
+                                             NULL);
+  CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
+  if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
+    CI->setCallingConv(F->getCallingConv());
+
+  return CI;
+}
+
 /// EmitStrChr - Emit a call to the strchr function to the builder, for the
 /// specified pointer and character.  Ptr is required to be some pointer type,
 /// and the return value has 'i8*' type.
 Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
-                        const TargetData *TD) {
+                        const TargetData *TD, const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::strchr))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI =
     AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
@@ -75,7 +109,11 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,

 /// EmitStrNCmp - Emit a call to the strncmp function to the builder.
 Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
-                         IRBuilder<> &B, const TargetData *TD) {
+                         IRBuilder<> &B, const TargetData *TD,
+                         const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::strncmp))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[3];
   AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -101,7 +139,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,

 /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
 /// specified pointer arguments.
 Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
-                        const TargetData *TD, StringRef Name) {
+                        const TargetData *TD, const TargetLibraryInfo *TLI,
+                        StringRef Name) {
+  if (!TLI->has(LibFunc::strcpy))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -119,7 +161,11 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,

 /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
 /// specified pointer arguments.
 Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
-                         IRBuilder<> &B, const TargetData *TD, StringRef Name) {
+                         IRBuilder<> &B, const TargetData *TD,
+                         const TargetLibraryInfo *TLI, StringRef Name) {
+  if (!TLI->has(LibFunc::strncpy))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -139,7 +185,11 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
 /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
 /// are pointers.
 Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
-                           IRBuilder<> &B, const TargetData *TD) {
+                           IRBuilder<> &B, const TargetData *TD,
+                           const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::memcpy_chk))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI;
   AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
@@ -162,7 +212,11 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,

 /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
 /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
 Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
-                        Value *Len, IRBuilder<> &B, const TargetData *TD) {
+                        Value *Len, IRBuilder<> &B, const TargetData *TD,
+                        const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::memchr))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI;
   AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
@@ -183,7 +237,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,

 /// EmitMemCmp - Emit a call to the memcmp function.
 Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
-                        Value *Len, IRBuilder<> &B, const TargetData *TD) {
+                        Value *Len, IRBuilder<> &B, const TargetData *TD,
+                        const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::memcmp))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[3];
   AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -236,7 +294,11 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,

 /// EmitPutChar - Emit a call to the putchar function.  This assumes that Char
 /// is an integer.
-Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD,
+                         const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::putchar))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
                                           B.getInt32Ty(), NULL);
@@ -254,7 +316,11 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {

 /// EmitPutS - Emit a call to the puts function.  This assumes that Str is
 /// some pointer.
-void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD,
+                      const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::puts))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -267,13 +333,16 @@ void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
   CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
   if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
-  
+  return CI;
 }

 /// EmitFPutC - Emit a call to the fputc function.  This assumes that Char is
 /// an integer and File is a pointer to FILE.
-void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
-                     const TargetData *TD) {
+Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+                       const TargetData *TD, const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::fputc))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -295,12 +364,16 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
   if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
+  return CI;
 }

 /// EmitFPutS - Emit a call to the puts function.  Str is required to be a
 /// pointer and File is a pointer to FILE.
-void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
-                     const TargetData *TD, const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+                       const TargetData *TD, const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::fputs))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[3];
   AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -321,13 +394,17 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
   if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
+  return CI;
 }

 /// EmitFWrite - Emit a call to the fwrite function.  This assumes that Ptr is
 /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
-                      IRBuilder<> &B, const TargetData *TD,
-                      const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+                        IRBuilder<> &B, const TargetData *TD,
+                        const TargetLibraryInfo *TLI) {
+  if (!TLI->has(LibFunc::fwrite))
+    return 0;
+
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[3];
   AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -354,11 +431,13 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
   if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
+  return CI;
 }

 SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }

-bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
+bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD,
+                                     const TargetLibraryInfo *TLI) {
   // We really need TargetData for later.
   if (!TD) return false;
@@ -446,7 +525,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
   // string lengths for varying.
   if (isFoldable(2, 1, true)) {
     Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
-                            Name.substr(2, 6));
+                            TLI, Name.substr(2, 6));
+    if (!Ret)
+      return false;
     replaceCall(Ret);
     return true;
   }
@@ -464,7 +545,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {

   if (isFoldable(3, 2, false)) {
     Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                             CI->getArgOperand(2), B, TD, Name.substr(2, 7));
+                             CI->getArgOperand(2), B, TD, TLI,
+                             Name.substr(2, 7));
+    if (!Ret)
+      return false;
     replaceCall(Ret);
     return true;
   }
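All of the emitters above now consult TargetLibraryInfo and return 0 when the corresponding library routine is unavailable, which is why the fold() paths add null checks. A minimal caller sketch under that contract (builder position and values are hypothetical):

// Sketch: B is an IRBuilder<> positioned where the call should go; Ptr is
// an i8*-compatible value; TD and TLI come from the pass's analyses.
Value *Len = EmitStrLen(Ptr, B, TD, TLI);
if (!Len)
  return false; // strlen unavailable on this target: leave the IR untouched.
// ... otherwise Len is the emitted strlen call, of intptr_t width ...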
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index f96581393d..02bdcda391 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -12,19 +12,19 @@
 //===----------------------------------------------------------------------===//

 #define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/BasicBlock.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/MDBuilder.h"
 #include "llvm/Metadata.h"
 #include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MDBuilder.h"
-#include "llvm/ADT/Statistic.h"
 #include <vector>

 using namespace llvm;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index b3f5289fcd..72d4199a2a 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -39,7 +39,7 @@ SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
   : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}

 SSAUpdater::~SSAUpdater() {
-  delete &getAvailableVals(AV);
+  delete static_cast<AvailableValsTy*>(AV);
 }

 /// Initialize - Reset this object to get ready for a new set of SSA
@@ -214,6 +214,11 @@ void SSAUpdater::RewriteUse(Use &U) {
   else
     V = GetValueInMiddleOfBlock(User->getParent());

+  // Notify that users of the existing value that it is being replaced.
+  Value *OldVal = U.get();
+  if (OldVal != V && OldVal->hasValueHandle())
+    ValueHandleBase::ValueIsRAUWd(OldVal, V);
+
   U.set(V);
 }
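The RewriteUse change above fires the same value-handle notification as replaceAllUsesWith, so handles tracking the old value are kept coherent. A hedged sketch (assuming an already-initialized SSAUpdater named Updater, and that WeakVH of this vintage follows RAUW notifications):

// Sketch: U currently uses the old value somewhere below the new defs.
WeakVH Tracker(U.get());
Updater.RewriteUse(U); // may swap in a rewritten value for this use
// If a replacement happened, ValueHandleBase::ValueIsRAUWd has retargeted
// Tracker at the new value rather than leaving it dangling.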
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index f37ea91397..518df7cdda 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/MDBuilder.h"
 #include "llvm/Metadata.h"
 #include "llvm/Operator.h"
 #include "llvm/Type.h"
@@ -35,7 +36,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MDBuilder.h"
 #include "llvm/Support/NoFolder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
@@ -1330,7 +1330,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
       return false;
   }

-  // If we folded the the first phi, PN dangles at this point. Refresh it. If
+  // If we folded the first phi, PN dangles at this point. Refresh it. If
   // we ran out of PHIs then we simplified them all.
   PN = dyn_cast<PHINode>(BB->begin());
   if (PN == 0) return true;
@@ -1550,7 +1550,7 @@ static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D,
 /// checkCSEInPredecessor - Return true if the given instruction is available
 /// in its predecessor block. If yes, the instruction will be removed.
 ///
-bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
+static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
   if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
     return false;
   for (BasicBlock::iterator I = PB->begin(), E = PB->end(); I != E; I++) {
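The final hunk only gives checkCSEInPredecessor internal linkage, but its contract is worth noting: per the doc comment, it returns true when Inst is already available in the predecessor, and in that case Inst itself is removed. A hedged calling sketch (names hypothetical):

// Sketch: Inst lives in a block with predecessor PB. On success the
// duplicate has been replaced and erased, so Inst must not be touched
// again; advance with an iterator saved before the call.
if (checkCSEInPredecessor(Inst, PB))
  continue;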