diff options
Diffstat (limited to 'lib/Transforms')
111 files changed, 5257 insertions, 2478 deletions
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index b0e22de8d7..d0b146b4e9 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hello" -#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Function.h" +#include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(HelloCounter, "Counts number of functions greeted"); diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index be48b2063f..2132e0a5fe 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -31,21 +31,21 @@ #define DEBUG_TYPE "argpromotion" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/CallGraphSCCPass.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/CallGraphSCCPass.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Support/CallSite.h" +#include "llvm/Module.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include <set> using namespace llvm; @@ -515,12 +515,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeWithIndex, 8> AttributesVec; - const AttrListPtr &PAL = F->getAttributes(); + const AttributeSet &PAL = F->getAttributes(); // Add any return attributes. Attributes attrs = PAL.getRetAttributes(); if (attrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, attrs)); // First, determine the new argument list @@ -593,7 +593,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Add any function attributes. attrs = PAL.getFnAttributes(); if (attrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, attrs)); Type *RetTy = FTy->getReturnType(); @@ -611,7 +611,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Recompute the parameter attributes list based on the new arguments for // the function. - NF->setAttributes(AttrListPtr::get(F->getContext(), AttributesVec)); + NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec)); AttributesVec.clear(); F->getParent()->getFunctionList().insert(F, NF); @@ -636,12 +636,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, CallSite CS(F->use_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); - const AttrListPtr &CallPAL = CS.getAttributes(); + const AttributeSet &CallPAL = CS.getAttributes(); // Add any return attributes. Attributes attrs = CallPAL.getRetAttributes(); if (attrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, attrs)); // Loop over the operands, inserting GEP and loads in the caller as @@ -723,7 +723,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Add any function attributes. attrs = CallPAL.getFnAttributes(); if (attrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, attrs)); Instruction *New; @@ -731,12 +731,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); - cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(II->getContext(), + cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(), AttributesVec)); } else { New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); - cast<CallInst>(New)->setAttributes(AttrListPtr::get(New->getContext(), + cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(), AttributesVec)); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index e2f012657f..d30eeaf7d3 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -19,15 +19,15 @@ #define DEBUG_TYPE "constmerge" #include "llvm/Transforms/IPO.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/DataLayout.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Constants.h" +#include "llvm/DataLayout.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" using namespace llvm; STATISTIC(NumMerged, "Number of global constants merged"); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 4cfd0b235a..6236a04fc2 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -19,11 +19,15 @@ #define DEBUG_TYPE "deadargelim" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CallingConv.h" #include "llvm/Constant.h" +#include "llvm/DIBuilder.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" -#include "llvm/DIBuilder.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" @@ -32,10 +36,6 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include <map> #include <set> using namespace llvm; @@ -271,16 +271,16 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs); // Drop any attributes that were on the vararg arguments. - AttrListPtr PAL = CS.getAttributes(); + AttributeSet PAL = CS.getAttributes(); if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) { SmallVector<AttributeWithIndex, 8> AttributesVec; for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i) AttributesVec.push_back(PAL.getSlot(i)); Attributes FnAttrs = PAL.getFnAttributes(); if (FnAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, FnAttrs)); - PAL = AttrListPtr::get(Fn.getContext(), AttributesVec); + PAL = AttributeSet::get(Fn.getContext(), AttributesVec); } Instruction *New; @@ -698,7 +698,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Set up to build a new list of parameter attributes. SmallVector<AttributeWithIndex, 8> AttributesVec; - const AttrListPtr &PAL = F->getAttributes(); + const AttributeSet &PAL = F->getAttributes(); // The existing function return attributes. Attributes RAttrs = PAL.getRetAttributes(); @@ -773,7 +773,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { "Return attributes no longer compatible?"); if (RAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, RAttrs)); // Remember which arguments are still alive. @@ -802,11 +802,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } if (FnAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, FnAttrs)); // Reconstruct the AttributesList based on the vector we constructed. - AttrListPtr NewPAL = AttrListPtr::get(F->getContext(), AttributesVec); + AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec); // Create the new function type based on the recomputed parameters. FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); @@ -833,7 +833,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Instruction *Call = CS.getInstruction(); AttributesVec.clear(); - const AttrListPtr &CallPAL = CS.getAttributes(); + const AttributeSet &CallPAL = CS.getAttributes(); // The call return attributes. Attributes RAttrs = CallPAL.getRetAttributes(); @@ -843,7 +843,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Attributes::get(NF->getContext(), AttrBuilder(RAttrs). removeAttributes(Attributes::typeIncompatible(NF->getReturnType()))); if (RAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, RAttrs)); // Declare these outside of the loops, so we can reuse them for the second @@ -870,11 +870,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } if (FnAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, FnAttrs)); // Reconstruct the AttributesList based on the vector we constructed. - AttrListPtr NewCallPAL = AttrListPtr::get(F->getContext(), AttributesVec); + AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec); Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 05aefeff9f..1dc79d113e 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Constants.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/ADT/SetVector.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 18409f77b3..685833da1a 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -20,17 +20,17 @@ #define DEBUG_TYPE "functionattrs" #include "llvm/Transforms/IPO.h" -#include "llvm/CallGraphSCCPass.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/CallGraphSCCPass.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/InstIterator.h" using namespace llvm; @@ -215,13 +215,13 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { AttrBuilder B; B.addAttribute(Attributes::ReadOnly) .addAttribute(Attributes::ReadNone); - F->removeAttribute(AttrListPtr::FunctionIndex, + F->removeAttribute(AttributeSet::FunctionIndex, Attributes::get(F->getContext(), B)); // Add in the new attribute. B.clear(); B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone); - F->addAttribute(AttrListPtr::FunctionIndex, + F->addAttribute(AttributeSet::FunctionIndex, Attributes::get(F->getContext(), B)); if (ReadsMemory) diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 18c1c7b000..b2c819de2b 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -17,11 +17,11 @@ #define DEBUG_TYPE "globaldce" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumAliases , "Number of global aliases removed"); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 591278fa62..20f9de5a83 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -15,29 +15,29 @@ #define DEBUG_TYPE "globalopt" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <algorithm> using namespace llvm; @@ -148,17 +148,13 @@ struct GlobalStatus { /// an instruction (e.g. a constant expr or GV initializer). bool HasNonInstructionUser; - /// HasPHIUser - Set to true if this global has a user that is a PHI node. - bool HasPHIUser; - /// AtomicOrdering - Set to the strongest atomic ordering requirement. AtomicOrdering Ordering; GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored), StoredOnceValue(0), AccessingFunction(0), HasMultipleAccessingFunctions(false), - HasNonInstructionUser(false), HasPHIUser(false), - Ordering(NotAtomic) {} + HasNonInstructionUser(false), Ordering(NotAtomic) {} }; } @@ -200,11 +196,11 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, const User *U = *UI; if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { GS.HasNonInstructionUser = true; - + // If the result of the constantexpr isn't pointer type, then we won't // know to expect it in various places. Just reject early. if (!isa<PointerType>(CE->getType())) return true; - + if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; } else if (const Instruction *I = dyn_cast<Instruction>(U)) { if (!GS.HasMultipleAccessingFunctions) { @@ -274,7 +270,6 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, // have to be careful about infinite recursion. if (PHIUsers.insert(PN)) // Not already visited. if (AnalyzeGlobal(I, GS, PHIUsers)) return true; - GS.HasPHIUser = true; } else if (isa<CmpInst>(I)) { GS.isCompared = true; } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { @@ -2070,7 +2065,7 @@ static void ChangeCalleesToFastCall(Function *F) { } } -static AttrListPtr StripNest(LLVMContext &C, const AttrListPtr &Attrs) { +static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) { for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { if (!Attrs.getSlot(i).Attrs.hasAttribute(Attributes::Nest)) continue; @@ -2157,7 +2152,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); if (GV == 0) return 0; - + // Verify that the initializer is simple enough for us to handle. We are // only allowed to optimize the initializer if it is unique. if (!GV->hasUniqueInitializer()) return 0; @@ -2263,7 +2258,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } -static inline bool +static inline bool isSimpleEnoughValueToCommit(Constant *C, SmallPtrSet<Constant*, 8> &SimpleConstants, const DataLayout *TD); @@ -2285,7 +2280,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, if (C->getNumOperands() == 0 || isa<BlockAddress>(C) || isa<GlobalValue>(C)) return true; - + // Aggregate values are safe if all their elements are. if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || isa<ConstantVector>(C)) { @@ -2296,7 +2291,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, } return true; } - + // We don't know exactly what relocations are allowed in constant expressions, // so we allow &global+constantoffset, which is safe and uniformly supported // across targets. @@ -2314,14 +2309,14 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, TD->getTypeSizeInBits(CE->getOperand(0)->getType())) return false; return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); - + // GEP is fine if it is simple + constant offset. case Instruction::GetElementPtr: for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) if (!isa<ConstantInt>(CE->getOperand(i))) return false; return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); - + case Instruction::Add: // We allow simple+cst. if (!isa<ConstantInt>(CE->getOperand(1))) @@ -2331,7 +2326,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, return false; } -static inline bool +static inline bool isSimpleEnoughValueToCommit(Constant *C, SmallPtrSet<Constant*, 8> &SimpleConstants, const DataLayout *TD) { @@ -2379,7 +2374,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); - + // A constantexpr bitcast from a pointer to another pointer is a no-op, // and we know how to evaluate it by moving the bitcast from the pointer // operand to the value operand. @@ -2390,7 +2385,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); } } - + return false; } @@ -2420,7 +2415,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, // Return the modified struct. return ConstantStruct::get(STy, Elts); } - + ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); SequentialType *InitTy = cast<SequentialType>(Init->getType()); @@ -2597,23 +2592,23 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (!isSimpleEnoughPointerToCommit(Ptr)) // If this is too complex for us to commit, reject it. return false; - + Constant *Val = getVal(SI->getOperand(0)); // If this might be too difficult for the backend to handle (e.g. the addr // of one global variable divided by another) then we can't commit it. if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) return false; - + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) if (CE->getOpcode() == Instruction::BitCast) { // If we're evaluating a store through a bitcast, then we need // to pull the bitcast off the pointer type and push it onto the // stored value. Ptr = CE->getOperand(0); - + Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType(); - + // In order to push the bitcast onto the stored value, a bitcast // from NewTy to Val's type must be legal. If it's not, we can try // introspecting NewTy to find a legal conversion. @@ -2638,12 +2633,12 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; } } - + // If we found compatible types, go ahead and push the bitcast // onto the stored value. Val = ConstantExpr::getBitCast(Val, NewTy); } - + MutatedMemory[Ptr] = Val; } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), @@ -2805,7 +2800,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (!CurInst->use_empty()) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) InstResult = ConstantFoldConstantExpression(CE, TD, TLI); - + setVal(CurInst, InstResult); } @@ -2891,7 +2886,7 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD, Constant *RetValDummy; bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy, SmallVector<Constant*, 0>()); - + if (EvalSuccess) { // We succeeded at evaluation: commit the result. DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" @@ -3011,13 +3006,13 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { return 0; Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit)); - + if (!Fn) return 0; FunctionType *FTy = Fn->getFunctionType(); - - // Checking that the function has the right return type, the right number of + + // Checking that the function has the right return type, the right number of // parameters and that they all have pointer types should be enough. if (!FTy->getReturnType()->isIntegerTy() || FTy->getNumParams() != 3 || @@ -3092,7 +3087,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { // and remove them. bool Changed = false; - for (Function::use_iterator I = CXAAtExitFn->use_begin(), + for (Function::use_iterator I = CXAAtExitFn->use_begin(), E = CXAAtExitFn->use_end(); I != E;) { // We're only interested in calls. Theoretically, we could handle invoke // instructions as well, but neither llvm-gcc nor clang generate invokes @@ -3101,7 +3096,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { if (!CI) continue; - Function *DtorFn = + Function *DtorFn = dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); if (!DtorFn) continue; diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index d757e1fdb1..252b5b0584 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -17,14 +17,14 @@ #define DEBUG_TYPE "ipconstprop" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CallSite.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; STATISTIC(NumArgumentsProped, "Number of args turned into constants"); diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 6f4b810acc..5b8832e5d7 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -13,18 +13,18 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "inline" +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/CallingConv.h" +#include "llvm/DataLayout.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/InlineCost.h" #include "llvm/Support/CallSite.h" -#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" -#include "llvm/DataLayout.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Type.h" using namespace llvm; @@ -44,6 +44,10 @@ namespace { } static char ID; // Pass identification, replacement for typeid virtual InlineCost getInlineCost(CallSite CS); + + using llvm::Pass::doInitialization; + using llvm::Pass::doFinalization; + virtual bool doFinalization(CallGraph &CG) { return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true); } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index bf0b1f91a2..9c5feba08b 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -12,17 +12,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "inline" +#include "llvm/Transforms/IPO.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/CallingConv.h" +#include "llvm/DataLayout.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/InlineCost.h" #include "llvm/Support/CallSite.h" -#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" -#include "llvm/DataLayout.h" +#include "llvm/Type.h" using namespace llvm; @@ -42,6 +42,7 @@ namespace { InlineCost getInlineCost(CallSite CS) { return CA.getInlineCost(CS, getInlineThreshold(CS)); } + using llvm::Pass::doInitialization; virtual bool doInitialization(CallGraph &CG); }; } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index abcb25fd45..bd8fa66d52 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -14,22 +14,22 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "inline" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/IPO/InlinerPass.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index aa629cc0c6..b2cd3a765a 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -14,14 +14,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "internalize" -#include "llvm/Analysis/CallGraph.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" #include <fstream> #include <set> using namespace llvm; diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 97d7cdced0..af04d054ed 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -16,16 +16,16 @@ #define DEBUG_TYPE "loop-extract" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/ADT/Statistic.h" #include <fstream> #include <set> using namespace llvm; diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 44283ddce7..70345b8334 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -45,7 +45,13 @@ #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" @@ -53,17 +59,11 @@ #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include <vector> using namespace llvm; diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 9c9910bd5c..6bd9c8372e 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -14,14 +14,14 @@ #define DEBUG_TYPE "partialinlining" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CFG.h" using namespace llvm; STATISTIC(NumPartialInlined, "Number of functions partially inlined"); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 48e20ec339..a9a9f2eece 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -14,21 +14,19 @@ #include "llvm/Transforms/IPO/PassManagerBuilder.h" - #include "llvm-c/Transforms/PassManagerBuilder.h" - -#include "llvm/PassManager.h" -#include "llvm/DefaultPasses.h" -#include "llvm/PassManager.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/DefaultPasses.h" +#include "llvm/PassManager.h" +#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Vectorize.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ManagedStatic.h" using namespace llvm; @@ -190,10 +188,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops - if (LoopVectorize) { + if (LoopVectorize && OptLevel > 1) MPM.add(createLoopVectorizePass()); - MPM.add(createLICMPass()); - } if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index fb4ecbfe7b..19f34837c7 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -16,16 +16,16 @@ #define DEBUG_TYPE "prune-eh" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/CallGraphSCCPass.h" #include "llvm/Constants.h" #include "llvm/Function.h" -#include "llvm/LLVMContext.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/CFG.h" #include <algorithm> using namespace llvm; @@ -145,8 +145,8 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { NewAttributes.addAttribute(Attributes::NoReturn); Function *F = (*I)->getFunction(); - const AttrListPtr &PAL = F->getAttributes(); - const AttrListPtr &NPAL = PAL.addAttr(F->getContext(), ~0, + const AttributeSet &PAL = F->getAttributes(); + const AttributeSet &NPAL = PAL.addAttr(F->getContext(), ~0, Attributes::get(F->getContext(), NewAttributes)); if (PAL != NPAL) { diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp index b5f09eccca..80cb869f02 100644 --- a/lib/Transforms/IPO/StripDeadPrototypes.cpp +++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -16,9 +16,9 @@ #define DEBUG_TYPE "strip-dead-prototypes" #include "llvm/Transforms/IPO.h" -#include "llvm/Pass.h" -#include "llvm/Module.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" using namespace llvm; STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed"); diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 80bfc1cdb2..ad915d716f 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -21,17 +21,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Constants.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/TypeFinder.h" #include "llvm/ValueSymbolTable.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" using namespace llvm; namespace { diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 7467eca7ab..0570104205 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -11,12 +11,12 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IRBuilder.h" +#include "llvm/InstVisitor.h" #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Pass.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/TargetFolder.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" @@ -327,6 +327,11 @@ private: bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt& KnownZero, APInt& KnownOne, unsigned Depth=0); + /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded + /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. + Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, + APInt DemandedMask, APInt &KnownZero, + APInt &KnownOne); /// SimplifyDemandedInstructionBits - Inst is an integer instruction that /// SimplifyDemandedBits knows about. See if the instruction has any diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 12faedb0ff..f5c42a7983 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -12,11 +12,11 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/Intrinsics.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Transforms/Utils/CmpInstAnalysis.h" +#include "llvm/Intrinsics.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Transforms/Utils/CmpInstAnalysis.h" using namespace llvm; using namespace PatternMatch; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index aa9512cf03..41fe0873b3 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -12,13 +12,16 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/Support/CallSite.h" -#include "llvm/DataLayout.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/DataLayout.h" +#include "llvm/Support/CallSite.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +STATISTIC(NumSimplified, "Number of library calls simplified"); + /// getPromotedType - Return the specified type promoted as it would be to pass /// though a va_arg area. static Type *getPromotedType(Type *Ty) { @@ -785,8 +788,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) { if (CI->getCalledFunction() == 0) return 0; - if (Value *With = Simplifier->optimizeCall(CI)) - return ReplaceInstUsesWith(*CI, With); + if (Value *With = Simplifier->optimizeCall(CI)) { + ++NumSimplified; + return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With); + } return 0; } @@ -977,7 +982,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (Callee == 0) return false; Instruction *Caller = CS.getInstruction(); - const AttrListPtr &CallerPAL = CS.getAttributes(); + const AttributeSet &CallerPAL = CS.getAttributes(); // Okay, this is a cast from a function to a different type. Unless doing so // would cause a type conversion of one of our arguments, change this call to @@ -1118,7 +1123,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Add the new return attributes. if (RAttrs.hasAttributes()) attrVec.push_back( - AttributeWithIndex::get(AttrListPtr::ReturnIndex, + AttributeWithIndex::get(AttributeSet::ReturnIndex, Attributes::get(FT->getContext(), RAttrs))); AI = CS.arg_begin(); @@ -1173,13 +1178,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Attributes FnAttrs = CallerPAL.getFnAttributes(); if (FnAttrs.hasAttributes()) - attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + attrVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, FnAttrs)); if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. - const AttrListPtr &NewCallerPAL = AttrListPtr::get(Callee->getContext(), + const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(), attrVec); Instruction *NC; @@ -1240,7 +1245,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, Value *Callee = CS.getCalledValue(); PointerType *PTy = cast<PointerType>(Callee->getType()); FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - const AttrListPtr &Attrs = CS.getAttributes(); + const AttributeSet &Attrs = CS.getAttributes(); // If the call already has the 'nest' attribute somewhere then give up - // otherwise 'nest' would occur twice after splicing in the chain. @@ -1255,7 +1260,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, PointerType *NestFPTy = cast<PointerType>(NestF->getType()); FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); - const AttrListPtr &NestAttrs = NestF->getAttributes(); + const AttributeSet &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; Type *NestTy = 0; @@ -1285,7 +1290,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add any result attributes. Attributes Attr = Attrs.getRetAttributes(); if (Attr.hasAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, Attr)); { @@ -1318,7 +1323,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add any function attributes. Attr = Attrs.getFnAttributes(); if (Attr.hasAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, Attr)); // The trampoline may have been bitcast to a bogus type (FTy). @@ -1358,7 +1363,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(FTy->getContext(), NewAttrs); + const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs); Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index bb59db8e7b..19de62c81f 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -14,8 +14,8 @@ #include "InstCombine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; using namespace PatternMatch; diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7c3f8fe15d..1b96c3cca4 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -12,15 +12,15 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; using namespace PatternMatch; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index fd684200fc..5726d3a91d 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" #include "llvm/DataLayout.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumDeadStore, "Number of dead stores eliminated"); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index cefe45ec86..5cd611c420 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -252,6 +252,46 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return Changed ? &I : 0; } +// +// Detect pattern: +// +// log2(Y*0.5) +// +// And check for corresponding fast math flags +// + +static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { + + if (!Op->hasOneUse()) + return; + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op); + if (!II) + return; + if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra()) + return; + Log2 = II; + + Value *OpLog2Of = II->getArgOperand(0); + if (!OpLog2Of->hasOneUse()) + return; + + Instruction *I = dyn_cast<Instruction>(OpLog2Of); + if (!I) + return; + if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + return; + + ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0)); + if (CFP && CFP->isExactlyValue(0.5)) { + Y = I->getOperand(1); + return; + } + CFP = dyn_cast<ConstantFP>(I->getOperand(1)); + if (CFP && CFP->isExactlyValue(0.5)) + Y = I->getOperand(0); +} + Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -284,6 +324,33 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Value *Op1v = dyn_castFNegVal(Op1)) return BinaryOperator::CreateFMul(Op0v, Op1v); + // Under unsafe algebra do: + // X * log2(0.5*Y) = X*log2(Y) - X + if (I.hasUnsafeAlgebra()) { + Value *OpX = NULL; + Value *OpY = NULL; + IntrinsicInst *Log2; + detectLog2OfHalf(Op0, OpY, Log2); + if (OpY) { + OpX = Op1; + } else { + detectLog2OfHalf(Op1, OpY, Log2); + if (OpY) { + OpX = Op0; + } + } + // if pattern detected emit alternate sequence + if (OpX && OpY) { + Log2->setArgOperand(0, OpY); + Value *FMulVal = Builder->CreateFMul(OpX, Log2); + Instruction *FMul = cast<Instruction>(FMulVal); + FMul->copyFastMathFlags(Log2); + Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX); + FSub->copyFastMathFlags(Log2); + return FSub; + } + } + return Changed ? &I : 0; } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index de9c77e600..ea127e9f53 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/DataLayout.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/STLExtras.h" using namespace llvm; /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index a2d4c888f2..a262d711d3 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/Support/PatternMatch.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 57021f1bef..8a28d8eaa2 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -49,7 +49,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { I.setOperand(1, Rem); return &I; } - + return 0; } @@ -70,10 +70,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // We can always evaluate constants shifted. if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // If this is the opposite shift, we can directly reuse the input of the shift // if the needed bits are already zero in the input. This allows us to reuse // the value which means that we don't care if the shift has multiple uses. @@ -95,14 +95,14 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, return CanEvaluateTruncated(I->getOperand(0), Ty); } #endif - + } } - + // We can't mutate something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; - + switch (I->getOpcode()) { default: return false; case Instruction::And: @@ -111,7 +111,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) && CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC); - + case Instruction::Shl: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast<ConstantInt>(I->getOperand(1)); @@ -119,10 +119,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). if (isLeftShift) return true; - + // We can always turn shl(c)+shr(c) -> and(c2). if (CI->getValue() == NumBits) return true; - + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't @@ -133,20 +133,20 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) return true; } - + return false; } case Instruction::LShr: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast<ConstantInt>(I->getOperand(1)); if (CI == 0) return false; - + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) return true; - + // We can always turn lshr(c)+shl(c) -> and(c2). if (CI->getValue() == NumBits) return true; - + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't @@ -157,7 +157,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) return true; } - + return false; } case Instruction::Select: { @@ -175,7 +175,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, return false; return true; } - } + } } /// GetShiftedValue - When CanEvaluateShifted returned true for an expression, @@ -194,7 +194,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, IC.getTargetLibraryInfo()); return V; } - + Instruction *I = cast<Instruction>(V); IC.Worklist.Add(I); @@ -207,7 +207,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC)); I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); return I; - + case Instruction::Shl: { BinaryOperator *BO = cast<BinaryOperator>(I); unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); @@ -227,7 +227,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, BO->setHasNoSignedWrap(false); return I; } - + // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have // zeros. if (CI->getValue() == NumBits) { @@ -240,7 +240,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, } return V; } - + // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that // the and won't be needed. assert(CI->getZExtValue() > NumBits); @@ -255,19 +255,19 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); // We only accept shifts-by-a-constant in CanEvaluateShifted. ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); - + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) { // If this is oversized composite shift, then unsigned shifts get 0. unsigned NewShAmt = NumBits+CI->getZExtValue(); if (NewShAmt >= TypeWidth) return Constant::getNullValue(BO->getType()); - + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); BO->setIsExact(false); return I; } - + // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have // zeros. if (CI->getValue() == NumBits) { @@ -280,7 +280,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, } return V; } - + // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that // the and won't be needed. assert(CI->getZExtValue() > NumBits); @@ -289,7 +289,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, BO->setIsExact(false); return BO; } - + case Instruction::Select: I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC)); @@ -304,7 +304,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, NumBits, isLeftShift, IC)); return PN; } - } + } } @@ -312,24 +312,24 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; - - + + // See if we can propagate this shift into the input, this covers the trivial // cast of lshr(shl(x,c1),c2) as well as other more complex cases. if (I.getOpcode() != Instruction::AShr && CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); - - return ReplaceInstUsesWith(I, + + return ReplaceInstUsesWith(I, GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); } - - - // See if we can simplify any instructions used by the instruction whose sole + + + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); - + // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate // a signed shift. // @@ -340,14 +340,14 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); return &I; } - + // ((X*C1) << C2) == (X * (C1 << C2)) if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) if (BO->getOpcode() == Instruction::Mul && isLeftShift) if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) return BinaryOperator::CreateMul(BO->getOperand(0), ConstantExpr::getShl(BOOp, Op1)); - + // Try to fold constant and into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -355,7 +355,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (isa<PHINode>(Op0)) if (Instruction *NV = FoldOpIntoPhi(I)) return NV; - + // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); @@ -364,7 +364,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // require that the input operand is a shift-by-constant so that we have // confidence that the shifts will get folded together. We could do this // xform in more cases, but it is unlikely to be profitable. - if (TrOp && I.isLogicalShift() && TrOp->isShift() && + if (TrOp && I.isLogicalShift() && TrOp->isShift() && isa<ConstantInt>(TrOp->getOperand(1))) { // Okay, we'll do this xform. Make the shift of shift. Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); @@ -378,7 +378,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); unsigned DstSize = TI->getType()->getScalarSizeInBits(); APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); - + // The mask we constructed says what the trunc would do if occurring // between the shifts. We want to know the effect *after* the second // shift. We know that it is a logical shift by a constant, so adjust the @@ -399,7 +399,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return new TruncInst(And, I.getType()); } } - + if (Op0->hasOneUse()) { if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) @@ -425,14 +425,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } - + // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) Value *Op0BOOp1 = Op0BO->getOperand(1); if (isLeftShift && Op0BOOp1->hasOneUse() && - match(Op0BOOp1, - m_And(m_Shr(m_Value(V1), m_Specific(Op1)), - m_ConstantInt(CC))) && - cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) { + match(Op0BOOp1, + m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), + m_ConstantInt(CC)))) { Value *YS = // (Y << C) Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); @@ -442,7 +441,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } } - + // FALL THROUGH. case Instruction::Sub: { // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) @@ -458,34 +457,32 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } - + // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && match(Op0BO->getOperand(0), - m_And(m_Shr(m_Value(V1), m_Value(V2)), - m_ConstantInt(CC))) && V2 == Op1 && - cast<BinaryOperator>(Op0BO->getOperand(0)) - ->getOperand(0)->hasOneUse()) { + m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))), + m_ConstantInt(CC))) && V2 == Op1) { Value *YS = // (Y << C) Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // X & (CC << C) Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), V1->getName()+".mask"); - + return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } - + break; } } - - + + // If the operand is an bitwise operator with a constant RHS, and the // shift is the only use, we can pull it out of the shift. if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) { bool isValid = true; // Valid only for And, Or, Xor bool highBitSet = false; // Transform if high bit of constant set? - + switch (Op0BO->getOpcode()) { default: isValid = false; break; // Do not perform transform! case Instruction::Add: @@ -499,7 +496,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, highBitSet = true; break; } - + // If this is a signed shift right, and the high bit is modified // by the logical operation, do not perform the transformation. // The highBitSet boolean indicates the value of the high bit of @@ -508,26 +505,26 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // if (isValid && I.getOpcode() == Instruction::AShr) isValid = Op0C->getValue()[TypeBits-1] == highBitSet; - + if (isValid) { Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); - + Value *NewShift = Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); NewShift->takeName(Op0BO); - + return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, NewRHS); } } } } - + // Find out if this is a shift of a shift by a constant. BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); if (ShiftOp && !ShiftOp->isShift()) ShiftOp = 0; - + if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { // This is a constant shift of a constant shift. Be careful about hiding @@ -548,9 +545,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. Value *X = ShiftOp->getOperand(0); - + IntegerType *Ty = cast<IntegerType>(I.getType()); - + // Check for (X << c1) << c2 and (X >> c1) >> c2 if (I.getOpcode() == ShiftOp->getOpcode()) { uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. @@ -561,11 +558,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. } - + return BinaryOperator::Create(I.getOpcode(), X, ConstantInt::get(Ty, AmtSum)); } - + if (ShiftAmt1 == ShiftAmt2) { // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). if (I.getOpcode() == Instruction::LShr && @@ -605,7 +602,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return NewLShr; } Value *Shift = Builder->CreateLShr(X, ShiftDiffCst); - + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); @@ -653,12 +650,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return NewShl; } Value *Shift = Builder->CreateShl(X, ShiftDiffCst); - + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); } - + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However, // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits. if (I.getOpcode() == Instruction::AShr && @@ -682,21 +679,21 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), TD)) return ReplaceInstUsesWith(I, V); - + if (Instruction *V = commonShiftTransforms(I)) return V; - + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) { unsigned ShAmt = Op1C->getZExtValue(); - + // If the shifted-out value is known-zero, then this is a NUW shift. - if (!I.hasNoUnsignedWrap() && + if (!I.hasNoUnsignedWrap() && MaskedValueIsZero(I.getOperand(0), APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) { I.setHasNoUnsignedWrap(); return &I; } - + // If the shifted out value is all signbits, this is a NSW shift. if (!I.hasNoSignedWrap() && ComputeNumSignBits(I.getOperand(0)) > ShAmt) { @@ -712,7 +709,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { match(I.getOperand(1), m_Constant(C2))) return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); - return 0; + return 0; } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { @@ -722,9 +719,9 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { if (Instruction *R = commonShiftTransforms(I)) return R; - + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { unsigned ShAmt = Op1C->getZExtValue(); @@ -743,15 +740,15 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return new ZExtInst(Cmp, II->getType()); } } - + // If the shifted-out value is known-zero, then this is an exact shift. - if (!I.isExact() && + if (!I.isExact() && MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ I.setIsExact(); return &I; - } + } } - + return 0; } @@ -762,12 +759,12 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { if (Instruction *R = commonShiftTransforms(I)) return R; - + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { unsigned ShAmt = Op1C->getZExtValue(); - + // If the input is a SHL by the same constant (ashr (shl X, C), C), then we // have a sign-extend idiom. Value *X; @@ -791,23 +788,23 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { } // If the shifted-out value is known-zero, then this is an exact shift. - if (!I.isExact() && + if (!I.isExact() && MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ I.setIsExact(); return &I; } - } - + } + // See if we can turn a signed shr into an unsigned shr. if (MaskedValueIsZero(Op0, APInt::getSignBit(I.getType()->getScalarSizeInBits()))) return BinaryOperator::CreateLShr(Op0, Op1); - + // Arithmetic shifting an all-sign-bit value is a no-op. unsigned NumSignBits = ComputeNumSignBits(Op0); if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return ReplaceInstUsesWith(I, Op0); - + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 602b203371..08aedb3200 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -16,9 +16,10 @@ #include "InstCombine.h" #include "llvm/DataLayout.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; - +using namespace llvm::PatternMatch; /// ShrinkDemandedConstant - Check to see if the specified operand of the /// specified instruction is a constant integer. If so, check to see if there @@ -199,8 +200,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == (DemandedMask & (~LHSKnownZero))) return I->getOperand(1); + } else if (I->getOpcode() == Instruction::Xor) { + // We can simplify (X^Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known zero on one side, return the + // other. + if ((DemandedMask & RHSKnownZero) == DemandedMask) + return I->getOperand(0); + if ((DemandedMask & LHSKnownZero) == DemandedMask) + return I->getOperand(1); } - + // Compute the KnownZero/KnownOne bits to simplify things downstream. ComputeMaskedBits(I, KnownZero, KnownOne, Depth); return 0; @@ -580,6 +594,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + { + Value *VarX; ConstantInt *C1; + if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) { + Instruction *Shr = cast<Instruction>(I->getOperand(0)); + Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask, + KnownZero, KnownOne); + if (R) + return R; + } + } + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); @@ -800,6 +825,79 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return 0; } +/// Helper routine of SimplifyDemandedUseBits. It tries to simplify +/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into +/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign +/// of "C2-C1". +/// +/// Suppose E1 and E2 are generally different in bits S={bm, bm+1, +/// ..., bn}, without considering the specific value X is holding. +/// This transformation is legal iff one of following conditions is hold: +/// 1) All the bit in S are 0, in this case E1 == E2. +/// 2) We don't care those bits in S, per the input DemandedMask. +/// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the +/// rest bits. +/// +/// Currently we only test condition 2). +/// +/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was +/// not successful. +Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, + Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) { + + unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue(); + unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue(); + + KnownOne.clearAllBits(); + KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1); + KnownZero &= DemandedMask; + + if (ShlAmt == 0 || ShrAmt == 0) + return 0; + + Value *VarX = Shr->getOperand(0); + Type *Ty = VarX->getType(); + + APInt BitMask1(Ty->getIntegerBitWidth(), (uint64_t)-1); + APInt BitMask2(Ty->getIntegerBitWidth(), (uint64_t)-1); + + bool isLshr = (Shr->getOpcode() == Instruction::LShr); + BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) : + (BitMask1.ashr(ShrAmt) << ShlAmt); + + if (ShrAmt <= ShlAmt) { + BitMask2 <<= (ShlAmt - ShrAmt); + } else { + BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt): + BitMask2.ashr(ShrAmt - ShlAmt); + } + + // Check if condition-2 (see the comment to this function) is satified. + if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) { + if (ShrAmt == ShlAmt) + return VarX; + + if (!Shr->hasOneUse()) + return 0; + + BinaryOperator *New; + if (ShrAmt < ShlAmt) { + Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt); + New = BinaryOperator::CreateShl(VarX, Amt); + BinaryOperator *Orig = cast<BinaryOperator>(Shl); + New->setHasNoSignedWrap(Orig->hasNoSignedWrap()); + New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap()); + } else { + Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt); + New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) : + BinaryOperator::CreateAShr(VarX, Amt); + } + + return InsertNewInstWith(New, *Shl); + } + + return 0; +} /// SimplifyDemandedVectorElts - The specified value produces a vector with /// any number of elements. DemandedElts contains the set of elements that are diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index ea654ae9ed..b1a4966920 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -11,11 +11,11 @@ #define INSTCOMBINE_WORKLIST_H #define DEBUG_TYPE "instcombine" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Instruction.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" namespace llvm { diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index feef2ccee4..9da58d0e71 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -36,23 +36,23 @@ #define DEBUG_TYPE "instcombine" #include "llvm/Transforms/Scalar.h" #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" +#include "llvm-c/Initialization.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm-c/Initialization.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <climits> using namespace llvm; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 4e05c3200c..f095cff33c 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -15,14 +15,8 @@ #define DEBUG_TYPE "asan" +#include "llvm/Transforms/Instrumentation.h" #include "BlackList.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/InlineAsm.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Type.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallSet.h" @@ -30,19 +24,24 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/DataLayout.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/InlineAsm.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" - -#include <string> +#include "llvm/Type.h" #include <algorithm> +#include <string> using namespace llvm; @@ -70,6 +69,9 @@ static const char *kAsanMappingScaleName = "__asan_mapping_scale"; static const char *kAsanStackMallocName = "__asan_stack_malloc"; static const char *kAsanStackFreeName = "__asan_stack_free"; static const char *kAsanGenPrefix = "__asan_gen_"; +static const char *kAsanPoisonStackMemoryName = "__asan_poison_stack_memory"; +static const char *kAsanUnpoisonStackMemoryName = + "__asan_unpoison_stack_memory"; static const int kAsanStackLeftRedzoneMagic = 0xf1; static const int kAsanStackMidRedzoneMagic = 0xf2; @@ -113,9 +115,10 @@ static cl::opt<bool> ClInitializers("asan-initialization-order", cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt<bool> ClMemIntrin("asan-memintrin", cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true)); -// This flag may need to be replaced with -fasan-blacklist. -static cl::opt<std::string> ClBlackListFile("asan-blacklist", - cl::desc("File containing the list of functions to ignore " +static cl::opt<bool> ClRealignStack("asan-realign-stack", + cl::desc("Realign stack to 32"), cl::Hidden, cl::init(true)); +static cl::opt<std::string> ClBlacklistFile("asan-blacklist", + cl::desc("File containing the list of objects to ignore " "during instrumentation"), cl::Hidden); // These flags allow to change the shadow mapping. @@ -136,6 +139,10 @@ static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp", static cl::opt<bool> ClOptGlobals("asan-opt-globals", cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClCheckLifetime("asan-check-lifetime", + cl::desc("Use llvm.lifetime intrinsics to insert extra checks"), + cl::Hidden, cl::init(false)); + // Debug flags. static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, cl::init(0)); @@ -186,8 +193,19 @@ static size_t RedzoneSize() { /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { - AddressSanitizer(); - virtual const char *getPassName() const; + AddressSanitizer(bool CheckInitOrder = false, + bool CheckUseAfterReturn = false, + bool CheckLifetime = false, + StringRef BlacklistFile = StringRef()) + : FunctionPass(ID), + CheckInitOrder(CheckInitOrder || ClInitializers), + CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn), + CheckLifetime(CheckLifetime || ClCheckLifetime), + BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile + : BlacklistFile) {} + virtual const char *getPassName() const { + return "AddressSanitizerFunctionPass"; + } void instrumentMop(Instruction *I); void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite); @@ -206,10 +224,10 @@ struct AddressSanitizer : public FunctionPass { bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool poisonStackInFunction(Function &F); virtual bool doInitialization(Module &M); - virtual bool doFinalization(Module &M); static char ID; // Pass identification, replacement for typeid private: + void initializeCallbacks(Module &M); uint64_t getAllocaSizeInBytes(AllocaInst *AI) { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); @@ -229,7 +247,18 @@ struct AddressSanitizer : public FunctionPass { Value *ShadowBase, bool DoPoison); bool LooksLikeCodeInBug11395(Instruction *I); void FindDynamicInitializers(Module &M); - + /// Analyze lifetime intrinsics for given alloca. Use Value* instead of + /// AllocaInst* here, as we call this method after we merge all allocas into a + /// single one. Returns true if ASan added some instrumentation. + bool handleAllocaLifetime(Value *Alloca); + /// Analyze lifetime intrinsics for a specific value, casted from alloca. + /// Returns true if if ASan added some instrumentation. + bool handleValueLifetime(Value *V); + void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, bool DoPoison); + + bool CheckInitOrder; + bool CheckUseAfterReturn; + bool CheckLifetime; LLVMContext *C; DataLayout *TD; uint64_t MappingOffset; @@ -239,7 +268,9 @@ struct AddressSanitizer : public FunctionPass { Function *AsanCtorFunction; Function *AsanInitFunction; Function *AsanStackMallocFunc, *AsanStackFreeFunc; + Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc; Function *AsanHandleNoReturnFunc; + SmallString<64> BlacklistFile; OwningPtr<BlackList> BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; @@ -247,19 +278,32 @@ struct AddressSanitizer : public FunctionPass { SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; }; -// FIXME: inherit this from ModulePass and actually use it as a ModulePass. -class AddressSanitizerCreateGlobalRedzonesPass { +class AddressSanitizerModule : public ModulePass { public: - bool runOnModule(Module &M, DataLayout *TD); + AddressSanitizerModule(bool CheckInitOrder = false, + StringRef BlacklistFile = StringRef()) + : ModulePass(ID), + CheckInitOrder(CheckInitOrder || ClInitializers), + BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile + : BlacklistFile) {} + bool runOnModule(Module &M); + static char ID; // Pass identification, replacement for typeid + virtual const char *getPassName() const { + return "AddressSanitizerModule"; + } + private: bool ShouldInstrumentGlobal(GlobalVariable *G); void createInitializerPoisonCalls(Module &M, Value *FirstAddr, Value *LastAddr); + bool CheckInitOrder; + SmallString<64> BlacklistFile; OwningPtr<BlackList> BL; SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; Type *IntptrTy; LLVMContext *C; + DataLayout *TD; }; } // namespace @@ -268,13 +312,20 @@ char AddressSanitizer::ID = 0; INITIALIZE_PASS(AddressSanitizer, "asan", "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) -AddressSanitizer::AddressSanitizer() : FunctionPass(ID) { } -FunctionPass *llvm::createAddressSanitizerPass() { - return new AddressSanitizer(); +FunctionPass *llvm::createAddressSanitizerFunctionPass( + bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime, + StringRef BlacklistFile) { + return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn, + CheckLifetime, BlacklistFile); } -const char *AddressSanitizer::getPassName() const { - return "AddressSanitizer"; +char AddressSanitizerModule::ID = 0; +INITIALIZE_PASS(AddressSanitizerModule, "asan-module", + "AddressSanitizer: detects use-after-free and out-of-bounds bugs." + "ModulePass", false, false) +ModulePass *llvm::createAddressSanitizerModulePass( + bool CheckInitOrder, StringRef BlacklistFile) { + return new AddressSanitizerModule(CheckInitOrder, BlacklistFile); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -385,7 +436,7 @@ void AddressSanitizer::instrumentMop(Instruction *I) { if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { // If initialization order checking is disabled, a simple access to a // dynamically initialized global is always valid. - if (!ClInitializers) + if (!CheckInitOrder) return; // If a global variable does not have dynamic initialization we don't // have to instrument it. However, if a global does not have initailizer @@ -492,7 +543,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Crash->setDebugLoc(OrigIns->getDebugLoc()); } -void AddressSanitizerCreateGlobalRedzonesPass::createInitializerPoisonCalls( +void AddressSanitizerModule::createInitializerPoisonCalls( Module &M, Value *FirstAddr, Value *LastAddr) { // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); @@ -524,8 +575,7 @@ void AddressSanitizerCreateGlobalRedzonesPass::createInitializerPoisonCalls( } } -bool AddressSanitizerCreateGlobalRedzonesPass::ShouldInstrumentGlobal( - GlobalVariable *G) { +bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { Type *Ty = cast<PointerType>(G->getType())->getElementType(); DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); @@ -587,9 +637,13 @@ bool AddressSanitizerCreateGlobalRedzonesPass::ShouldInstrumentGlobal( // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. -bool AddressSanitizerCreateGlobalRedzonesPass::runOnModule(Module &M, - DataLayout *TD) { - BL.reset(new BlackList(ClBlackListFile)); +bool AddressSanitizerModule::runOnModule(Module &M) { + if (!ClGlobals) return false; + TD = getAnalysisIfAvailable<DataLayout>(); + if (!TD) + return false; + BL.reset(new BlackList(BlacklistFile)); + if (BL->isIn(M)) return false; DynamicallyInitializedGlobals.Init(M); C = &(M.getContext()); IntptrTy = Type::getIntNTy(*C, TD->getPointerSizeInBits()); @@ -677,7 +731,7 @@ bool AddressSanitizerCreateGlobalRedzonesPass::runOnModule(Module &M, NULL); // Populate the first and last globals declared in this TU. - if (ClInitializers && GlobalHasDynamicInitializer) { + if (CheckInitOrder && GlobalHasDynamicInitializer) { LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); if (FirstDynamic == 0) FirstDynamic = LastDynamic; @@ -692,7 +746,7 @@ bool AddressSanitizerCreateGlobalRedzonesPass::runOnModule(Module &M, ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); // Create calls for poisoning before initializers run and unpoisoning after. - if (ClInitializers && FirstDynamic && LastDynamic) + if (CheckInitOrder && FirstDynamic && LastDynamic) createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( @@ -726,32 +780,8 @@ bool AddressSanitizerCreateGlobalRedzonesPass::runOnModule(Module &M, return true; } -// virtual -bool AddressSanitizer::doInitialization(Module &M) { - // Initialize the private fields. No one has accessed them before. - TD = getAnalysisIfAvailable<DataLayout>(); - - if (!TD) - return false; - BL.reset(new BlackList(ClBlackListFile)); - DynamicallyInitializedGlobals.Init(M); - - C = &(M.getContext()); - LongSize = TD->getPointerSizeInBits(); - IntptrTy = Type::getIntNTy(*C, LongSize); - IntptrPtrTy = PointerType::get(IntptrTy, 0); - - AsanCtorFunction = Function::Create( - FunctionType::get(Type::getVoidTy(*C), false), - GlobalValue::InternalLinkage, kAsanModuleCtorName, &M); - BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction); - // call __asan_init in the module ctor. - IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB)); - AsanInitFunction = checkInterfaceFunction( - M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL)); - AsanInitFunction->setLinkage(Function::ExternalLinkage); - IRB.CreateCall(AsanInitFunction); - +void AddressSanitizer::initializeCallbacks(Module &M) { + IRBuilder<> IRB(*C); // Create __asan_report* callbacks. for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; @@ -773,11 +803,42 @@ bool AddressSanitizer::doInitialization(Module &M) { IntptrTy, IntptrTy, IntptrTy, NULL)); AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction( kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); + AsanPoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction( + kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction( + kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), /*hasSideEffects=*/true); +} + +// virtual +bool AddressSanitizer::doInitialization(Module &M) { + // Initialize the private fields. No one has accessed them before. + TD = getAnalysisIfAvailable<DataLayout>(); + + if (!TD) + return false; + BL.reset(new BlackList(BlacklistFile)); + DynamicallyInitializedGlobals.Init(M); + + C = &(M.getContext()); + LongSize = TD->getPointerSizeInBits(); + IntptrTy = Type::getIntNTy(*C, LongSize); + IntptrPtrTy = PointerType::get(IntptrTy, 0); + + AsanCtorFunction = Function::Create( + FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleCtorName, &M); + BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction); + // call __asan_init in the module ctor. + IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB)); + AsanInitFunction = checkInterfaceFunction( + M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL)); + AsanInitFunction->setLinkage(Function::ExternalLinkage); + IRB.CreateCall(AsanInitFunction); llvm::Triple targetTriple(M.getTargetTriple()); bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android; @@ -817,18 +878,6 @@ bool AddressSanitizer::doInitialization(Module &M) { return true; } -bool AddressSanitizer::doFinalization(Module &M) { - // We transform the globals at the very end so that the optimization analysis - // works on the original globals. - if (ClGlobals) { - // FIXME: instead of doFinalization, run this as a true ModulePass. - AddressSanitizerCreateGlobalRedzonesPass Pass; - return Pass.runOnModule(M, TD); - } - return false; -} - - bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // For each NSObject descendant having a +load method, this method is invoked // by the ObjC runtime before any of the static constructors is called. @@ -849,6 +898,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); + initializeCallbacks(*F.getParent()); // If needed, insert __asan_init before checking for AddressSafety attr. maybeInsertAsanInitAtFunctionEntry(F); @@ -1021,6 +1071,74 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) { return true; } +// Handling llvm.lifetime intrinsics for a given %alloca: +// (1) collect all llvm.lifetime.xxx(%size, %value) describing the alloca. +// (2) if %size is constant, poison memory for llvm.lifetime.end (to detect +// invalid accesses) and unpoison it for llvm.lifetime.start (the memory +// could be poisoned by previous llvm.lifetime.end instruction, as the +// variable may go in and out of scope several times, e.g. in loops). +// (3) if we poisoned at least one %alloca in a function, +// unpoison the whole stack frame at function exit. +bool AddressSanitizer::handleAllocaLifetime(Value *Alloca) { + assert(CheckLifetime); + Type *AllocaType = Alloca->getType(); + Type *Int8PtrTy = Type::getInt8PtrTy(AllocaType->getContext()); + + bool Res = false; + // Typical code looks like this: + // %alloca = alloca <type>, <alignment> + // ... some code ... + // %val1 = bitcast <type>* %alloca to i8* + // call void @llvm.lifetime.start(i64 <size>, i8* %val1) + // ... more code ... + // %val2 = bitcast <type>* %alloca to i8* + // call void @llvm.lifetime.start(i64 <size>, i8* %val2) + // That is, to handle %alloca we must find all its casts to + // i8* values, and find lifetime instructions for these values. + if (AllocaType == Int8PtrTy) + Res |= handleValueLifetime(Alloca); + for (Value::use_iterator UI = Alloca->use_begin(), UE = Alloca->use_end(); + UI != UE; ++UI) { + if (UI->getType() != Int8PtrTy) continue; + if (UI->stripPointerCasts() != Alloca) continue; + Res |= handleValueLifetime(*UI); + } + return Res; +} + +bool AddressSanitizer::handleValueLifetime(Value *V) { + assert(CheckLifetime); + bool Res = false; + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; + ++UI) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI); + if (!II) continue; + Intrinsic::ID ID = II->getIntrinsicID(); + if (ID != Intrinsic::lifetime_start && + ID != Intrinsic::lifetime_end) + continue; + if (V != II->getArgOperand(1)) + continue; + // Found lifetime intrinsic, add ASan instrumentation if necessary. + ConstantInt *Size = dyn_cast<ConstantInt>(II->getArgOperand(0)); + // If size argument is undefined, don't do anything. + if (Size->isMinusOne()) + continue; + // Check that size doesn't saturate uint64_t and can + // be stored in IntptrTy. + const uint64_t SizeValue = Size->getValue().getLimitedValue(); + if (SizeValue == ~0ULL || + !ConstantInt::isValueValidForType(IntptrTy, SizeValue)) { + continue; + } + IRBuilder<> IRB(II); + bool DoPoison = (ID == Intrinsic::lifetime_end); + poisonAlloca(V, SizeValue, IRB, DoPoison); + Res = true; + } + return Res; +} + // Find all static Alloca instructions and put // poisoned red zones around all of them. // Then unpoison everything back before the function returns. @@ -1039,9 +1157,11 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { SmallVector<AllocaInst*, 16> AllocaVec; SmallVector<Instruction*, 8> RetVec; uint64_t TotalSize = 0; + bool HavePoisonedAllocas = false; // Filter out Alloca instructions we want (and can) handle. // Collect Ret instructions. + unsigned ResultAlignment = 1 << MappingScale(); for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { BasicBlock &BB = *FI; @@ -1057,7 +1177,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { if (AI->isArrayAllocation()) continue; if (!AI->isStaticAlloca()) continue; if (!AI->getAllocatedType()->isSized()) continue; - if (AI->getAlignment() > RedzoneSize()) continue; + ResultAlignment = std::max(ResultAlignment, AI->getAlignment()); AllocaVec.push_back(AI); uint64_t AlignedSize = getAlignedAllocaSize(AI); TotalSize += AlignedSize; @@ -1068,7 +1188,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize(); - bool DoStackMalloc = ClUseAfterReturn + bool DoStackMalloc = CheckUseAfterReturn && LocalStackSize <= kMaxStackMallocSize; Instruction *InsBefore = AllocaVec[0]; @@ -1078,7 +1198,9 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize); AllocaInst *MyAlloca = new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore); - MyAlloca->setAlignment(RedzoneSize()); + if (ClRealignStack && ResultAlignment < RedzoneSize()) + ResultAlignment = RedzoneSize(); + MyAlloca->setAlignment(ResultAlignment); assert(MyAlloca->isStaticAlloca()); Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy); Value *LocalStackBase = OrigStackBase; @@ -1103,10 +1225,13 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { << Name.size() << " " << Name << " "; uint64_t AlignedSize = getAlignedAllocaSize(AI); assert((AlignedSize % RedzoneSize()) == 0); - AI->replaceAllUsesWith( - IRB.CreateIntToPtr( + Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)), - AI->getType())); + AI->getType()); + AI->replaceAllUsesWith(NewAllocaPtr); + // Analyze lifetime intrinsics only for static allocas we handle. + if (CheckLifetime) + HavePoisonedAllocas |= handleAllocaLifetime(NewAllocaPtr); Pos += AlignedSize + RedzoneSize(); } assert(Pos == LocalStackSize); @@ -1139,9 +1264,15 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRBRet, ShadowBase, false); if (DoStackMalloc) { + // In use-after-return mode, mark the whole stack frame unaddressable. IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase, ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); + } else if (HavePoisonedAllocas) { + // If we poisoned some allocas in llvm.lifetime analysis, + // unpoison whole stack frame now. + assert(LocalStackBase == OrigStackBase); + poisonAlloca(LocalStackBase, LocalStackSize, IRBRet, false); } } @@ -1155,3 +1286,13 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { return true; } + +void AddressSanitizer::poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, + bool DoPoison) { + // For now just insert the call to ASan runtime. + Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy); + Value *SizeArg = ConstantInt::get(IntptrTy, Size); + IRB.CreateCall2(DoPoison ? AsanPoisonStackMemoryFunc + : AsanUnpoisonStackMemoryFunc, + AddrArg, SizeArg); +} diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp index e02c631f7f..0bfb186562 100644 --- a/lib/Transforms/Instrumentation/BlackList.cpp +++ b/lib/Transforms/Instrumentation/BlackList.cpp @@ -13,9 +13,6 @@ // //===----------------------------------------------------------------------===// -#include <utility> -#include <string> - #include "BlackList.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ -28,12 +25,14 @@ #include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" +#include <string> +#include <utility> namespace llvm { BlackList::BlackList(const StringRef Path) { // Validate and open blacklist file. - if (!Path.size()) return; + if (Path.empty()) return; OwningPtr<MemoryBuffer> File; if (error_code EC = MemoryBuffer::getFile(Path, File)) { report_fatal_error("Can't open blacklist file: " + Path + ": " + @@ -53,6 +52,10 @@ BlackList::BlackList(const StringRef Path) { std::pair<StringRef, StringRef> SplitLine = I->split(":"); StringRef Prefix = SplitLine.first; std::string Regexp = SplitLine.second; + if (Regexp.empty()) { + // Missing ':' in the line. + report_fatal_error("malformed blacklist line: " + SplitLine.first); + } // Replace * with .* for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; @@ -69,7 +72,7 @@ BlackList::BlackList(const StringRef Path) { } // Add this regexp into the proper group by its prefix. - if (Regexps[Prefix].size()) + if (!Regexps[Prefix].empty()) Regexps[Prefix] += "|"; Regexps[Prefix] += Regexp; } @@ -110,10 +113,12 @@ bool BlackList::isInInit(const GlobalVariable &G) { inSection("global-init-type", GetGVTypeString(G))); } -bool BlackList::inSection(const StringRef Section, - const StringRef Query) { - Regex *FunctionRegex = Entries[Section]; - return FunctionRegex ? FunctionRegex->match(Query) : false; +bool BlackList::inSection(const StringRef Section, const StringRef Query) { + StringMap<Regex*>::iterator I = Entries.find(Section); + if (I == Entries.end()) return false; + + Regex *FunctionRegex = I->getValue(); + return FunctionRegex->match(Query); } } // namespace llvm diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 2d5d5603c0..303e04ac16 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -13,19 +13,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "bounds-checking" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/DataLayout.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" #include "llvm/Pass.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/TargetFolder.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Instrumentation.h" using namespace llvm; static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap", @@ -108,6 +108,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) { else Cmp = 0; // unconditional branch } + ++ChecksAdded; Instruction *Inst = Builder->GetInsertPoint(); BasicBlock *OldBB = Inst->getParent(); @@ -162,7 +163,6 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { } emitBranchToTrap(Or); - ++ChecksAdded; return true; } diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 058f68c7ce..1c9e053679 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMInstrumentation BoundsChecking.cpp EdgeProfiling.cpp GCOVProfiling.cpp + MemorySanitizer.cpp Instrumentation.cpp OptimalEdgeProfiling.cpp PathProfiling.cpp diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp index e8ef2654d2..41e42aff49 100644 --- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -18,13 +18,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-edge-profiling" +#include "llvm/Transforms/Instrumentation.h" #include "ProfilingUtils.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Instrumentation.h" -#include "llvm/ADT/Statistic.h" #include <set> using namespace llvm; diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index a8adaa62d7..5e064cd70d 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -16,19 +16,19 @@ #define DEBUG_TYPE "insert-gcov-profiling" -#include "ProfilingUtils.h" #include "llvm/Transforms/Instrumentation.h" -#include "llvm/DebugInfo.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "ProfilingUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/UniqueVector.h" +#include "llvm/DebugInfo.h" +#include "llvm/IRBuilder.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugLoc.h" #include "llvm/Support/InstIterator.h" @@ -45,11 +45,11 @@ namespace { static char ID; GCOVProfiler() : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false), - UseExtraChecksum(false) { + UseExtraChecksum(false), NoRedZone(false) { initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false, - bool useExtraChecksum = false) + bool useExtraChecksum = false, bool NoRedZone = false) : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) { assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); @@ -98,6 +98,7 @@ namespace { bool EmitData; bool Use402Format; bool UseExtraChecksum; + bool NoRedZone; Module *M; LLVMContext *Ctx; @@ -110,8 +111,10 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, bool Use402Format, - bool UseExtraChecksum) { - return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum); + bool UseExtraChecksum, + bool NoRedZone) { + return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum, + NoRedZone); } namespace { @@ -638,6 +641,9 @@ void GCOVProfiler::insertCounterWriteout( WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage, "__llvm_gcov_writeout", M); WriteoutF->setUnnamedAddr(true); + WriteoutF->addFnAttr(Attributes::NoInline); + if (NoRedZone) + WriteoutF->addFnAttr(Attributes::NoRedZone); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); IRBuilder<> Builder(BB); @@ -683,6 +689,8 @@ void GCOVProfiler::insertCounterWriteout( F->setUnnamedAddr(true); F->setLinkage(GlobalValue::InternalLinkage); F->addFnAttr(Attributes::NoInline); + if (NoRedZone) + F->addFnAttr(Attributes::NoRedZone); BB = BasicBlock::Create(*Ctx, "entry", F); Builder.SetInsertPoint(BB); @@ -702,6 +710,8 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Fn->setUnnamedAddr(true); Fn->setLinkage(GlobalValue::InternalLinkage); Fn->addFnAttr(Attributes::NoInline); + if (NoRedZone) + Fn->addFnAttr(Attributes::NoRedZone); Type *Int32Ty = Type::getInt32Ty(*Ctx); Type *Int64Ty = Type::getInt64Ty(*Ctx); @@ -758,6 +768,9 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { else FlushF->setLinkage(GlobalValue::InternalLinkage); FlushF->setUnnamedAddr(true); + FlushF->addFnAttr(Attributes::NoInline); + if (NoRedZone) + FlushF->addFnAttr(Attributes::NoRedZone); BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 1e0b4a348a..8ba102559b 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -21,11 +21,13 @@ using namespace llvm; /// library. void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeAddressSanitizerPass(Registry); + initializeAddressSanitizerModulePass(Registry); initializeBoundsCheckingPass(Registry); initializeEdgeProfilerPass(Registry); initializeGCOVProfilerPass(Registry); initializeOptimalEdgeProfilerPass(Registry); initializePathProfilerPass(Registry); + initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); } diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index a4bb5a66af..50226db8c2 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -15,10 +15,10 @@ #ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H #define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H -#include "llvm/BasicBlock.h" #include "llvm/ADT/EquivalenceClasses.h" -#include <vector> +#include "llvm/BasicBlock.h" #include <algorithm> +#include <vector> namespace llvm { diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp new file mode 100644 index 0000000000..947a2e3b12 --- /dev/null +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -0,0 +1,1579 @@ +//===-- MemorySanitizer.cpp - detector of uninitialized reads -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file is a part of MemorySanitizer, a detector of uninitialized +/// reads. +/// +/// Status: early prototype. +/// +/// The algorithm of the tool is similar to Memcheck +/// (http://goo.gl/QKbem). We associate a few shadow bits with every +/// byte of the application memory, poison the shadow of the malloc-ed +/// or alloca-ed memory, load the shadow bits on every memory read, +/// propagate the shadow bits through some of the arithmetic +/// instruction (including MOV), store the shadow bits on every memory +/// write, report a bug on some other instructions (e.g. JMP) if the +/// associated shadow is poisoned. +/// +/// But there are differences too. The first and the major one: +/// compiler instrumentation instead of binary instrumentation. This +/// gives us much better register allocation, possible compiler +/// optimizations and a fast start-up. But this brings the major issue +/// as well: msan needs to see all program events, including system +/// calls and reads/writes in system libraries, so we either need to +/// compile *everything* with msan or use a binary translation +/// component (e.g. DynamoRIO) to instrument pre-built libraries. +/// Another difference from Memcheck is that we use 8 shadow bits per +/// byte of application memory and use a direct shadow mapping. This +/// greatly simplifies the instrumentation code and avoids races on +/// shadow updates (Memcheck is single-threaded so races are not a +/// concern there. Memcheck uses 2 shadow bits per byte with a slow +/// path storage that uses 8 bits per byte). +/// +/// The default value of shadow is 0, which means "clean" (not poisoned). +/// +/// Every module initializer should call __msan_init to ensure that the +/// shadow memory is ready. On error, __msan_warning is called. Since +/// parameters and return values may be passed via registers, we have a +/// specialized thread-local shadow for return values +/// (__msan_retval_tls) and parameters (__msan_param_tls). +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "msan" + +#include "llvm/Transforms/Instrumentation.h" +#include "BlackList.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ValueMap.h" +#include "llvm/DataLayout.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/InlineAsm.h" +#include "llvm/InstVisitor.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/MDBuilder.h" +#include "llvm/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Type.h" + +using namespace llvm; + +static const uint64_t kShadowMask32 = 1ULL << 31; +static const uint64_t kShadowMask64 = 1ULL << 46; +static const uint64_t kOriginOffset32 = 1ULL << 30; +static const uint64_t kOriginOffset64 = 1ULL << 45; + +// This is an important flag that makes the reports much more +// informative at the cost of greater slowdown. Not fully implemented +// yet. +// FIXME: this should be a top-level clang flag, e.g. +// -fmemory-sanitizer-full. +static cl::opt<bool> ClTrackOrigins("msan-track-origins", + cl::desc("Track origins (allocation sites) of poisoned memory"), + cl::Hidden, cl::init(false)); +static cl::opt<bool> ClKeepGoing("msan-keep-going", + cl::desc("keep going after reporting a UMR"), + cl::Hidden, cl::init(false)); +static cl::opt<bool> ClPoisonStack("msan-poison-stack", + cl::desc("poison uninitialized stack variables"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call", + cl::desc("poison uninitialized stack variables with a call"), + cl::Hidden, cl::init(false)); +static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern", + cl::desc("poison uninitialized stack variables with the given patter"), + cl::Hidden, cl::init(0xff)); + +static cl::opt<bool> ClHandleICmp("msan-handle-icmp", + cl::desc("propagate shadow through ICmpEQ and ICmpNE"), + cl::Hidden, cl::init(true)); + +static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin", + cl::desc("store origin for clean (fully initialized) values"), + cl::Hidden, cl::init(false)); + +// This flag controls whether we check the shadow of the address +// operand of load or store. Such bugs are very rare, since load from +// a garbage address typically results in SEGV, but still happen +// (e.g. only lower bits of address are garbage, or the access happens +// early at program startup where malloc-ed memory is more likely to +// be zeroed. As of 2012-08-28 this flag adds 20% slowdown. +static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address", + cl::desc("report accesses through a pointer which has poisoned shadow"), + cl::Hidden, cl::init(true)); + +static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions", + cl::desc("print out instructions with default strict semantics"), + cl::Hidden, cl::init(false)); + +static cl::opt<std::string> ClBlackListFile("msan-blacklist", + cl::desc("File containing the list of functions where MemorySanitizer " + "should not report bugs"), cl::Hidden); + +namespace { + +/// \brief An instrumentation pass implementing detection of uninitialized +/// reads. +/// +/// MemorySanitizer: instrument the code in module to find +/// uninitialized reads. +class MemorySanitizer : public FunctionPass { +public: + MemorySanitizer() : FunctionPass(ID), TD(0), WarningFn(0) { } + const char *getPassName() const { return "MemorySanitizer"; } + bool runOnFunction(Function &F); + bool doInitialization(Module &M); + static char ID; // Pass identification, replacement for typeid. + +private: + void initializeCallbacks(Module &M); + + DataLayout *TD; + LLVMContext *C; + Type *IntptrTy; + Type *OriginTy; + /// \brief Thread-local shadow storage for function parameters. + GlobalVariable *ParamTLS; + /// \brief Thread-local origin storage for function parameters. + GlobalVariable *ParamOriginTLS; + /// \brief Thread-local shadow storage for function return value. + GlobalVariable *RetvalTLS; + /// \brief Thread-local origin storage for function return value. + GlobalVariable *RetvalOriginTLS; + /// \brief Thread-local shadow storage for in-register va_arg function + /// parameters (x86_64-specific). + GlobalVariable *VAArgTLS; + /// \brief Thread-local shadow storage for va_arg overflow area + /// (x86_64-specific). + GlobalVariable *VAArgOverflowSizeTLS; + /// \brief Thread-local space used to pass origin value to the UMR reporting + /// function. + GlobalVariable *OriginTLS; + + /// \brief The run-time callback to print a warning. + Value *WarningFn; + /// \brief Run-time helper that copies origin info for a memory range. + Value *MsanCopyOriginFn; + /// \brief Run-time helper that generates a new origin value for a stack + /// allocation. + Value *MsanSetAllocaOriginFn; + /// \brief Run-time helper that poisons stack on function entry. + Value *MsanPoisonStackFn; + /// \brief MSan runtime replacements for memmove, memcpy and memset. + Value *MemmoveFn, *MemcpyFn, *MemsetFn; + + /// \brief Address mask used in application-to-shadow address calculation. + /// ShadowAddr is computed as ApplicationAddr & ~ShadowMask. + uint64_t ShadowMask; + /// \brief Offset of the origin shadow from the "normal" shadow. + /// OriginAddr is computed as (ShadowAddr + OriginOffset) & ~3ULL + uint64_t OriginOffset; + /// \brief Branch weights for error reporting. + MDNode *ColdCallWeights; + /// \brief Branch weights for origin store. + MDNode *OriginStoreWeights; + /// \brief The blacklist. + OwningPtr<BlackList> BL; + /// \brief An empty volatile inline asm that prevents callback merge. + InlineAsm *EmptyAsm; + + friend struct MemorySanitizerVisitor; + friend struct VarArgAMD64Helper; +}; +} // namespace + +char MemorySanitizer::ID = 0; +INITIALIZE_PASS(MemorySanitizer, "msan", + "MemorySanitizer: detects uninitialized reads.", + false, false) + +FunctionPass *llvm::createMemorySanitizerPass() { + return new MemorySanitizer(); +} + +/// \brief Create a non-const global initialized with the given string. +/// +/// Creates a writable global for Str so that we can pass it to the +/// run-time lib. Runtime uses first 4 bytes of the string to store the +/// frame ID, so the string needs to be mutable. +static GlobalVariable *createPrivateNonConstGlobalForString(Module &M, + StringRef Str) { + Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); + return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false, + GlobalValue::PrivateLinkage, StrConst, ""); +} + + +/// \brief Insert extern declaration of runtime-provided functions and globals. +void MemorySanitizer::initializeCallbacks(Module &M) { + // Only do this once. + if (WarningFn) + return; + + IRBuilder<> IRB(*C); + // Create the callback. + // FIXME: this function should have "Cold" calling conv, + // which is not yet implemented. + StringRef WarningFnName = ClKeepGoing ? "__msan_warning" + : "__msan_warning_noreturn"; + WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL); + + MsanCopyOriginFn = M.getOrInsertFunction( + "__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, NULL); + MsanSetAllocaOriginFn = M.getOrInsertFunction( + "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, + IRB.getInt8PtrTy(), NULL); + MsanPoisonStackFn = M.getOrInsertFunction( + "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL); + MemmoveFn = M.getOrInsertFunction( + "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IntptrTy, NULL); + MemcpyFn = M.getOrInsertFunction( + "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IntptrTy, NULL); + MemsetFn = M.getOrInsertFunction( + "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), + IntptrTy, NULL); + + // Create globals. + RetvalTLS = new GlobalVariable( + M, ArrayType::get(IRB.getInt64Ty(), 8), false, + GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0, + GlobalVariable::GeneralDynamicTLSModel); + RetvalOriginTLS = new GlobalVariable( + M, OriginTy, false, GlobalVariable::ExternalLinkage, 0, + "__msan_retval_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel); + + ParamTLS = new GlobalVariable( + M, ArrayType::get(IRB.getInt64Ty(), 1000), false, + GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0, + GlobalVariable::GeneralDynamicTLSModel); + ParamOriginTLS = new GlobalVariable( + M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage, + 0, "__msan_param_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel); + + VAArgTLS = new GlobalVariable( + M, ArrayType::get(IRB.getInt64Ty(), 1000), false, + GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0, + GlobalVariable::GeneralDynamicTLSModel); + VAArgOverflowSizeTLS = new GlobalVariable( + M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0, + "__msan_va_arg_overflow_size_tls", 0, + GlobalVariable::GeneralDynamicTLSModel); + OriginTLS = new GlobalVariable( + M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0, + "__msan_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel); + + // We insert an empty inline asm after __msan_report* to avoid callback merge. + EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), + StringRef(""), StringRef(""), + /*hasSideEffects=*/true); +} + +/// \brief Module-level initialization. +/// +/// inserts a call to __msan_init to the module's constructor list. +bool MemorySanitizer::doInitialization(Module &M) { + TD = getAnalysisIfAvailable<DataLayout>(); + if (!TD) + return false; + BL.reset(new BlackList(ClBlackListFile)); + C = &(M.getContext()); + unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0); + switch (PtrSize) { + case 64: + ShadowMask = kShadowMask64; + OriginOffset = kOriginOffset64; + break; + case 32: + ShadowMask = kShadowMask32; + OriginOffset = kOriginOffset32; + break; + default: + report_fatal_error("unsupported pointer size"); + break; + } + + IRBuilder<> IRB(*C); + IntptrTy = IRB.getIntPtrTy(TD); + OriginTy = IRB.getInt32Ty(); + + ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000); + OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000); + + // Insert a call to __msan_init/__msan_track_origins into the module's CTORs. + appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction( + "__msan_init", IRB.getVoidTy(), NULL)), 0); + + new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage, + IRB.getInt32(ClTrackOrigins), "__msan_track_origins"); + + return true; +} + +namespace { + +/// \brief A helper class that handles instrumentation of VarArg +/// functions on a particular platform. +/// +/// Implementations are expected to insert the instrumentation +/// necessary to propagate argument shadow through VarArg function +/// calls. Visit* methods are called during an InstVisitor pass over +/// the function, and should avoid creating new basic blocks. A new +/// instance of this class is created for each instrumented function. +struct VarArgHelper { + /// \brief Visit a CallSite. + virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0; + + /// \brief Visit a va_start call. + virtual void visitVAStartInst(VAStartInst &I) = 0; + + /// \brief Visit a va_copy call. + virtual void visitVACopyInst(VACopyInst &I) = 0; + + /// \brief Finalize function instrumentation. + /// + /// This method is called after visiting all interesting (see above) + /// instructions in a function. + virtual void finalizeInstrumentation() = 0; + + virtual ~VarArgHelper() {} +}; + +struct MemorySanitizerVisitor; + +VarArgHelper* +CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + MemorySanitizerVisitor &Visitor); + +/// This class does all the work for a given function. Store and Load +/// instructions store and load corresponding shadow and origin +/// values. Most instructions propagate shadow from arguments to their +/// return values. Certain instructions (most importantly, BranchInst) +/// test their argument shadow and print reports (with a runtime call) if it's +/// non-zero. +struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { + Function &F; + MemorySanitizer &MS; + SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes; + ValueMap<Value*, Value*> ShadowMap, OriginMap; + bool InsertChecks; + OwningPtr<VarArgHelper> VAHelper; + + // An unfortunate workaround for asymmetric lowering of va_arg stuff. + // See a comment in visitCallSite for more details. + static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7 + static const unsigned AMD64FpEndOffset = 176; + + struct ShadowOriginAndInsertPoint { + Instruction *Shadow; + Instruction *Origin; + Instruction *OrigIns; + ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I) + : Shadow(S), Origin(O), OrigIns(I) { } + ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { } + }; + SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList; + SmallVector<Instruction*, 16> StoreList; + + MemorySanitizerVisitor(Function &F, MemorySanitizer &MS) + : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) { + InsertChecks = !MS.BL->isIn(F); + DEBUG(if (!InsertChecks) + dbgs() << "MemorySanitizer is not inserting checks into '" + << F.getName() << "'\n"); + } + + void materializeStores() { + for (size_t i = 0, n = StoreList.size(); i < n; i++) { + StoreInst& I = *dyn_cast<StoreInst>(StoreList[i]); + + IRBuilder<> IRB(&I); + Value *Val = I.getValueOperand(); + Value *Addr = I.getPointerOperand(); + Value *Shadow = getShadow(Val); + Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); + + StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); + DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); + (void)NewSI; + // If the store is volatile, add a check. + if (I.isVolatile()) + insertCheck(Val, &I); + if (ClCheckAccessAddress) + insertCheck(Addr, &I); + + if (ClTrackOrigins) { + if (ClStoreCleanOrigin || isa<StructType>(Shadow->getType())) { + IRB.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRB), I.getAlignment()); + } else { + Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); + + Constant *Cst = dyn_cast_or_null<Constant>(ConvertedShadow); + // TODO(eugenis): handle non-zero constant shadow by inserting an + // unconditional check (can not simply fail compilation as this could + // be in the dead code). + if (Cst) + continue; + + Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, + getCleanShadow(ConvertedShadow), "_mscmp"); + Instruction *CheckTerm = + SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false, MS.OriginStoreWeights); + IRBuilder<> IRBNewBlock(CheckTerm); + IRBNewBlock.CreateAlignedStore(getOrigin(Val), + getOriginPtr(Addr, IRBNewBlock), I.getAlignment()); + } + } + } + } + + void materializeChecks() { + for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) { + Instruction *Shadow = InstrumentationList[i].Shadow; + Instruction *OrigIns = InstrumentationList[i].OrigIns; + IRBuilder<> IRB(OrigIns); + DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n"); + Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); + DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n"); + Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, + getCleanShadow(ConvertedShadow), "_mscmp"); + Instruction *CheckTerm = + SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), + /* Unreachable */ !ClKeepGoing, + MS.ColdCallWeights); + + IRB.SetInsertPoint(CheckTerm); + if (ClTrackOrigins) { + Instruction *Origin = InstrumentationList[i].Origin; + IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0), + MS.OriginTLS); + } + CallInst *Call = IRB.CreateCall(MS.WarningFn); + Call->setDebugLoc(OrigIns->getDebugLoc()); + IRB.CreateCall(MS.EmptyAsm); + DEBUG(dbgs() << " CHECK: " << *Cmp << "\n"); + } + DEBUG(dbgs() << "DONE:\n" << F); + } + + /// \brief Add MemorySanitizer instrumentation to a function. + bool runOnFunction() { + MS.initializeCallbacks(*F.getParent()); + if (!MS.TD) return false; + // Iterate all BBs in depth-first order and create shadow instructions + // for all instructions (where applicable). + // For PHI nodes we create dummy shadow PHIs which will be finalized later. + for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), + DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { + BasicBlock *BB = *DI; + visit(*BB); + } + + // Finalize PHI nodes. + for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) { + PHINode *PN = ShadowPHINodes[i]; + PHINode *PNS = cast<PHINode>(getShadow(PN)); + PHINode *PNO = ClTrackOrigins ? cast<PHINode>(getOrigin(PN)) : 0; + size_t NumValues = PN->getNumIncomingValues(); + for (size_t v = 0; v < NumValues; v++) { + PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v)); + if (PNO) + PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v)); + } + } + + VAHelper->finalizeInstrumentation(); + + // Delayed instrumentation of StoreInst. + // This may add new checks to be inserted later. + materializeStores(); + + // Insert shadow value checks. + materializeChecks(); + + return true; + } + + /// \brief Compute the shadow type that corresponds to a given Value. + Type *getShadowTy(Value *V) { + return getShadowTy(V->getType()); + } + + /// \brief Compute the shadow type that corresponds to a given Type. + Type *getShadowTy(Type *OrigTy) { + if (!OrigTy->isSized()) { + return 0; + } + // For integer type, shadow is the same as the original type. + // This may return weird-sized types like i1. + if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy)) + return IT; + if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) + return VectorType::getInteger(VT); + if (StructType *ST = dyn_cast<StructType>(OrigTy)) { + SmallVector<Type*, 4> Elements; + for (unsigned i = 0, n = ST->getNumElements(); i < n; i++) + Elements.push_back(getShadowTy(ST->getElementType(i))); + StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked()); + DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n"); + return Res; + } + uint32_t TypeSize = MS.TD->getTypeStoreSizeInBits(OrigTy); + return IntegerType::get(*MS.C, TypeSize); + } + + /// \brief Flatten a vector type. + Type *getShadowTyNoVec(Type *ty) { + if (VectorType *vt = dyn_cast<VectorType>(ty)) + return IntegerType::get(*MS.C, vt->getBitWidth()); + return ty; + } + + /// \brief Convert a shadow value to it's flattened variant. + Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) { + Type *Ty = V->getType(); + Type *NoVecTy = getShadowTyNoVec(Ty); + if (Ty == NoVecTy) return V; + return IRB.CreateBitCast(V, NoVecTy); + } + + /// \brief Compute the shadow address that corresponds to a given application + /// address. + /// + /// Shadow = Addr & ~ShadowMask. + Value *getShadowPtr(Value *Addr, Type *ShadowTy, + IRBuilder<> &IRB) { + Value *ShadowLong = + IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy), + ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask)); + return IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0)); + } + + /// \brief Compute the origin address that corresponds to a given application + /// address. + /// + /// OriginAddr = (ShadowAddr + OriginOffset) & ~3ULL + Value *getOriginPtr(Value *Addr, IRBuilder<> &IRB) { + Value *ShadowLong = + IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy), + ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask)); + Value *Add = + IRB.CreateAdd(ShadowLong, + ConstantInt::get(MS.IntptrTy, MS.OriginOffset)); + Value *SecondAnd = + IRB.CreateAnd(Add, ConstantInt::get(MS.IntptrTy, ~3ULL)); + return IRB.CreateIntToPtr(SecondAnd, PointerType::get(IRB.getInt32Ty(), 0)); + } + + /// \brief Compute the shadow address for a given function argument. + /// + /// Shadow = ParamTLS+ArgOffset. + Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB, + int ArgOffset) { + Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy); + Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); + return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0), + "_msarg"); + } + + /// \brief Compute the origin address for a given function argument. + Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB, + int ArgOffset) { + if (!ClTrackOrigins) return 0; + Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy); + Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); + return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0), + "_msarg_o"); + } + + /// \brief Compute the shadow address for a retval. + Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) { + Value *Base = IRB.CreatePointerCast(MS.RetvalTLS, MS.IntptrTy); + return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0), + "_msret"); + } + + /// \brief Compute the origin address for a retval. + Value *getOriginPtrForRetval(IRBuilder<> &IRB) { + // We keep a single origin for the entire retval. Might be too optimistic. + return MS.RetvalOriginTLS; + } + + /// \brief Set SV to be the shadow value for V. + void setShadow(Value *V, Value *SV) { + assert(!ShadowMap.count(V) && "Values may only have one shadow"); + ShadowMap[V] = SV; + } + + /// \brief Set Origin to be the origin value for V. + void setOrigin(Value *V, Value *Origin) { + if (!ClTrackOrigins) return; + assert(!OriginMap.count(V) && "Values may only have one origin"); + DEBUG(dbgs() << "ORIGIN: " << *V << " ==> " << *Origin << "\n"); + OriginMap[V] = Origin; + } + + /// \brief Create a clean shadow value for a given value. + /// + /// Clean shadow (all zeroes) means all bits of the value are defined + /// (initialized). + Value *getCleanShadow(Value *V) { + Type *ShadowTy = getShadowTy(V); + if (!ShadowTy) + return 0; + return Constant::getNullValue(ShadowTy); + } + + /// \brief Create a dirty shadow of a given shadow type. + Constant *getPoisonedShadow(Type *ShadowTy) { + assert(ShadowTy); + if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) + return Constant::getAllOnesValue(ShadowTy); + StructType *ST = cast<StructType>(ShadowTy); + SmallVector<Constant *, 4> Vals; + for (unsigned i = 0, n = ST->getNumElements(); i < n; i++) + Vals.push_back(getPoisonedShadow(ST->getElementType(i))); + return ConstantStruct::get(ST, Vals); + } + + /// \brief Create a clean (zero) origin. + Value *getCleanOrigin() { + return Constant::getNullValue(MS.OriginTy); + } + + /// \brief Get the shadow value for a given Value. + /// + /// This function either returns the value set earlier with setShadow, + /// or extracts if from ParamTLS (for function arguments). + Value *getShadow(Value *V) { + if (Instruction *I = dyn_cast<Instruction>(V)) { + // For instructions the shadow is already stored in the map. + Value *Shadow = ShadowMap[V]; + if (!Shadow) { + DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent())); + (void)I; + assert(Shadow && "No shadow for a value"); + } + return Shadow; + } + if (UndefValue *U = dyn_cast<UndefValue>(V)) { + Value *AllOnes = getPoisonedShadow(getShadowTy(V)); + DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n"); + (void)U; + return AllOnes; + } + if (Argument *A = dyn_cast<Argument>(V)) { + // For arguments we compute the shadow on demand and store it in the map. + Value **ShadowPtr = &ShadowMap[V]; + if (*ShadowPtr) + return *ShadowPtr; + Function *F = A->getParent(); + IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI()); + unsigned ArgOffset = 0; + for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + if (!AI->getType()->isSized()) { + DEBUG(dbgs() << "Arg is not sized\n"); + continue; + } + unsigned Size = AI->hasByValAttr() + ? MS.TD->getTypeAllocSize(AI->getType()->getPointerElementType()) + : MS.TD->getTypeAllocSize(AI->getType()); + if (A == AI) { + Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset); + if (AI->hasByValAttr()) { + // ByVal pointer itself has clean shadow. We copy the actual + // argument shadow to the underlying memory. + Value *Cpy = EntryIRB.CreateMemCpy( + getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), + Base, Size, AI->getParamAlignment()); + DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n"); + (void)Cpy; + *ShadowPtr = getCleanShadow(V); + } else { + *ShadowPtr = EntryIRB.CreateLoad(Base); + } + DEBUG(dbgs() << " ARG: " << *AI << " ==> " << + **ShadowPtr << "\n"); + if (ClTrackOrigins) { + Value* OriginPtr = getOriginPtrForArgument(AI, EntryIRB, ArgOffset); + setOrigin(A, EntryIRB.CreateLoad(OriginPtr)); + } + } + ArgOffset += DataLayout::RoundUpAlignment(Size, 8); + } + assert(*ShadowPtr && "Could not find shadow for an argument"); + return *ShadowPtr; + } + // For everything else the shadow is zero. + return getCleanShadow(V); + } + + /// \brief Get the shadow for i-th argument of the instruction I. + Value *getShadow(Instruction *I, int i) { + return getShadow(I->getOperand(i)); + } + + /// \brief Get the origin for a value. + Value *getOrigin(Value *V) { + if (!ClTrackOrigins) return 0; + if (isa<Instruction>(V) || isa<Argument>(V)) { + Value *Origin = OriginMap[V]; + if (!Origin) { + DEBUG(dbgs() << "NO ORIGIN: " << *V << "\n"); + Origin = getCleanOrigin(); + } + return Origin; + } + return getCleanOrigin(); + } + + /// \brief Get the origin for i-th argument of the instruction I. + Value *getOrigin(Instruction *I, int i) { + return getOrigin(I->getOperand(i)); + } + + /// \brief Remember the place where a shadow check should be inserted. + /// + /// This location will be later instrumented with a check that will print a + /// UMR warning in runtime if the value is not fully defined. + void insertCheck(Value *Val, Instruction *OrigIns) { + assert(Val); + if (!InsertChecks) return; + Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val)); + if (!Shadow) return; +#ifndef NDEBUG + Type *ShadowTy = Shadow->getType(); + assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) && + "Can only insert checks for integer and vector shadow types"); +#endif + Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val)); + InstrumentationList.push_back( + ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns)); + } + + //------------------- Visitors. + + /// \brief Instrument LoadInst + /// + /// Loads the corresponding shadow and (optionally) origin. + /// Optionally, checks that the load address is fully defined. + void visitLoadInst(LoadInst &I) { + assert(I.getType()->isSized() && "Load type must have size"); + IRBuilder<> IRB(&I); + Type *ShadowTy = getShadowTy(&I); + Value *Addr = I.getPointerOperand(); + Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); + setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld")); + + if (ClCheckAccessAddress) + insertCheck(I.getPointerOperand(), &I); + + if (ClTrackOrigins) + setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), I.getAlignment())); + } + + /// \brief Instrument StoreInst + /// + /// Stores the corresponding shadow and (optionally) origin. + /// Optionally, checks that the store address is fully defined. + /// Volatile stores check that the value being stored is fully defined. + void visitStoreInst(StoreInst &I) { + StoreList.push_back(&I); + } + + // Vector manipulation. + void visitExtractElementInst(ExtractElementInst &I) { + insertCheck(I.getOperand(1), &I); + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1), + "_msprop")); + setOrigin(&I, getOrigin(&I, 0)); + } + + void visitInsertElementInst(InsertElementInst &I) { + insertCheck(I.getOperand(2), &I); + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1), + I.getOperand(2), "_msprop")); + setOriginForNaryOp(I); + } + + void visitShuffleVectorInst(ShuffleVectorInst &I) { + insertCheck(I.getOperand(2), &I); + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1), + I.getOperand(2), "_msprop")); + setOriginForNaryOp(I); + } + + // Casts. + void visitSExtInst(SExtInst &I) { + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop")); + setOrigin(&I, getOrigin(&I, 0)); + } + + void visitZExtInst(ZExtInst &I) { + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop")); + setOrigin(&I, getOrigin(&I, 0)); + } + + void visitTruncInst(TruncInst &I) { + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop")); + setOrigin(&I, getOrigin(&I, 0)); + } + + void visitBitCastInst(BitCastInst &I) { + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I))); + setOrigin(&I, getOrigin(&I, 0)); + } + + void visitPtrToIntInst(PtrToIntInst &I) { + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false, + "_msprop_ptrtoint")); + setOrigin(&I, getOrigin(&I, 0)); + } + + void visitIntToPtrInst(IntToPtrInst &I) { + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false, + "_msprop_inttoptr")); + setOrigin(&I, getOrigin(&I, 0)); + } + + void visitFPToSIInst(CastInst& I) { handleShadowOr(I); } + void visitFPToUIInst(CastInst& I) { handleShadowOr(I); } + void visitSIToFPInst(CastInst& I) { handleShadowOr(I); } + void visitUIToFPInst(CastInst& I) { handleShadowOr(I); } + void visitFPExtInst(CastInst& I) { handleShadowOr(I); } + void visitFPTruncInst(CastInst& I) { handleShadowOr(I); } + + /// \brief Propagate shadow for bitwise AND. + /// + /// This code is exact, i.e. if, for example, a bit in the left argument + /// is defined and 0, then neither the value not definedness of the + /// corresponding bit in B don't affect the resulting shadow. + void visitAnd(BinaryOperator &I) { + IRBuilder<> IRB(&I); + // "And" of 0 and a poisoned value results in unpoisoned value. + // 1&1 => 1; 0&1 => 0; p&1 => p; + // 1&0 => 0; 0&0 => 0; p&0 => 0; + // 1&p => p; 0&p => 0; p&p => p; + // S = (S1 & S2) | (V1 & S2) | (S1 & V2) + Value *S1 = getShadow(&I, 0); + Value *S2 = getShadow(&I, 1); + Value *V1 = I.getOperand(0); + Value *V2 = I.getOperand(1); + if (V1->getType() != S1->getType()) { + V1 = IRB.CreateIntCast(V1, S1->getType(), false); + V2 = IRB.CreateIntCast(V2, S2->getType(), false); + } + Value *S1S2 = IRB.CreateAnd(S1, S2); + Value *V1S2 = IRB.CreateAnd(V1, S2); + Value *S1V2 = IRB.CreateAnd(S1, V2); + setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2))); + setOriginForNaryOp(I); + } + + void visitOr(BinaryOperator &I) { + IRBuilder<> IRB(&I); + // "Or" of 1 and a poisoned value results in unpoisoned value. + // 1|1 => 1; 0|1 => 1; p|1 => 1; + // 1|0 => 1; 0|0 => 0; p|0 => p; + // 1|p => 1; 0|p => p; p|p => p; + // S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2) + Value *S1 = getShadow(&I, 0); + Value *S2 = getShadow(&I, 1); + Value *V1 = IRB.CreateNot(I.getOperand(0)); + Value *V2 = IRB.CreateNot(I.getOperand(1)); + if (V1->getType() != S1->getType()) { + V1 = IRB.CreateIntCast(V1, S1->getType(), false); + V2 = IRB.CreateIntCast(V2, S2->getType(), false); + } + Value *S1S2 = IRB.CreateAnd(S1, S2); + Value *V1S2 = IRB.CreateAnd(V1, S2); + Value *S1V2 = IRB.CreateAnd(S1, V2); + setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2))); + setOriginForNaryOp(I); + } + + /// \brief Propagate origin for an instruction. + /// + /// This is a general case of origin propagation. For an Nary operation, + /// is set to the origin of an argument that is not entirely initialized. + /// If there is more than one such arguments, the rightmost of them is picked. + /// It does not matter which one is picked if all arguments are initialized. + void setOriginForNaryOp(Instruction &I) { + if (!ClTrackOrigins) return; + IRBuilder<> IRB(&I); + Value *Origin = getOrigin(&I, 0); + for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op) { + Value *S = convertToShadowTyNoVec(getShadow(&I, Op), IRB); + Origin = IRB.CreateSelect(IRB.CreateICmpNE(S, getCleanShadow(S)), + getOrigin(&I, Op), Origin); + } + setOrigin(&I, Origin); + } + + /// \brief Propagate shadow for a binary operation. + /// + /// Shadow = Shadow0 | Shadow1, all 3 must have the same type. + /// Bitwise OR is selected as an operation that will never lose even a bit of + /// poison. + void handleShadowOrBinary(Instruction &I) { + IRBuilder<> IRB(&I); + Value *Shadow0 = getShadow(&I, 0); + Value *Shadow1 = getShadow(&I, 1); + setShadow(&I, IRB.CreateOr(Shadow0, Shadow1, "_msprop")); + setOriginForNaryOp(I); + } + + /// \brief Propagate shadow for arbitrary operation. + /// + /// This is a general case of shadow propagation, used in all cases where we + /// don't know and/or care about what the operation actually does. + /// It converts all input shadow values to a common type (extending or + /// truncating as necessary), and bitwise OR's them. + /// + /// This is much cheaper than inserting checks (i.e. requiring inputs to be + /// fully initialized), and less prone to false positives. + // FIXME: is the casting actually correct? + // FIXME: merge this with handleShadowOrBinary. + void handleShadowOr(Instruction &I) { + IRBuilder<> IRB(&I); + Value *Shadow = getShadow(&I, 0); + for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op) + Shadow = IRB.CreateOr( + Shadow, IRB.CreateIntCast(getShadow(&I, Op), Shadow->getType(), false), + "_msprop"); + Shadow = IRB.CreateIntCast(Shadow, getShadowTy(&I), false); + setShadow(&I, Shadow); + setOriginForNaryOp(I); + } + + void visitFAdd(BinaryOperator &I) { handleShadowOrBinary(I); } + void visitFSub(BinaryOperator &I) { handleShadowOrBinary(I); } + void visitFMul(BinaryOperator &I) { handleShadowOrBinary(I); } + void visitAdd(BinaryOperator &I) { handleShadowOrBinary(I); } + void visitSub(BinaryOperator &I) { handleShadowOrBinary(I); } + void visitXor(BinaryOperator &I) { handleShadowOrBinary(I); } + void visitMul(BinaryOperator &I) { handleShadowOrBinary(I); } + + void handleDiv(Instruction &I) { + IRBuilder<> IRB(&I); + // Strict on the second argument. + insertCheck(I.getOperand(1), &I); + setShadow(&I, getShadow(&I, 0)); + setOrigin(&I, getOrigin(&I, 0)); + } + + void visitUDiv(BinaryOperator &I) { handleDiv(I); } + void visitSDiv(BinaryOperator &I) { handleDiv(I); } + void visitFDiv(BinaryOperator &I) { handleDiv(I); } + void visitURem(BinaryOperator &I) { handleDiv(I); } + void visitSRem(BinaryOperator &I) { handleDiv(I); } + void visitFRem(BinaryOperator &I) { handleDiv(I); } + + /// \brief Instrument == and != comparisons. + /// + /// Sometimes the comparison result is known even if some of the bits of the + /// arguments are not. + void handleEqualityComparison(ICmpInst &I) { + IRBuilder<> IRB(&I); + Value *A = I.getOperand(0); + Value *B = I.getOperand(1); + Value *Sa = getShadow(A); + Value *Sb = getShadow(B); + if (A->getType()->isPointerTy()) + A = IRB.CreatePointerCast(A, MS.IntptrTy); + if (B->getType()->isPointerTy()) + B = IRB.CreatePointerCast(B, MS.IntptrTy); + // A == B <==> (C = A^B) == 0 + // A != B <==> (C = A^B) != 0 + // Sc = Sa | Sb + Value *C = IRB.CreateXor(A, B); + Value *Sc = IRB.CreateOr(Sa, Sb); + // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now) + // Result is defined if one of the following is true + // * there is a defined 1 bit in C + // * C is fully defined + // Si = !(C & ~Sc) && Sc + Value *Zero = Constant::getNullValue(Sc->getType()); + Value *MinusOne = Constant::getAllOnesValue(Sc->getType()); + Value *Si = + IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero), + IRB.CreateICmpEQ( + IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero)); + Si->setName("_msprop_icmp"); + setShadow(&I, Si); + setOriginForNaryOp(I); + } + + /// \brief Instrument signed relational comparisons. + /// + /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by + /// propagating the highest bit of the shadow. Everything else is delegated + /// to handleShadowOr(). + void handleSignedRelationalComparison(ICmpInst &I) { + Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0)); + Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1)); + Value* op = NULL; + CmpInst::Predicate pre = I.getPredicate(); + if (constOp0 && constOp0->isNullValue() && + (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) { + op = I.getOperand(1); + } else if (constOp1 && constOp1->isNullValue() && + (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) { + op = I.getOperand(0); + } + if (op) { + IRBuilder<> IRB(&I); + Value* Shadow = + IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt"); + setShadow(&I, Shadow); + setOrigin(&I, getOrigin(op)); + } else { + handleShadowOr(I); + } + } + + void visitICmpInst(ICmpInst &I) { + if (ClHandleICmp && I.isEquality()) + handleEqualityComparison(I); + else if (ClHandleICmp && I.isSigned() && I.isRelational()) + handleSignedRelationalComparison(I); + else + handleShadowOr(I); + } + + void visitFCmpInst(FCmpInst &I) { + handleShadowOr(I); + } + + void handleShift(BinaryOperator &I) { + IRBuilder<> IRB(&I); + // If any of the S2 bits are poisoned, the whole thing is poisoned. + // Otherwise perform the same shift on S1. + Value *S1 = getShadow(&I, 0); + Value *S2 = getShadow(&I, 1); + Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), + S2->getType()); + Value *V2 = I.getOperand(1); + Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2); + setShadow(&I, IRB.CreateOr(Shift, S2Conv)); + setOriginForNaryOp(I); + } + + void visitShl(BinaryOperator &I) { handleShift(I); } + void visitAShr(BinaryOperator &I) { handleShift(I); } + void visitLShr(BinaryOperator &I) { handleShift(I); } + + /// \brief Instrument llvm.memmove + /// + /// At this point we don't know if llvm.memmove will be inlined or not. + /// If we don't instrument it and it gets inlined, + /// our interceptor will not kick in and we will lose the memmove. + /// If we instrument the call here, but it does not get inlined, + /// we will memove the shadow twice: which is bad in case + /// of overlapping regions. So, we simply lower the intrinsic to a call. + /// + /// Similar situation exists for memcpy and memset. + void visitMemMoveInst(MemMoveInst &I) { + IRBuilder<> IRB(&I); + IRB.CreateCall3( + MS.MemmoveFn, + IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()), + IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)); + I.eraseFromParent(); + } + + // Similar to memmove: avoid copying shadow twice. + // This is somewhat unfortunate as it may slowdown small constant memcpys. + // FIXME: consider doing manual inline for small constant sizes and proper + // alignment. + void visitMemCpyInst(MemCpyInst &I) { + IRBuilder<> IRB(&I); + IRB.CreateCall3( + MS.MemcpyFn, + IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()), + IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)); + I.eraseFromParent(); + } + + // Same as memcpy. + void visitMemSetInst(MemSetInst &I) { + IRBuilder<> IRB(&I); + IRB.CreateCall3( + MS.MemsetFn, + IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false), + IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)); + I.eraseFromParent(); + } + + void visitVAStartInst(VAStartInst &I) { + VAHelper->visitVAStartInst(I); + } + + void visitVACopyInst(VACopyInst &I) { + VAHelper->visitVACopyInst(I); + } + + void handleBswap(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *Op = I.getArgOperand(0); + Type *OpType = Op->getType(); + Function *BswapFunc = Intrinsic::getDeclaration( + F.getParent(), Intrinsic::bswap, ArrayRef<Type*>(&OpType, 1)); + setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op))); + setOrigin(&I, getOrigin(Op)); + } + + void visitIntrinsicInst(IntrinsicInst &I) { + switch (I.getIntrinsicID()) { + case llvm::Intrinsic::bswap: + handleBswap(I); break; + default: + visitInstruction(I); break; + } + } + + void visitCallSite(CallSite CS) { + Instruction &I = *CS.getInstruction(); + assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite"); + if (CS.isCall()) { + CallInst *Call = cast<CallInst>(&I); + + // For inline asm, do the usual thing: check argument shadow and mark all + // outputs as clean. Note that any side effects of the inline asm that are + // not immediately visible in its constraints are not handled. + if (Call->isInlineAsm()) { + visitInstruction(I); + return; + } + + // Allow only tail calls with the same types, otherwise + // we may have a false positive: shadow for a non-void RetVal + // will get propagated to a void RetVal. + if (Call->isTailCall() && Call->getType() != Call->getParent()->getType()) + Call->setTailCall(false); + + assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere"); + + // We are going to insert code that relies on the fact that the callee + // will become a non-readonly function after it is instrumented by us. To + // prevent this code from being optimized out, mark that function + // non-readonly in advance. + if (Function *Func = Call->getCalledFunction()) { + // Clear out readonly/readnone attributes. + AttrBuilder B; + B.addAttribute(Attributes::ReadOnly) + .addAttribute(Attributes::ReadNone); + Func->removeAttribute(AttributeSet::FunctionIndex, + Attributes::get(Func->getContext(), B)); + } + } + IRBuilder<> IRB(&I); + unsigned ArgOffset = 0; + DEBUG(dbgs() << " CallSite: " << I << "\n"); + for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end(); + ArgIt != End; ++ArgIt) { + Value *A = *ArgIt; + unsigned i = ArgIt - CS.arg_begin(); + if (!A->getType()->isSized()) { + DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n"); + continue; + } + unsigned Size = 0; + Value *Store = 0; + // Compute the Shadow for arg even if it is ByVal, because + // in that case getShadow() will copy the actual arg shadow to + // __msan_param_tls. + Value *ArgShadow = getShadow(A); + Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset); + DEBUG(dbgs() << " Arg#" << i << ": " << *A << + " Shadow: " << *ArgShadow << "\n"); + if (CS.paramHasAttr(i + 1, Attributes::ByVal)) { + assert(A->getType()->isPointerTy() && + "ByVal argument is not a pointer!"); + Size = MS.TD->getTypeAllocSize(A->getType()->getPointerElementType()); + unsigned Alignment = CS.getParamAlignment(i + 1); + Store = IRB.CreateMemCpy(ArgShadowBase, + getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), + Size, Alignment); + } else { + Size = MS.TD->getTypeAllocSize(A->getType()); + Store = IRB.CreateStore(ArgShadow, ArgShadowBase); + } + if (ClTrackOrigins) + IRB.CreateStore(getOrigin(A), + getOriginPtrForArgument(A, IRB, ArgOffset)); + assert(Size != 0 && Store != 0); + DEBUG(dbgs() << " Param:" << *Store << "\n"); + ArgOffset += DataLayout::RoundUpAlignment(Size, 8); + } + DEBUG(dbgs() << " done with call args\n"); + + FunctionType *FT = + cast<FunctionType>(CS.getCalledValue()->getType()-> getContainedType(0)); + if (FT->isVarArg()) { + VAHelper->visitCallSite(CS, IRB); + } + + // Now, get the shadow for the RetVal. + if (!I.getType()->isSized()) return; + IRBuilder<> IRBBefore(&I); + // Untill we have full dynamic coverage, make sure the retval shadow is 0. + Value *Base = getShadowPtrForRetval(&I, IRBBefore); + IRBBefore.CreateStore(getCleanShadow(&I), Base); + Instruction *NextInsn = 0; + if (CS.isCall()) { + NextInsn = I.getNextNode(); + } else { + BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest(); + if (!NormalDest->getSinglePredecessor()) { + // FIXME: this case is tricky, so we are just conservative here. + // Perhaps we need to split the edge between this BB and NormalDest, + // but a naive attempt to use SplitEdge leads to a crash. + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + return; + } + NextInsn = NormalDest->getFirstInsertionPt(); + assert(NextInsn && + "Could not find insertion point for retval shadow load"); + } + IRBuilder<> IRBAfter(NextInsn); + setShadow(&I, IRBAfter.CreateLoad(getShadowPtrForRetval(&I, IRBAfter), + "_msret")); + if (ClTrackOrigins) + setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter))); + } + + void visitReturnInst(ReturnInst &I) { + IRBuilder<> IRB(&I); + if (Value *RetVal = I.getReturnValue()) { + // Set the shadow for the RetVal. + Value *Shadow = getShadow(RetVal); + Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); + DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n"); + IRB.CreateStore(Shadow, ShadowPtr); + if (ClTrackOrigins) + IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB)); + } + } + + void visitPHINode(PHINode &I) { + IRBuilder<> IRB(&I); + ShadowPHINodes.push_back(&I); + setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(), + "_msphi_s")); + if (ClTrackOrigins) + setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(), + "_msphi_o")); + } + + void visitAllocaInst(AllocaInst &I) { + setShadow(&I, getCleanShadow(&I)); + if (!ClPoisonStack) return; + IRBuilder<> IRB(I.getNextNode()); + uint64_t Size = MS.TD->getTypeAllocSize(I.getAllocatedType()); + if (ClPoisonStackWithCall) { + IRB.CreateCall2(MS.MsanPoisonStackFn, + IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), + ConstantInt::get(MS.IntptrTy, Size)); + } else { + Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB); + IRB.CreateMemSet(ShadowBase, IRB.getInt8(ClPoisonStackPattern), + Size, I.getAlignment()); + } + + if (ClTrackOrigins) { + setOrigin(&I, getCleanOrigin()); + SmallString<2048> StackDescriptionStorage; + raw_svector_ostream StackDescription(StackDescriptionStorage); + // We create a string with a description of the stack allocation and + // pass it into __msan_set_alloca_origin. + // It will be printed by the run-time if stack-originated UMR is found. + // The first 4 bytes of the string are set to '----' and will be replaced + // by __msan_va_arg_overflow_size_tls at the first call. + StackDescription << "----" << I.getName() << "@" << F.getName(); + Value *Descr = + createPrivateNonConstGlobalForString(*F.getParent(), + StackDescription.str()); + IRB.CreateCall3(MS.MsanSetAllocaOriginFn, + IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), + ConstantInt::get(MS.IntptrTy, Size), + IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())); + } + } + + void visitSelectInst(SelectInst& I) { + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateSelect(I.getCondition(), + getShadow(I.getTrueValue()), getShadow(I.getFalseValue()), + "_msprop")); + if (ClTrackOrigins) + setOrigin(&I, IRB.CreateSelect(I.getCondition(), + getOrigin(I.getTrueValue()), getOrigin(I.getFalseValue()))); + } + + void visitLandingPadInst(LandingPadInst &I) { + // Do nothing. + // See http://code.google.com/p/memory-sanitizer/issues/detail?id=1 + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } + + void visitGetElementPtrInst(GetElementPtrInst &I) { + handleShadowOr(I); + } + + void visitExtractValueInst(ExtractValueInst &I) { + IRBuilder<> IRB(&I); + Value *Agg = I.getAggregateOperand(); + DEBUG(dbgs() << "ExtractValue: " << I << "\n"); + Value *AggShadow = getShadow(Agg); + DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n"); + Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices()); + DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n"); + setShadow(&I, ResShadow); + setOrigin(&I, getCleanOrigin()); + } + + void visitInsertValueInst(InsertValueInst &I) { + IRBuilder<> IRB(&I); + DEBUG(dbgs() << "InsertValue: " << I << "\n"); + Value *AggShadow = getShadow(I.getAggregateOperand()); + Value *InsShadow = getShadow(I.getInsertedValueOperand()); + DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n"); + DEBUG(dbgs() << " InsShadow: " << *InsShadow << "\n"); + Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices()); + DEBUG(dbgs() << " Res: " << *Res << "\n"); + setShadow(&I, Res); + setOrigin(&I, getCleanOrigin()); + } + + void dumpInst(Instruction &I) { + if (CallInst *CI = dyn_cast<CallInst>(&I)) { + errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n"; + } else { + errs() << "ZZZ " << I.getOpcodeName() << "\n"; + } + errs() << "QQQ " << I << "\n"; + } + + void visitResumeInst(ResumeInst &I) { + DEBUG(dbgs() << "Resume: " << I << "\n"); + // Nothing to do here. + } + + void visitInstruction(Instruction &I) { + // Everything else: stop propagating and check for poisoned shadow. + if (ClDumpStrictInstructions) + dumpInst(I); + DEBUG(dbgs() << "DEFAULT: " << I << "\n"); + for (size_t i = 0, n = I.getNumOperands(); i < n; i++) + insertCheck(I.getOperand(i), &I); + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } +}; + +/// \brief AMD64-specific implementation of VarArgHelper. +struct VarArgAMD64Helper : public VarArgHelper { + // An unfortunate workaround for asymmetric lowering of va_arg stuff. + // See a comment in visitCallSite for more details. + static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7 + static const unsigned AMD64FpEndOffset = 176; + + Function &F; + MemorySanitizer &MS; + MemorySanitizerVisitor &MSV; + Value *VAArgTLSCopy; + Value *VAArgOverflowSize; + + SmallVector<CallInst*, 16> VAStartInstrumentationList; + + VarArgAMD64Helper(Function &F, MemorySanitizer &MS, + MemorySanitizerVisitor &MSV) + : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { } + + enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; + + ArgKind classifyArgument(Value* arg) { + // A very rough approximation of X86_64 argument classification rules. + Type *T = arg->getType(); + if (T->isFPOrFPVectorTy() || T->isX86_MMXTy()) + return AK_FloatingPoint; + if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64) + return AK_GeneralPurpose; + if (T->isPointerTy()) + return AK_GeneralPurpose; + return AK_Memory; + } + + // For VarArg functions, store the argument shadow in an ABI-specific format + // that corresponds to va_list layout. + // We do this because Clang lowers va_arg in the frontend, and this pass + // only sees the low level code that deals with va_list internals. + // A much easier alternative (provided that Clang emits va_arg instructions) + // would have been to associate each live instance of va_list with a copy of + // MSanParamTLS, and extract shadow on va_arg() call in the argument list + // order. + void visitCallSite(CallSite &CS, IRBuilder<> &IRB) { + unsigned GpOffset = 0; + unsigned FpOffset = AMD64GpEndOffset; + unsigned OverflowOffset = AMD64FpEndOffset; + for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end(); + ArgIt != End; ++ArgIt) { + Value *A = *ArgIt; + ArgKind AK = classifyArgument(A); + if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset) + AK = AK_Memory; + if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset) + AK = AK_Memory; + Value *Base; + switch (AK) { + case AK_GeneralPurpose: + Base = getShadowPtrForVAArgument(A, IRB, GpOffset); + GpOffset += 8; + break; + case AK_FloatingPoint: + Base = getShadowPtrForVAArgument(A, IRB, FpOffset); + FpOffset += 16; + break; + case AK_Memory: + uint64_t ArgSize = MS.TD->getTypeAllocSize(A->getType()); + Base = getShadowPtrForVAArgument(A, IRB, OverflowOffset); + OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8); + } + IRB.CreateStore(MSV.getShadow(A), Base); + } + Constant *OverflowSize = + ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset); + IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS); + } + + /// \brief Compute the shadow address for a given va_arg. + Value *getShadowPtrForVAArgument(Value *A, IRBuilder<> &IRB, + int ArgOffset) { + Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); + Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); + return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(A), 0), + "_msarg"); + } + + void visitVAStartInst(VAStartInst &I) { + IRBuilder<> IRB(&I); + VAStartInstrumentationList.push_back(&I); + Value *VAListTag = I.getArgOperand(0); + Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + + // Unpoison the whole __va_list_tag. + // FIXME: magic ABI constants. + IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), + /* size */24, /* alignment */16, false); + } + + void visitVACopyInst(VACopyInst &I) { + IRBuilder<> IRB(&I); + Value *VAListTag = I.getArgOperand(0); + Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + + // Unpoison the whole __va_list_tag. + // FIXME: magic ABI constants. + IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), + /* size */ 24, /* alignment */ 16, false); + } + + void finalizeInstrumentation() { + assert(!VAArgOverflowSize && !VAArgTLSCopy && + "finalizeInstrumentation called twice"); + if (!VAStartInstrumentationList.empty()) { + // If there is a va_start in this function, make a backup copy of + // va_arg_tls somewhere in the function entry block. + IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); + VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS); + Value *CopySize = + IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset), + VAArgOverflowSize); + VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); + IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8); + } + + // Instrument va_start. + // Copy va_list shadow from the backup copy of the TLS contents. + for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { + CallInst *OrigInst = VAStartInstrumentationList[i]; + IRBuilder<> IRB(OrigInst->getNextNode()); + Value *VAListTag = OrigInst->getArgOperand(0); + + Value *RegSaveAreaPtrPtr = + IRB.CreateIntToPtr( + IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), + ConstantInt::get(MS.IntptrTy, 16)), + Type::getInt64PtrTy(*MS.C)); + Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr); + Value *RegSaveAreaShadowPtr = + MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB); + IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, + AMD64FpEndOffset, 16); + + Value *OverflowArgAreaPtrPtr = + IRB.CreateIntToPtr( + IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), + ConstantInt::get(MS.IntptrTy, 8)), + Type::getInt64PtrTy(*MS.C)); + Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr); + Value *OverflowArgAreaShadowPtr = + MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB); + Value *SrcPtr = + getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset); + IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16); + } + } +}; + +VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + MemorySanitizerVisitor &Visitor) { + return new VarArgAMD64Helper(Func, Msan, Visitor); +} + +} // namespace + +bool MemorySanitizer::runOnFunction(Function &F) { + MemorySanitizerVisitor Visitor(F, *this); + + // Clear out readonly/readnone attributes. + AttrBuilder B; + B.addAttribute(Attributes::ReadOnly) + .addAttribute(Attributes::ReadNone); + F.removeAttribute(AttributeSet::FunctionIndex, + Attributes::get(F.getContext(), B)); + + return Visitor.runOnFunction(); +} diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index 1fe12545d2..8f8d027dca 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -13,20 +13,20 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-optimal-edge-profiling" +#include "llvm/Transforms/Instrumentation.h" +#include "MaximumSpanningTree.h" #include "ProfilingUtils.h" -#include "llvm/Constants.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ProfileInfoLoader.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Instrumentation.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" -#include "MaximumSpanningTree.h" using namespace llvm; STATISTIC(NumEdgesInserted, "The # of edges inserted."); diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index cc27146ebc..8aefe5901c 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -45,24 +45,24 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-path-profiling" -#include "llvm/DerivedTypes.h" +#include "llvm/Transforms/Instrumentation.h" #include "ProfilingUtils.h" #include "llvm/Analysis/PathNumbering.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/DerivedTypes.h" #include "llvm/InstrTypes.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/TypeBuilder.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Instrumentation.h" +#include "llvm/TypeBuilder.h" #include <vector> #define HASH_THRESHHOLD 100000 diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index d054b5e22f..f14a5d8a1e 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -21,27 +21,27 @@ #define DEBUG_TYPE "tsan" +#include "llvm/Transforms/Instrumentation.h" #include "BlackList.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/DataLayout.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Type.h" using namespace llvm; @@ -78,6 +78,7 @@ struct ThreadSanitizer : public FunctionPass { static char ID; // Pass identification, replacement for typeid. private: + void initializeCallbacks(Module &M); bool instrumentLoadOrStore(Instruction *I); bool instrumentAtomic(Instruction *I); void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, @@ -130,18 +131,8 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { report_fatal_error("ThreadSanitizer interface function redefined"); } -bool ThreadSanitizer::doInitialization(Module &M) { - TD = getAnalysisIfAvailable<DataLayout>(); - if (!TD) - return false; - BL.reset(new BlackList(ClBlackListFile)); - - // Always insert a call to __tsan_init into the module's CTORs. +void ThreadSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(M.getContext()); - Value *TsanInit = M.getOrInsertFunction("__tsan_init", - IRB.getVoidTy(), NULL); - appendToGlobalCtors(M, cast<Function>(TsanInit), 0); - // Initialize the callbacks. TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); @@ -188,6 +179,8 @@ bool ThreadSanitizer::doInitialization(Module &M) { NamePart = "_fetch_or"; else if (op == AtomicRMWInst::Xor) NamePart = "_fetch_xor"; + else if (op == AtomicRMWInst::Nand) + NamePart = "_fetch_nand"; else continue; SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); @@ -207,6 +200,20 @@ bool ThreadSanitizer::doInitialization(Module &M) { "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL)); TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL)); +} + +bool ThreadSanitizer::doInitialization(Module &M) { + TD = getAnalysisIfAvailable<DataLayout>(); + if (!TD) + return false; + BL.reset(new BlackList(ClBlackListFile)); + + // Always insert a call to __tsan_init into the module's CTORs. + IRBuilder<> IRB(M.getContext()); + Value *TsanInit = M.getOrInsertFunction("__tsan_init", + IRB.getVoidTy(), NULL); + appendToGlobalCtors(M, cast<Function>(TsanInit), 0); + return true; } @@ -297,6 +304,7 @@ static bool isAtomic(Instruction *I) { bool ThreadSanitizer::runOnFunction(Function &F) { if (!TD) return false; if (BL->isIn(F)) return false; + initializeCallbacks(*F.getParent()); SmallVector<Instruction*, 8> RetVec; SmallVector<Instruction*, 8> AllLoadsAndStores; SmallVector<Instruction*, 8> LocalLoadsAndStores; diff --git a/lib/Transforms/NaCl/ExpandTls.cpp b/lib/Transforms/NaCl/ExpandTls.cpp index 8ce439c018..54299edf4b 100644 --- a/lib/Transforms/NaCl/ExpandTls.cpp +++ b/lib/Transforms/NaCl/ExpandTls.cpp @@ -239,8 +239,8 @@ static void rewriteTlsVars(Module &M, std::vector<VarInfo> *TlsVars, AttrBuilder B; B.addAttribute(Attributes::ReadOnly); B.addAttribute(Attributes::NoUnwind); - AttrListPtr ReadTpAttrs = AttrListPtr().addAttr( - M.getContext(), AttrListPtr::FunctionIndex, + AttributeSet ReadTpAttrs = AttributeSet().addAttr( + M.getContext(), AttributeSet::FunctionIndex, Attributes::get(M.getContext(), B)); Constant *ReadTpFunc = M.getOrInsertTargetIntrinsic("llvm.nacl.read.tp", ReadTpType, diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index b344952cc5..f43baf5a76 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -16,16 +16,16 @@ #define DEBUG_TYPE "adce" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/InstIterator.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumRemoved, "Number of instructions removed"); diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp index cee5502656..6214e3b703 100644 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -27,12 +27,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "block-placement" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Function.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Transforms/Scalar.h" #include <set> using namespace llvm; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index b608a5535e..e6abfdf581 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -15,7 +15,16 @@ #define DEBUG_TYPE "codegenprepare" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" @@ -23,14 +32,6 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/DominatorInternals.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -38,7 +39,6 @@ #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" @@ -194,9 +194,20 @@ bool CodeGenPrepare::runOnFunction(Function &F) { WorkList.insert(*II); } - for (SmallPtrSet<BasicBlock*, 8>::iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) - DeleteDeadBlock(*I); + // Delete the dead blocks and any of their dead successors. + MadeChange |= !WorkList.empty(); + while (!WorkList.empty()) { + BasicBlock *BB = *WorkList.begin(); + WorkList.erase(BB); + SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); + + DeleteDeadBlock(BB); + + for (SmallVectorImpl<BasicBlock*>::iterator + II = Successors.begin(), IE = Successors.end(); II != IE; ++II) + if (pred_begin(*II) == pred_end(*II)) + WorkList.insert(*II); + } // Merge pairs of basic blocks with unconditional branches, connected by // a single edge. diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 369720b3dc..27efde53cd 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -20,14 +20,14 @@ #define DEBUG_TYPE "constprop" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Constant.h" +#include "llvm/DataLayout.h" #include "llvm/Instruction.h" #include "llvm/Pass.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/InstIterator.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <set> using namespace llvm; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 3ec6f3dcc3..b5a2a25ba0 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -13,15 +13,15 @@ #define DEBUG_TYPE "correlated-value-propagation" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumPhis, "Number of phis propagated"); diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index a2e074fae8..f260331c6d 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -18,12 +18,12 @@ #define DEBUG_TYPE "dce" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Instruction.h" #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 736cc05e04..124892887c 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -17,25 +17,25 @@ #define DEBUG_TYPE "dse" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Pass.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Constants.h" #include "llvm/DataLayout.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumFastStores, "Number of stores deleted"); diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 101009dd64..6b622c73f0 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -14,18 +14,18 @@ #define DEBUG_TYPE "early-cse" #include "llvm/Transforms/Scalar.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/ScopedHashTable.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include <deque> using namespace llvm; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index f003e06699..1c540b240c 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -17,11 +17,6 @@ #define DEBUG_TYPE "gvn" #include "llvm/Transforms/Scalar.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" @@ -37,11 +32,16 @@ #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" +#include "llvm/DataLayout.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 6301aad610..486a349c55 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -53,8 +53,10 @@ #define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Attributes.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" @@ -62,10 +64,8 @@ #include "llvm/Intrinsics.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumMerged , "Number of globals merged"); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 310fd6147a..29f5a10e09 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -26,28 +26,28 @@ #define DEBUG_TYPE "indvars" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/BasicBlock.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" -#include "llvm/Type.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Type.h" using namespace llvm; STATISTIC(NumWidened , "Number of indvars widened"); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index e7ffa09f17..4a4cd705e2 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -13,28 +13,28 @@ #define DEBUG_TYPE "jump-threading" #include "llvm/Transforms/Scalar.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Pass.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LazyValueInfo.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/DataLayout.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; STATISTIC(NumThreads, "Number of jumps threaded"); @@ -216,19 +216,24 @@ bool JumpThreading::runOnFunction(Function &F) { } /// getJumpThreadDuplicationCost - Return the cost of duplicating this block to -/// thread across it. -static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { +/// thread across it. Stop scanning the block when passing the threshold. +static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, + unsigned Threshold) { /// Ignore PHI nodes, these will be flattened when duplication happens. BasicBlock::const_iterator I = BB->getFirstNonPHI(); // FIXME: THREADING will delete values that are just used to compute the // branch, so they shouldn't count against the duplication cost. - // Sum up the cost of each instruction until we get to the terminator. Don't // include the terminator because the copy won't include it. unsigned Size = 0; for (; !isa<TerminatorInst>(I); ++I) { + + // Stop scanning the block if we've reached the threshold. + if (Size > Threshold) + return Size; + // Debugger intrinsics don't incur code size. if (isa<DbgInfoIntrinsic>(I)) continue; @@ -1337,7 +1342,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, return false; } - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); + unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, Threshold); if (JumpThreadCost > Threshold) { DEBUG(dbgs() << " Not threading BB '" << BB->getName() << "' - Cost is too high: " << JumpThreadCost << "\n"); @@ -1481,7 +1486,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, return false; } - unsigned DuplicationCost = getJumpThreadDuplicationCost(BB); + unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, Threshold); if (DuplicationCost > Threshold) { DEBUG(dbgs() << " Not duplicating BB '" << BB->getName() << "' - Cost is too high: " << DuplicationCost << "\n"); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 4818437c24..7ef1d34d3f 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -32,27 +32,27 @@ #define DEBUG_TYPE "licm" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Constants.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include <algorithm> using namespace llvm; @@ -90,6 +90,8 @@ namespace { AU.addRequired<TargetLibraryInfo>(); } + using llvm::Pass::doFinalization; + bool doFinalization() { assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); return false; diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 3771f5aa97..9c67e327e2 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -16,11 +16,11 @@ #define DEBUG_TYPE "loop-delete" #include "llvm/Transforms/Scalar.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; STATISTIC(NumDeleted, "Number of loops deleted"); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index a44e798f12..7807e9bb4f 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -43,19 +43,20 @@ #define DEBUG_TYPE "loop-idiom" #include "llvm/Transforms/Scalar.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/DataLayout.h" +#include "llvm/IRBuilder.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -63,16 +64,83 @@ STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); namespace { + + class LoopIdiomRecognize; + + /// This class defines some utility functions for loop idiom recognization. + class LIRUtil { + public: + /// Return true iff the block contains nothing but an uncondition branch + /// (aka goto instruction). + static bool isAlmostEmpty(BasicBlock *); + + static BranchInst *getBranch(BasicBlock *BB) { + return dyn_cast<BranchInst>(BB->getTerminator()); + } + + /// Return the condition of the branch terminating the given basic block. + static Value *getBrCondtion(BasicBlock *); + + /// Derive the precondition block (i.e the block that guards the loop + /// preheader) from the given preheader. + static BasicBlock *getPrecondBb(BasicBlock *PreHead); + }; + + /// This class is to recoginize idioms of population-count conducted in + /// a noncountable loop. Currently it only recognizes this pattern: + /// \code + /// while(x) {cnt++; ...; x &= x - 1; ...} + /// \endcode + class NclPopcountRecognize { + LoopIdiomRecognize &LIR; + Loop *CurLoop; + BasicBlock *PreCondBB; + + typedef IRBuilder<> IRBuilderTy; + + public: + explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR); + bool recognize(); + + private: + /// Take a glimpse of the loop to see if we need to go ahead recoginizing + /// the idiom. + bool preliminaryScreen(); + + /// Check if the given conditional branch is based on the comparison + /// beween a variable and zero, and if the variable is non-zero, the + /// control yeilds to the loop entry. If the branch matches the behavior, + /// the variable involved in the comparion is returned. This function will + /// be called to see if the precondition and postcondition of the loop + /// are in desirable form. + Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const; + + /// Return true iff the idiom is detected in the loop. and 1) \p CntInst + /// is set to the instruction counting the pupulation bit. 2) \p CntPhi + /// is set to the corresponding phi node. 3) \p Var is set to the value + /// whose population bits are being counted. + bool detectIdiom + (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const; + + /// Insert ctpop intrinsic function and some obviously dead instructions. + void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var); + + /// Create llvm.ctpop.* intrinsic function. + CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL); + }; + class LoopIdiomRecognize : public LoopPass { Loop *CurLoop; const DataLayout *TD; DominatorTree *DT; ScalarEvolution *SE; TargetLibraryInfo *TLI; + const ScalarTargetTransformInfo *STTI; public: static char ID; explicit LoopIdiomRecognize() : LoopPass(ID) { initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); + TD = 0; DT = 0; SE = 0; TLI = 0; STTI = 0; } bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -110,6 +178,36 @@ namespace { AU.addRequired<DominatorTree>(); AU.addRequired<TargetLibraryInfo>(); } + + const DataLayout *getDataLayout() { + return TD ? TD : TD=getAnalysisIfAvailable<DataLayout>(); + } + + DominatorTree *getDominatorTree() { + return DT ? DT : (DT=&getAnalysis<DominatorTree>()); + } + + ScalarEvolution *getScalarEvolution() { + return SE ? SE : (SE = &getAnalysis<ScalarEvolution>()); + } + + TargetLibraryInfo *getTargetLibraryInfo() { + return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>()); + } + + const ScalarTargetTransformInfo *getScalarTargetTransformInfo() { + if (!STTI) { + TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>(); + if (TTI) STTI = TTI->getScalarTargetTransformInfo(); + } + return STTI; + } + + Loop *getLoop() const { return CurLoop; } + + private: + bool runOnNoncountableLoop(); + bool runOnCountableLoop(); }; } @@ -172,24 +270,393 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE, deleteDeadInstruction(I, SE, TLI); } -bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { - CurLoop = L; +//===----------------------------------------------------------------------===// +// +// Implementation of LIRUtil +// +//===----------------------------------------------------------------------===// - // If the loop could not be converted to canonical form, it must have an - // indirectbr in it, just give up. - if (!L->getLoopPreheader()) +// This fucntion will return true iff the given block contains nothing but goto. +// A typical usage of this function is to check if the preheader fucntion is +// "almost" empty such that generated intrinsic function can be moved across +// preheader and to be placed at the end of the preconditiona block without +// concerning of breaking data dependence. +bool LIRUtil::isAlmostEmpty(BasicBlock *BB) { + if (BranchInst *Br = getBranch(BB)) { + return Br->isUnconditional() && BB->size() == 1; + } + return false; +} + +Value *LIRUtil::getBrCondtion(BasicBlock *BB) { + BranchInst *Br = getBranch(BB); + return Br ? Br->getCondition() : 0; +} + +BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { + if (BasicBlock *BB = PreHead->getSinglePredecessor()) { + BranchInst *Br = getBranch(BB); + return Br && Br->isConditional() ? BB : 0; + } + return 0; +} + +//===----------------------------------------------------------------------===// +// +// Implementation of NclPopcountRecognize +// +//===----------------------------------------------------------------------===// + +NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): + LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) { +} + +bool NclPopcountRecognize::preliminaryScreen() { + const ScalarTargetTransformInfo *STTI = LIR.getScalarTargetTransformInfo(); + if (STTI->getPopcntHwSupport(32) != ScalarTargetTransformInfo::Fast) return false; - // Disable loop idiom recognition if the function's name is a common idiom. - StringRef Name = L->getHeader()->getParent()->getName(); - if (Name == "memset" || Name == "memcpy") + // Counting population are usually conducted by few arithmetic instrutions. + // Such instructions can be easilly "absorbed" by vacant slots in a + // non-compact loop. Therefore, recognizing popcount idiom only makes sense + // in a compact loop. + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) return false; - // The trip count of the loop must be analyzable. - SE = &getAnalysis<ScalarEvolution>(); - if (!SE->hasLoopInvariantBackedgeTakenCount(L)) + BasicBlock *LoopBody = *(CurLoop->block_begin()); + if (LoopBody->size() >= 20) { + // The loop is too big, bail out. + return false; + } + + // It should have a preheader containing nothing but a goto instruction. + BasicBlock *PreHead = CurLoop->getLoopPreheader(); + if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead)) + return false; + + // It should have a precondition block where the generated popcount instrinsic + // function will be inserted. + PreCondBB = LIRUtil::getPrecondBb(PreHead); + if (!PreCondBB) + return false; + + return true; +} + +Value *NclPopcountRecognize::matchCondition (BranchInst *Br, + BasicBlock *LoopEntry) const { + if (!Br || !Br->isConditional()) + return 0; + + ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition()); + if (!Cond) + return 0; + + ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1)); + if (!CmpZero || !CmpZero->isZero()) + return 0; + + ICmpInst::Predicate Pred = Cond->getPredicate(); + if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) || + (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry)) + return Cond->getOperand(0); + + return 0; +} + +bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, + PHINode *&CntPhi, + Value *&Var) const { + // Following code tries to detect this idiom: + // + // if (x0 != 0) + // goto loop-exit // the precondition of the loop + // cnt0 = init-val; + // do { + // x1 = phi (x0, x2); + // cnt1 = phi(cnt0, cnt2); + // + // cnt2 = cnt1 + 1; + // ... + // x2 = x1 & (x1 - 1); + // ... + // } while(x != 0); + // + // loop-exit: + // + + // step 1: Check to see if the look-back branch match this pattern: + // "if (a!=0) goto loop-entry". + BasicBlock *LoopEntry; + Instruction *DefX2, *CountInst; + Value *VarX1, *VarX0; + PHINode *PhiX, *CountPhi; + + DefX2 = CountInst = 0; + VarX1 = VarX0 = 0; + PhiX = CountPhi = 0; + LoopEntry = *(CurLoop->block_begin()); + + // step 1: Check if the loop-back branch is in desirable form. + { + if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry)) + DefX2 = dyn_cast<Instruction>(T); + else + return false; + } + + // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" + { + if (DefX2->getOpcode() != Instruction::And) + return false; + + BinaryOperator *SubOneOp; + + if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0)))) + VarX1 = DefX2->getOperand(1); + else { + VarX1 = DefX2->getOperand(0); + SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1)); + } + if (!SubOneOp) + return false; + + Instruction *SubInst = cast<Instruction>(SubOneOp); + ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1)); + if (!Dec || + !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || + (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) { + return false; + } + } + + // step 3: Check the recurrence of variable X + { + PhiX = dyn_cast<PHINode>(VarX1); + if (!PhiX || + (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { + return false; + } + } + + // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 + { + CountInst = NULL; + for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(), + IterE = LoopEntry->end(); Iter != IterE; Iter++) { + Instruction *Inst = Iter; + if (Inst->getOpcode() != Instruction::Add) + continue; + + ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1)); + if (!Inc || !Inc->isOne()) + continue; + + PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0)); + if (!Phi || Phi->getParent() != LoopEntry) + continue; + + // Check if the result of the instruction is live of the loop. + bool LiveOutLoop = false; + for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); + I != E; I++) { + if ((cast<Instruction>(*I))->getParent() != LoopEntry) { + LiveOutLoop = true; break; + } + } + + if (LiveOutLoop) { + CountInst = Inst; + CountPhi = Phi; + break; + } + } + + if (!CountInst) + return false; + } + + // step 5: check if the precondition is in this form: + // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;" + { + BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB); + Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader()); + if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1)) + return false; + + CntInst = CountInst; + CntPhi = CountPhi; + Var = T; + } + + return true; +} + +void NclPopcountRecognize::transform(Instruction *CntInst, + PHINode *CntPhi, Value *Var) { + + ScalarEvolution *SE = LIR.getScalarEvolution(); + TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo(); + BasicBlock *PreHead = CurLoop->getLoopPreheader(); + BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB); + const DebugLoc DL = CntInst->getDebugLoc(); + + // Assuming before transformation, the loop is following: + // if (x) // the precondition + // do { cnt++; x &= x - 1; } while(x); + + // Step 1: Insert the ctpop instruction at the end of the precondition block + IRBuilderTy Builder(PreCondBr); + Value *PopCnt, *PopCntZext, *NewCount, *TripCnt; + { + PopCnt = createPopcntIntrinsic(Builder, Var, DL); + NewCount = PopCntZext = + Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType())); + + if (NewCount != PopCnt) + (cast<Instruction>(NewCount))->setDebugLoc(DL); + + // TripCnt is exactly the number of iterations the loop has + TripCnt = NewCount; + + // If the popoulation counter's initial value is not zero, insert Add Inst. + Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead); + ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal); + if (!InitConst || !InitConst->isZero()) { + NewCount = Builder.CreateAdd(NewCount, CntInitVal); + (cast<Instruction>(NewCount))->setDebugLoc(DL); + } + } + + // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to + // "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic + // function would be partial dead code, and downstream passes will drag + // it back from the precondition block to the preheader. + { + ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition()); + + Value *Opnd0 = PopCntZext; + Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0); + if (PreCond->getOperand(0) != Var) + std::swap(Opnd0, Opnd1); + + ICmpInst *NewPreCond = + cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1)); + PreCond->replaceAllUsesWith(NewPreCond); + + deleteDeadInstruction(PreCond, *SE, TLI); + } + + // Step 3: Note that the population count is exactly the trip count of the + // loop in question, which enble us to to convert the loop from noncountable + // loop into a countable one. The benefit is twofold: + // + // - If the loop only counts population, the entire loop become dead after + // the transformation. It is lots easier to prove a countable loop dead + // than to prove a noncountable one. (In some C dialects, a infite loop + // isn't dead even if it computes nothing useful. In general, DCE needs + // to prove a noncountable loop finite before safely delete it.) + // + // - If the loop also performs something else, it remains alive. + // Since it is transformed to countable form, it can be aggressively + // optimized by some optimizations which are in general not applicable + // to a noncountable loop. + // + // After this step, this loop (conceptually) would look like following: + // newcnt = __builtin_ctpop(x); + // t = newcnt; + // if (x) + // do { cnt++; x &= x-1; t--) } while (t > 0); + BasicBlock *Body = *(CurLoop->block_begin()); + { + BranchInst *LbBr = LIRUtil::getBranch(Body); + ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition()); + Type *Ty = TripCnt->getType(); + + PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin()); + + Builder.SetInsertPoint(LbCond); + Value *Opnd1 = cast<Value>(TcPhi); + Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1)); + Instruction *TcDec = + cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true)); + + TcPhi->addIncoming(TripCnt, PreHead); + TcPhi->addIncoming(TcDec, Body); + + CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ? + CmpInst::ICMP_UGT : CmpInst::ICMP_SLE; + LbCond->setPredicate(Pred); + LbCond->setOperand(0, TcDec); + LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0))); + } + + // Step 4: All the references to the original population counter outside + // the loop are replaced with the NewCount -- the value returned from + // __builtin_ctpop(). + { + SmallVector<Value *, 4> CntUses; + for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end(); + I != E; I++) { + if (cast<Instruction>(*I)->getParent() != Body) + CntUses.push_back(*I); + } + for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) { + (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount); + } + } + + // step 5: Forget the "non-computable" trip-count SCEV associated with the + // loop. The loop would otherwise not be deleted even if it becomes empty. + SE->forgetLoop(CurLoop); +} + +CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, + Value *Val, DebugLoc DL) { + Value *Ops[] = { Val }; + Type *Tys[] = { Val->getType() }; + + Module *M = (*(CurLoop->block_begin()))->getParent()->getParent(); + Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys); + CallInst *CI = IRBuilder.CreateCall(Func, Ops); + CI->setDebugLoc(DL); + + return CI; +} + +/// recognize - detect population count idiom in a non-countable loop. If +/// detected, transform the relevant code to popcount intrinsic function +/// call, and return true; otherwise, return false. +bool NclPopcountRecognize::recognize() { + + if (!LIR.getScalarTargetTransformInfo()) return false; - const SCEV *BECount = SE->getBackedgeTakenCount(L); + + LIR.getScalarEvolution(); + + if (!preliminaryScreen()) + return false; + + Instruction *CntInst; + PHINode *CntPhi; + Value *Val; + if (!detectIdiom(CntInst, CntPhi, Val)) + return false; + + transform(CntInst, CntPhi, Val); + return true; +} + +//===----------------------------------------------------------------------===// +// +// Implementation of LoopIdiomRecognize +// +//===----------------------------------------------------------------------===// + +bool LoopIdiomRecognize::runOnCountableLoop() { + const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop); if (isa<SCEVCouldNotCompute>(BECount)) return false; // If this loop executes exactly one time, then it should be peeled, not @@ -199,24 +666,27 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { return false; // We require target data for now. - TD = getAnalysisIfAvailable<DataLayout>(); - if (TD == 0) return false; + if (!getDataLayout()) + return false; + + getDominatorTree(); - DT = &getAnalysis<DominatorTree>(); LoopInfo &LI = getAnalysis<LoopInfo>(); TLI = &getAnalysis<TargetLibraryInfo>(); + getTargetLibraryInfo(); + SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); DEBUG(dbgs() << "loop-idiom Scanning: F[" - << L->getHeader()->getParent()->getName() - << "] Loop %" << L->getHeader()->getName() << "\n"); + << CurLoop->getHeader()->getParent()->getName() + << "] Loop %" << CurLoop->getHeader()->getName() << "\n"); bool MadeChange = false; // Scan all the blocks in the loop that are not in subloops. - for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; - ++BI) { + for (Loop::block_iterator BI = CurLoop->block_begin(), + E = CurLoop->block_end(); BI != E; ++BI) { // Ignore blocks in subloops. if (LI.getLoopFor(*BI) != CurLoop) continue; @@ -226,6 +696,33 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { return MadeChange; } +bool LoopIdiomRecognize::runOnNoncountableLoop() { + NclPopcountRecognize Popcount(*this); + if (Popcount.recognize()) + return true; + + return false; +} + +bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { + CurLoop = L; + + // If the loop could not be converted to canonical form, it must have an + // indirectbr in it, just give up. + if (!L->getLoopPreheader()) + return false; + + // Disable loop idiom recognition if the function's name is a common idiom. + StringRef Name = L->getHeader()->getParent()->getName(); + if (Name == "memset" || Name == "memcpy") + return false; + + SE = &getAnalysis<ScalarEvolution>(); + if (SE->hasLoopInvariantBackedgeTakenCount(L)) + return runOnCountableLoop(); + return runOnNoncountableLoop(); +} + /// runOnLoopBlock - Process the specified block, which lives in a counted loop /// with the specified backedge count. This block is known to be in the current /// loop and not in any subloops. diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 558f62e6b4..10ba22434a 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -12,17 +12,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-instsimplify" -#include "llvm/Instructions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Support/Debug.h" #include "llvm/DataLayout.h" +#include "llvm/Instructions.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumSimplified, "Number of redundant instructions simplified"); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index abe07aa9d3..249baf5164 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -13,20 +13,20 @@ #define DEBUG_TYPE "loop-rotate" #include "llvm/Transforms/Scalar.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; #define MAX_HEADER_SIZE 16 diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 958348d9fa..d571ba3fe0 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -54,27 +54,27 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-reduce" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/AddressingMode.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/Support/Debug.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 0d781ac977..2b15528411 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -13,16 +13,16 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-unroll" -#include "llvm/IntrinsicInst.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/DataLayout.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/DataLayout.h" #include <climits> using namespace llvm; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 047b43eb84..d41da4a9a9 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -28,25 +28,25 @@ #define DEBUG_TYPE "loop-unswitch" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <map> #include <set> diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 517657cf52..26b6269f42 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -14,20 +14,20 @@ #define DEBUG_TYPE "memcpyopt" #include "llvm/Transforms/Scalar.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/DataLayout.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include <list> diff --git a/lib/Transforms/Scalar/NaClCcRewrite.cpp b/lib/Transforms/Scalar/NaClCcRewrite.cpp index 4ac361835d..9109e22592 100644 --- a/lib/Transforms/Scalar/NaClCcRewrite.cpp +++ b/lib/Transforms/Scalar/NaClCcRewrite.cpp @@ -596,7 +596,7 @@ void UpdateFunctionSignature(Function &F, Attributes fattr = F.getAttributes().getFnAttributes(); if (fattr.hasAttributes()) new_attributes_vec.push_back(AttributeWithIndex::get(~0, fattr)); - F.setAttributes(AttrListPtr::get(F.getContext(), new_attributes_vec)); + F.setAttributes(AttributeSet::get(F.getContext(), new_attributes_vec)); } @@ -805,7 +805,7 @@ void ExtractOperandsAndAttributesFromCallInst( std::vector<Value*>& operands, std::vector<Attributes>& attributes) { - AttrListPtr PAL = call->getAttributes(); + AttributeSet PAL = call->getAttributes(); // last operand is: function for (size_t i = 0; i < call->getNumOperands() - 1; ++i) { operands.push_back(call->getArgOperand(i)); @@ -819,7 +819,7 @@ void ExtractOperandsAndAttributesFromeInvokeInst( InvokeInst* call, std::vector<Value*>& operands, std::vector<Attributes>& attributes) { - AttrListPtr PAL = call->getAttributes(); + AttributeSet PAL = call->getAttributes(); // last three operands are: function, bb-normal, bb-exception for (size_t i = 0; i < call->getNumOperands() - 3; ++i) { operands.push_back(call->getArgOperand(i)); diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index dfdf50549d..ce397658bf 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -29,9 +29,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "objc-arc" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/CommandLine.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // A handy option to enable/disable all optimizations in this file. @@ -132,12 +132,12 @@ namespace { // ARC Utilities. //===----------------------------------------------------------------------===// +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Intrinsics.h" #include "llvm/Module.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CallSite.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/Transforms/Utils/Local.h" namespace { /// InstructionClass - A simple classification for instructions. @@ -660,9 +660,9 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { // ARC AliasAnalysis. //===----------------------------------------------------------------------===// -#include "llvm/Pass.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" namespace { /// ObjCARCAliasAnalysis - This is a simple alias analysis @@ -912,8 +912,8 @@ bool ObjCARCExpand::runOnFunction(Function &F) { // ARC autorelease pool elimination. //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Constants.h" namespace { /// ObjCARCAPElim - Autorelease pool elimination. @@ -1093,10 +1093,10 @@ bool ObjCARCAPElim::runOnModule(Module &M) { // TODO: Delete release+retain pairs (rare). +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/LLVMContext.h" #include "llvm/Support/CFG.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallPtrSet.h" STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); @@ -1788,8 +1788,8 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); RetainRVCallee = M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, @@ -1804,8 +1804,8 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); AutoreleaseRVCallee = M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, @@ -1818,8 +1818,8 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) { if (!ReleaseCallee) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); ReleaseCallee = M->getOrInsertFunction( @@ -1834,8 +1834,8 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) { if (!RetainCallee) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); RetainCallee = M->getOrInsertFunction( @@ -1856,7 +1856,7 @@ Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { M->getOrInsertFunction( "objc_retainBlock", FunctionType::get(Params[0], Params, /*isVarArg=*/false), - AttrListPtr()); + AttributeSet()); } return RetainBlockCallee; } @@ -1865,8 +1865,8 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { if (!AutoreleaseCallee) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); AutoreleaseCallee = M->getOrInsertFunction( @@ -3756,9 +3756,9 @@ void ObjCARCOpt::releaseMemory() { // TODO: ObjCARCContract could insert PHI nodes when uses aren't // dominated by single calls. -#include "llvm/Operator.h" -#include "llvm/InlineAsm.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/InlineAsm.h" +#include "llvm/Operator.h" STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); @@ -3840,8 +3840,8 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { Type *I8XX = PointerType::getUnqual(I8X); Type *Params[] = { I8XX, I8X }; - AttrListPtr Attributes = AttrListPtr() - .addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = AttributeSet() + .addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)) .addAttr(M->getContext(), 1, Attributes::get(C, Attributes::NoCapture)); @@ -3860,8 +3860,8 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); RetainAutoreleaseCallee = M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes); @@ -3875,8 +3875,8 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); RetainAutoreleaseRVCallee = M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 377c07020d..569439aaf4 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -22,7 +22,12 @@ #define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -30,16 +35,11 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index ea1de63de7..5524e01230 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -18,15 +18,15 @@ #define DEBUG_TYPE "reg2mem" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/BasicBlock.h" #include "llvm/Function.h" +#include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/BasicBlock.h" -#include "llvm/Instructions.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" #include <list> using namespace llvm; diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 686520e724..28aaddc50e 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -19,26 +19,26 @@ #define DEBUG_TYPE "sccp" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" +#include "llvm/InstVisitor.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 8284e144b2..1c220ca0f6 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -25,33 +25,34 @@ #define DEBUG_TYPE "sroa" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/DIBuilder.h" +#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" +#include "llvm/InstVisitor.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -404,106 +405,17 @@ private: }; } -template <typename DerivedT, typename RetT> -class AllocaPartitioning::BuilderBase - : public InstVisitor<DerivedT, RetT> { -public: - BuilderBase(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P) - : TD(TD), - AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())), - P(P) { - enqueueUsers(AI, 0); - } - -protected: - const DataLayout &TD; - const uint64_t AllocSize; - AllocaPartitioning &P; - - SmallPtrSet<Use *, 8> VisitedUses; - - struct OffsetUse { - Use *U; - int64_t Offset; - }; - SmallVector<OffsetUse, 8> Queue; - - // The active offset and use while visiting. - Use *U; - int64_t Offset; - - void enqueueUsers(Instruction &I, int64_t UserOffset) { - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); - UI != UE; ++UI) { - if (VisitedUses.insert(&UI.getUse())) { - OffsetUse OU = { &UI.getUse(), UserOffset }; - Queue.push_back(OU); - } - } - } - - bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) { - GEPOffset = Offset; - for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI); - GTI != GTE; ++GTI) { - ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); - if (!OpC) - return false; - if (OpC->isZero()) - continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = TD.getStructLayout(STy); - uint64_t ElementOffset = SL->getElementOffset(ElementIdx); - // Check that we can continue to model this GEP in a signed 64-bit offset. - if (ElementOffset > INT64_MAX || - (GEPOffset >= 0 && - ((uint64_t)GEPOffset + ElementOffset) > INT64_MAX)) { - DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding " - << "what can be represented in an int64_t!\n" - << " alloca: " << P.AI << "\n"); - return false; - } - if (GEPOffset < 0) - GEPOffset = ElementOffset + (uint64_t)-GEPOffset; - else - GEPOffset += ElementOffset; - continue; - } - - APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits()); - Index *= APInt(Index.getBitWidth(), - TD.getTypeAllocSize(GTI.getIndexedType())); - Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset, - /*isSigned*/true); - // Check if the result can be stored in our int64_t offset. - if (!Index.isSignedIntN(sizeof(GEPOffset) * 8)) { - DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding " - << "what can be represented in an int64_t!\n" - << " alloca: " << P.AI << "\n"); - return false; - } - - GEPOffset = Index.getSExtValue(); - } - return true; +static Value *foldSelectInst(SelectInst &SI) { + // If the condition being selected on is a constant or the same value is + // being selected between, fold the select. Yes this does (rarely) happen + // early on. + if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) + return SI.getOperand(1+CI->isZero()); + if (SI.getOperand(1) == SI.getOperand(2)) { + return SI.getOperand(1); } - - Value *foldSelectInst(SelectInst &SI) { - // If the condition being selected on is a constant or the same value is - // being selected between, fold the select. Yes this does (rarely) happen - // early on. - if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) - return SI.getOperand(1+CI->isZero()); - if (SI.getOperand(1) == SI.getOperand(2)) { - assert(*U == SI.getOperand(1)); - return SI.getOperand(1); - } - return 0; - } -}; + return 0; +} /// \brief Builder for the alloca partitioning. /// @@ -511,60 +423,38 @@ protected: /// of an alloca and splitting the partitions for each load and store at each /// offset. class AllocaPartitioning::PartitionBuilder - : public BuilderBase<PartitionBuilder, bool> { - friend class InstVisitor<PartitionBuilder, bool>; + : public PtrUseVisitor<PartitionBuilder> { + friend class PtrUseVisitor<PartitionBuilder>; + friend class InstVisitor<PartitionBuilder>; + typedef PtrUseVisitor<PartitionBuilder> Base; + + const uint64_t AllocSize; + AllocaPartitioning &P; SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap; public: - PartitionBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P) - : BuilderBase<PartitionBuilder, bool>(TD, AI, P) {} - - /// \brief Run the builder over the allocation. - bool operator()() { - // Note that we have to re-evaluate size on each trip through the loop as - // the queue grows at the tail. - for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) { - U = Queue[Idx].U; - Offset = Queue[Idx].Offset; - if (!visit(cast<Instruction>(U->getUser()))) - return false; - } - return true; - } + PartitionBuilder(const DataLayout &DL, AllocaInst &AI, AllocaPartitioning &P) + : PtrUseVisitor<PartitionBuilder>(DL), + AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), + P(P) {} private: - bool markAsEscaping(Instruction &I) { - P.PointerEscapingInstr = &I; - return false; - } - - void insertUse(Instruction &I, int64_t Offset, uint64_t Size, + void insertUse(Instruction &I, const APInt &Offset, uint64_t Size, bool IsSplittable = false) { - // Completely skip uses which have a zero size or don't overlap the - // allocation. - if (Size == 0 || - (Offset >= 0 && (uint64_t)Offset >= AllocSize) || - (Offset < 0 && (uint64_t)-Offset >= Size)) { + // Completely skip uses which have a zero size or start either before or + // past the end of the allocation. + if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) { DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset - << " which starts past the end of the " << AllocSize - << " byte alloca:\n" + << " which has zero size or starts outside of the " + << AllocSize << " byte alloca:\n" << " alloca: " << P.AI << "\n" << " use: " << I << "\n"); return; } - // Clamp the start to the beginning of the allocation. - if (Offset < 0) { - DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset - << " to start at the beginning of the alloca:\n" - << " alloca: " << P.AI << "\n" - << " use: " << I << "\n"); - Size -= (uint64_t)-Offset; - Offset = 0; - } - - uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size; + uint64_t BeginOffset = Offset.getZExtValue(); + uint64_t EndOffset = BeginOffset + Size; // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. @@ -585,9 +475,9 @@ private: P.Partitions.push_back(New); } - bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset, + void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, bool IsVolatile) { - uint64_t Size = TD.getTypeStoreSize(Ty); + uint64_t Size = DL.getTypeStoreSize(Ty); // If this memory access can be shown to *statically* extend outside the // bounds of of the allocation, it's behavior is undefined, so simply @@ -596,15 +486,15 @@ private: // risk of overflow. // FIXME: We should instead consider the pointer to have escaped if this // function is being instrumented for addressing bugs or race conditions. - if (Offset < 0 || (uint64_t)Offset >= AllocSize || - Size > (AllocSize - (uint64_t)Offset)) { + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) { DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte " << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset << " which extends past the end of the " << AllocSize << " byte alloca:\n" << " alloca: " << P.AI << "\n" << " use: " << I << "\n"); - return true; + return; } // We allow splitting of loads and stores where the type is an integer type @@ -615,54 +505,61 @@ private: IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8; insertUse(I, Offset, Size, IsSplittable); - return true; - } - - bool visitBitCastInst(BitCastInst &BC) { - enqueueUsers(BC, Offset); - return true; - } - - bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { - int64_t GEPOffset; - if (!computeConstantGEPOffset(GEPI, GEPOffset)) - return markAsEscaping(GEPI); - - enqueueUsers(GEPI, GEPOffset); - return true; } - bool visitLoadInst(LoadInst &LI) { + void visitLoadInst(LoadInst &LI) { assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && "All simple FCA loads should have been pre-split"); + + if (!IsOffsetKnown) + return PI.setAborted(&LI); + return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile()); } - bool visitStoreInst(StoreInst &SI) { + void visitStoreInst(StoreInst &SI) { Value *ValOp = SI.getValueOperand(); if (ValOp == *U) - return markAsEscaping(SI); + return PI.setEscapedAndAborted(&SI); + if (!IsOffsetKnown) + return PI.setAborted(&SI); assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); - return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); + handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); } - bool visitMemSetInst(MemSetInst &II) { + void visitMemSetInst(MemSetInst &II) { assert(II.getRawDest() == *U && "Pointer use is not the destination?"); ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); - uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; - insertUse(II, Offset, Size, Length); - return true; + if ((Length && Length->getValue() == 0) || + (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) + // Zero-length mem transfer intrinsics can be ignored entirely. + return; + + if (!IsOffsetKnown) + return PI.setAborted(&II); + + insertUse(II, Offset, + Length ? Length->getLimitedValue() + : AllocSize - Offset.getLimitedValue(), + (bool)Length); } - bool visitMemTransferInst(MemTransferInst &II) { + void visitMemTransferInst(MemTransferInst &II) { ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); - uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; - if (!Size) + if ((Length && Length->getValue() == 0) || + (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) // Zero-length mem transfer intrinsics can be ignored entirely. - return true; + return; + + if (!IsOffsetKnown) + return PI.setAborted(&II); + + uint64_t RawOffset = Offset.getLimitedValue(); + uint64_t Size = Length ? Length->getLimitedValue() + : AllocSize - RawOffset; MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; @@ -670,12 +567,12 @@ private: Offsets.IsSplittable = Length; if (*U == II.getRawDest()) { - Offsets.DestBegin = Offset; - Offsets.DestEnd = Offset + Size; + Offsets.DestBegin = RawOffset; + Offsets.DestEnd = RawOffset + Size; } if (*U == II.getRawSource()) { - Offsets.SourceBegin = Offset; - Offsets.SourceEnd = Offset + Size; + Offsets.SourceBegin = RawOffset; + Offsets.SourceEnd = RawOffset + Size; } // If we have set up end offsets for both the source and the destination, @@ -688,7 +585,7 @@ private: // In that case, we can completely elide the transfer. if (!II.isVolatile() && Offsets.SourceBegin == Offsets.DestBegin) { P.Partitions[PrevIdx].kill(); - return true; + return; } // Otherwise we have an offset transfer within the same alloca. We can't @@ -701,7 +598,7 @@ private: // For non-volatile transfers this is a no-op. if (!II.isVolatile()) - return true; + return; // Otherwise just suppress splitting. Offsets.IsSplittable = false; @@ -721,23 +618,25 @@ private: "Already have intrinsic in map but haven't seen both ends"); (void)Inserted; } - - return true; } // Disable SRoA for any intrinsics except for lifetime invariants. // FIXME: What about debug instrinsics? This matches old behavior, but // doesn't make sense. - bool visitIntrinsicInst(IntrinsicInst &II) { + void visitIntrinsicInst(IntrinsicInst &II) { + if (!IsOffsetKnown) + return PI.setAborted(&II); + if (II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end) { ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0)); - uint64_t Size = std::min(AllocSize - Offset, Length->getLimitedValue()); + uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(), + Length->getLimitedValue()); insertUse(II, Offset, Size, true); - return true; + return; } - return markAsEscaping(II); + Base::visitIntrinsicInst(II); } Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) { @@ -757,14 +656,14 @@ private: llvm::tie(UsedI, I) = Uses.pop_back_val(); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - Size = std::max(Size, TD.getTypeStoreSize(LI->getType())); + Size = std::max(Size, DL.getTypeStoreSize(LI->getType())); continue; } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { Value *Op = SI->getOperand(0); if (Op == UsedI) return SI; - Size = std::max(Size, TD.getTypeStoreSize(Op->getType())); + Size = std::max(Size, DL.getTypeStoreSize(Op->getType())); continue; } @@ -785,54 +684,62 @@ private: return 0; } - bool visitPHINode(PHINode &PN) { + void visitPHINode(PHINode &PN) { + if (PN.use_empty()) + return; + if (!IsOffsetKnown) + return PI.setAborted(&PN); + // See if we already have computed info on this node. std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN]; if (PHIInfo.first) { PHIInfo.second = true; insertUse(PN, Offset, PHIInfo.first); - return true; + return; } // Check for an unsafe use of the PHI node. - if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first)) - return markAsEscaping(*EscapingI); + if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first)) + return PI.setAborted(UnsafeI); insertUse(PN, Offset, PHIInfo.first); - return true; } - bool visitSelectInst(SelectInst &SI) { + void visitSelectInst(SelectInst &SI) { + if (SI.use_empty()) + return; if (Value *Result = foldSelectInst(SI)) { if (Result == *U) // If the result of the constant fold will be the pointer, recurse // through the select as if we had RAUW'ed it. - enqueueUsers(SI, Offset); + enqueueUsers(SI); - return true; + return; } + if (!IsOffsetKnown) + return PI.setAborted(&SI); // See if we already have computed info on this node. std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI]; if (SelectInfo.first) { SelectInfo.second = true; insertUse(SI, Offset, SelectInfo.first); - return true; + return; } // Check for an unsafe use of the PHI node. - if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first)) - return markAsEscaping(*EscapingI); + if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first)) + return PI.setAborted(UnsafeI); insertUse(SI, Offset, SelectInfo.first); - return true; } /// \brief Disable SROA entirely if there are unhandled users of the alloca. - bool visitInstruction(Instruction &I) { return markAsEscaping(I); } + void visitInstruction(Instruction &I) { + PI.setAborted(&I); + } }; - /// \brief Use adder for the alloca partitioning. /// /// This class adds the uses of an alloca to all of the partitions which they @@ -851,26 +758,22 @@ private: /// partition space is pre-sorted, and do a logarithmic search for the /// partition needed, making the total visit a classical ((N + M) * log(N)) /// complexity operation. -class AllocaPartitioning::UseBuilder : public BuilderBase<UseBuilder> { +class AllocaPartitioning::UseBuilder : public PtrUseVisitor<UseBuilder> { + friend class PtrUseVisitor<UseBuilder>; friend class InstVisitor<UseBuilder>; + typedef PtrUseVisitor<UseBuilder> Base; + + const uint64_t AllocSize; + AllocaPartitioning &P; /// \brief Set to de-duplicate dead instructions found in the use walk. SmallPtrSet<Instruction *, 4> VisitedDeadInsts; public: UseBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P) - : BuilderBase<UseBuilder>(TD, AI, P) {} - - /// \brief Run the builder over the allocation. - void operator()() { - // Note that we have to re-evaluate size on each trip through the loop as - // the queue grows at the tail. - for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) { - U = Queue[Idx].U; - Offset = Queue[Idx].Offset; - this->visit(cast<Instruction>(U->getUser())); - } - } + : PtrUseVisitor<UseBuilder>(TD), + AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())), + P(P) {} private: void markAsDead(Instruction &I) { @@ -878,20 +781,14 @@ private: P.DeadUsers.push_back(&I); } - void insertUse(Instruction &User, int64_t Offset, uint64_t Size) { + void insertUse(Instruction &User, const APInt &Offset, uint64_t Size) { // If the use has a zero size or extends outside of the allocation, record // it as a dead use for elimination later. - if (Size == 0 || (uint64_t)Offset >= AllocSize || - (Offset < 0 && (uint64_t)-Offset >= Size)) + if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) return markAsDead(User); - // Clamp the start to the beginning of the allocation. - if (Offset < 0) { - Size -= (uint64_t)-Offset; - Offset = 0; - } - - uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size; + uint64_t BeginOffset = Offset.getZExtValue(); + uint64_t EndOffset = BeginOffset + Size; // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. @@ -914,15 +811,15 @@ private: } } - void handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) { - uint64_t Size = TD.getTypeStoreSize(Ty); + void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) { + uint64_t Size = DL.getTypeStoreSize(Ty); // If this memory access can be shown to *statically* extend outside the // bounds of of the allocation, it's behavior is undefined, so simply // ignore it. Note that this is more strict than the generic clamping // behavior of insertUse. - if (Offset < 0 || (uint64_t)Offset >= AllocSize || - Size > (AllocSize - (uint64_t)Offset)) + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) return markAsDead(I); insertUse(I, Offset, Size); @@ -932,40 +829,47 @@ private: if (BC.use_empty()) return markAsDead(BC); - enqueueUsers(BC, Offset); + return Base::visitBitCastInst(BC); } void visitGetElementPtrInst(GetElementPtrInst &GEPI) { if (GEPI.use_empty()) return markAsDead(GEPI); - int64_t GEPOffset; - if (!computeConstantGEPOffset(GEPI, GEPOffset)) - llvm_unreachable("Unable to compute constant offset for use"); - - enqueueUsers(GEPI, GEPOffset); + return Base::visitGetElementPtrInst(GEPI); } void visitLoadInst(LoadInst &LI) { + assert(IsOffsetKnown); handleLoadOrStore(LI.getType(), LI, Offset); } void visitStoreInst(StoreInst &SI) { + assert(IsOffsetKnown); handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset); } void visitMemSetInst(MemSetInst &II) { ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); - uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; - insertUse(II, Offset, Size); + if ((Length && Length->getValue() == 0) || + (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) + return markAsDead(II); + + assert(IsOffsetKnown); + insertUse(II, Offset, Length ? Length->getLimitedValue() + : AllocSize - Offset.getLimitedValue()); } void visitMemTransferInst(MemTransferInst &II) { ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); - uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; - if (!Size) + if ((Length && Length->getValue() == 0) || + (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) return markAsDead(II); + assert(IsOffsetKnown); + uint64_t Size = Length ? Length->getLimitedValue() + : AllocSize - Offset.getLimitedValue(); + MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd && Offsets.DestBegin == Offsets.SourceBegin) @@ -975,34 +879,39 @@ private: } void visitIntrinsicInst(IntrinsicInst &II) { + assert(IsOffsetKnown); assert(II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end); ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0)); - insertUse(II, Offset, - std::min(AllocSize - Offset, Length->getLimitedValue())); + insertUse(II, Offset, std::min(Length->getLimitedValue(), + AllocSize - Offset.getLimitedValue())); } - void insertPHIOrSelect(Instruction &User, uint64_t Offset) { + void insertPHIOrSelect(Instruction &User, const APInt &Offset) { uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first; // For PHI and select operands outside the alloca, we can't nuke the entire // phi or select -- the other side might still be relevant, so we special // case them here and use a separate structure to track the operands // themselves which should be replaced with undef. - if (Offset >= AllocSize) { + if ((Offset.isNegative() && Offset.uge(Size)) || + (!Offset.isNegative() && Offset.uge(AllocSize))) { P.DeadOperands.push_back(U); return; } insertUse(User, Offset, Size); } + void visitPHINode(PHINode &PN) { if (PN.use_empty()) return markAsDead(PN); + assert(IsOffsetKnown); insertPHIOrSelect(PN, Offset); } + void visitSelectInst(SelectInst &SI) { if (SI.use_empty()) return markAsDead(SI); @@ -1011,7 +920,7 @@ private: if (Result == *U) // If the result of the constant fold will be the pointer, recurse // through the select as if we had RAUW'ed it. - enqueueUsers(SI, Offset); + enqueueUsers(SI); else // Otherwise the operand to the select is dead, and we can replace it // with undef. @@ -1020,6 +929,7 @@ private: return; } + assert(IsOffsetKnown); insertPHIOrSelect(SI, Offset); } @@ -1131,8 +1041,15 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) #endif PointerEscapingInstr(0) { PartitionBuilder PB(TD, AI, *this); - if (!PB()) + PartitionBuilder::PtrInfo PtrI = PB.visitPtr(AI); + if (PtrI.isEscaped() || PtrI.isAborted()) { + // FIXME: We should sink the escape vs. abort info into the caller nicely, + // possibly by just storing the PtrInfo in the AllocaPartitioning. + PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst() + : PtrI.getAbortingInst(); + assert(PointerEscapingInstr && "Did not track a bad instruction"); return; + } // Sort the uses. This arranges for the offsets to be in ascending order, // and the sizes to be in descending order. @@ -1166,7 +1083,9 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) // re-walking the recursive users of the alloca. Uses.resize(Partitions.size()); UseBuilder UB(TD, AI, *this); - UB(); + PtrI = UB.visitPtr(AI); + assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!"); + assert(!PtrI.isAborted() && "Early aborted the visit of the pointer."); } Type *AllocaPartitioning::getCommonType(iterator I) const { @@ -2164,6 +2083,9 @@ static bool isIntegerWideningViable(const DataLayout &TD, AllocaPartitioning::const_use_iterator I, AllocaPartitioning::const_use_iterator E) { uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy); + // Don't create integer types larger than the maximum bitwidth. + if (SizeInBits > IntegerType::MAX_INT_BITS) + return false; // Don't try to handle allocas with bit-padding. if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy)) @@ -2202,7 +2124,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin == 0 && RelEnd == Size) WholeAllocaOp = true; if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) { - if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy)) + if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy)) return false; continue; } @@ -2218,7 +2140,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin == 0 && RelEnd == Size) WholeAllocaOp = true; if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) { - if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy)) + if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy)) return false; continue; } diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 39630fd027..762bb15c59 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -13,14 +13,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar.h" #include "llvm-c/Initialization.h" -#include "llvm/InitializePasses.h" -#include "llvm/PassManager.h" +#include "llvm-c/Transforms/Scalar.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/DataLayout.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassManager.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index a46d09c320..c8656fbd8e 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -21,8 +21,15 @@ #define DEBUG_TYPE "scalarrepl" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/DIBuilder.h" +#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -34,19 +41,12 @@ #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/SSAUpdater.h" diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 9f24bb635e..9160f04fe2 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -23,19 +23,19 @@ #define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Attributes.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" -#include "llvm/Attributes.h" -#include "llvm/Support/CFG.h" #include "llvm/Pass.h" -#include "llvm/DataLayout.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" #include "llvm/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; STATISTIC(NumSimpl, "Number of blocks simplified"); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 0788f19014..d4643b9d80 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -17,25 +17,24 @@ #define DEBUG_TYPE "simplify-libcalls" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BuildLibCalls.h" -#include "llvm/IRBuilder.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! +#include "llvm/DataLayout.h" +#include "llvm/IRBuilder.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! +#include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; -STATISTIC(NumSimplified, "Number of library calls simplified"); STATISTIC(NumAnnotated, "Number of attributes added to library functions"); //===----------------------------------------------------------------------===// @@ -82,289 +81,6 @@ public: //===----------------------------------------------------------------------===// -// Helper Functions -//===----------------------------------------------------------------------===// - -static bool CallHasFloatingPointArgument(const CallInst *CI) { - for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); - it != e; ++it) { - if ((*it)->getType()->isFloatingPointTy()) - return true; - } - return false; -} - -namespace { -//===----------------------------------------------------------------------===// -// Formatting and IO Optimizations -//===----------------------------------------------------------------------===// - -//===---------------------------------------===// -// 'sprintf' Optimizations - -struct SPrintFOpt : public LibCallOptimization { - Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, - IRBuilder<> &B) { - // Check for a fixed format string. - StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; - - // If we just have a format string (nothing else crazy) transform it. - if (CI->getNumArgOperands() == 2) { - // Make sure there's no % in the constant array. We could try to handle - // %% -> % in the future if we cared. - for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) - if (FormatStr[i] == '%') - return 0; // we found a format specifier, bail out. - - // These optimizations require DataLayout. - if (!TD) return 0; - - // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), // Copy the - FormatStr.size() + 1), 1); // nul byte. - return ConstantInt::get(CI->getType(), FormatStr.size()); - } - - // The remaining optimizations require the format string to be "%s" or "%c" - // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || - CI->getNumArgOperands() < 3) - return 0; - - // Decode the second character of the format string. - if (FormatStr[1] == 'c') { - // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); - Value *Ptr = CastToCStr(CI->getArgOperand(0), B); - B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul"); - B.CreateStore(B.getInt8(0), Ptr); - - return ConstantInt::get(CI->getType(), 1); - } - - if (FormatStr[1] == 's') { - // These optimizations require DataLayout. - if (!TD) return 0; - - // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - - Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); - if (!Len) - return 0; - Value *IncLen = B.CreateAdd(Len, - ConstantInt::get(Len->getType(), 1), - "leninc"); - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); - - // The sprintf result is the unincremented number of bytes in the string. - return B.CreateIntCast(Len, CI->getType(), false); - } - return 0; - } - - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed pointer arguments and an integer result. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - - if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { - return V; - } - - // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating - // point arguments. - if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *SIPrintFFn = - M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(SIPrintFFn); - B.Insert(New); - return New; - } - return 0; - } -}; - -//===---------------------------------------===// -// 'fwrite' Optimizations - -struct FWriteOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require a pointer, an integer, an integer, a pointer, returning integer. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isIntegerTy() || - !FT->getParamType(2)->isIntegerTy() || - !FT->getParamType(3)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - - // Get the element size and count. - ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!SizeC || !CountC) return 0; - uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); - - // If this is writing zero records, remove the call (it's a noop). - if (Bytes == 0) - return ConstantInt::get(CI->getType(), 0); - - // If this is writing one byte, turn it into fputc. - // This optimisation is only valid, if the return value is unused. - if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'fputs' Optimizations - -struct FPutsOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. - if (!TD) return 0; - - // Require two pointers. Also, we can't optimize if return value is used. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !CI->use_empty()) - return 0; - - // fputs(s,F) --> fwrite(s,1,strlen(s),F) - uint64_t Len = GetStringLength(CI->getArgOperand(0)); - if (!Len) return 0; - // Known to have no uses (see above). - return EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getArgOperand(1), B, TD, TLI); - } -}; - -//===---------------------------------------===// -// 'fprintf' Optimizations - -struct FPrintFOpt : public LibCallOptimization { - Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, - IRBuilder<> &B) { - // All the optimizations depend on the format string. - StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; - - // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) - if (CI->getNumArgOperands() == 2) { - for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) - if (FormatStr[i] == '%') // Could handle %% -> % if we cared. - return 0; // We found a format specifier. - - // These optimizations require DataLayout. - if (!TD) return 0; - - Value *NewCI = EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; - } - - // The remaining optimizations require the format string to be "%s" or "%c" - // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || - CI->getNumArgOperands() < 3) - return 0; - - // Decode the second character of the format string. - if (FormatStr[1] == 'c') { - // fprintf(F, "%c", chr) --> fputc(chr, F) - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, - TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; - } - - if (FormatStr[1] == 's') { - // fprintf(F, "%s", str) --> fputs(str, F) - if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) - return 0; - return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); - } - return 0; - } - - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed paramters as pointers and integer result. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - - if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { - return V; - } - - // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no - // floating point arguments. - if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *FIPrintFFn = - M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(FIPrintFFn); - B.Insert(New); - return New; - } - return 0; - } -}; - -//===---------------------------------------===// -// 'puts' Optimizations - -struct PutsOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require one fixed pointer argument and an integer/void result. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || - !(FT->getReturnType()->isIntegerTy() || - FT->getReturnType()->isVoidTy())) - return 0; - - // Check for a constant string. - StringRef Str; - if (!getConstantStringInfo(CI->getArgOperand(0), Str)) - return 0; - - if (Str.empty() && CI->use_empty()) { - // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); - if (CI->use_empty() || !Res) return Res; - return B.CreateIntCast(Res, CI->getType(), true); - } - - return 0; - } -}; - -} // end anonymous namespace. - -//===----------------------------------------------------------------------===// // SimplifyLibCalls Pass Implementation //===----------------------------------------------------------------------===// @@ -375,10 +91,6 @@ namespace { TargetLibraryInfo *TLI; StringMap<LibCallOptimization*> Optimizations; - // Formatting and IO Optimizations - SPrintFOpt SPrintF; - FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF; - PutsOpt Puts; bool Modified; // This is only used by doInitialization. public: @@ -433,12 +145,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. void SimplifyLibCalls::InitOptimizations() { - // Formatting and IO Optimizations - Optimizations["sprintf"] = &SPrintF; - AddOpt(LibFunc::fwrite, &FWrite); - AddOpt(LibFunc::fputs, &FPuts); - Optimizations["fprintf"] = &FPrintF; - Optimizations["puts"] = &Puts; } @@ -486,7 +192,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { // Something changed! Changed = true; - ++NumSimplified; // Inspect the instruction after the call (which was potentially just // added) next. diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index 34f1d6c622..cde9c178ad 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -14,13 +14,13 @@ #define DEBUG_TYPE "sink" #include "llvm/Transforms/Scalar.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 6557d630a9..e357378524 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -52,8 +52,12 @@ #define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -61,16 +65,12 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; STATISTIC(NumEliminated, "Number of tail calls removed"); diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 6815e411b4..3a19b706ea 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/AddrModeMatcher.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalValue.h" #include "llvm/Instruction.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/DataLayout.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/CallSite.h" using namespace llvm; using namespace llvm::PatternMatch; diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 9fea11391a..e8833f2092 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -13,20 +13,20 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Constant.h" -#include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Constant.h" #include "llvm/DataLayout.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Type.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 6b04e3d17b..385ceb13b2 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -17,17 +17,17 @@ #define DEBUG_TYPE "break-crit-edges" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Type.h" using namespace llvm; STATISTIC(NumBroken, "Number of blocks inserted"); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 74b2ee10e0..62b79bf2b3 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -12,7 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" @@ -20,10 +22,8 @@ #include "llvm/LLVMContext.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Type.h" using namespace llvm; @@ -43,12 +43,12 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrLen = M->getOrInsertFunction("strlen", - AttrListPtr::get(M->getContext(), + AttributeSet::get(M->getContext(), AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), @@ -72,12 +72,12 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrNLen = M->getOrInsertFunction("strnlen", - AttrListPtr::get(M->getContext(), + AttributeSet::get(M->getContext(), AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), @@ -101,13 +101,13 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; AttributeWithIndex AWI = - AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); Type *I8Ptr = B.getInt8PtrTy(); Type *I32Ty = B.getInt32Ty(); Constant *StrChr = M->getOrInsertFunction("strchr", - AttrListPtr::get(M->getContext(), + AttributeSet::get(M->getContext(), AWI), I8Ptr, I8Ptr, I32Ty, NULL); CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), @@ -129,12 +129,12 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *StrNCmp = M->getOrInsertFunction("strncmp", - AttrListPtr::get(M->getContext(), + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), @@ -160,11 +160,11 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); Value *StrCpy = M->getOrInsertFunction(Name, - AttrListPtr::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AWI), I8Ptr, I8Ptr, I8Ptr, NULL); CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Name); @@ -184,11 +184,11 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); Value *StrNCpy = M->getOrInsertFunction(Name, - AttrListPtr::get(M->getContext(), + AttributeSet::get(M->getContext(), AWI), I8Ptr, I8Ptr, I8Ptr, Len->getType(), NULL); @@ -210,11 +210,11 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttrListPtr::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -239,11 +239,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemChr = M->getOrInsertFunction("memchr", - AttrListPtr::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), @@ -269,12 +269,12 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCmp = M->getOrInsertFunction("memcmp", - AttrListPtr::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -293,7 +293,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, /// returns one value with the same type. If 'Op' is a long double, 'l' is /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, - const AttrListPtr &Attrs) { + const AttributeSet &Attrs) { SmallString<20> NameBuffer; if (!Op->getType()->isDoubleTy()) { // If we need to add a suffix, copy into NameBuffer. @@ -348,11 +348,11 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); Value *PutS = M->getOrInsertFunction("puts", - AttrListPtr::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), NULL); @@ -372,12 +372,12 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); Constant *F; if (File->getType()->isPointerTy()) F = M->getOrInsertFunction("fputc", - AttrListPtr::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt32Ty(), File->getType(), NULL); @@ -406,13 +406,13 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); - AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) F = M->getOrInsertFunction(FPutsName, - AttrListPtr::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); @@ -439,14 +439,14 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture); - AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) F = M->getOrInsertFunction(FWriteName, - AttrListPtr::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index bee2f7bcb6..1699a3b648 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -16,11 +16,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "bypass-slow-division" -#include "llvm/Instructions.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/Instructions.h" using namespace llvm; diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 7ba9f6d9d2..12f2e4b83e 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -14,22 +14,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Constants.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Function.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/ADT/SmallVector.h" #include <map> using namespace llvm; @@ -99,12 +99,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() .addAttr(NewFunc->getContext(), - AttrListPtr::ReturnIndex, + AttributeSet::ReturnIndex, OldFunc->getAttributes() .getRetAttributes())); NewFunc->setAttributes(NewFunc->getAttributes() .addAttr(NewFunc->getContext(), - AttrListPtr::FunctionIndex, + AttributeSet::FunctionIndex, OldFunc->getAttributes() .getFnAttributes())); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 1dac6b5b8b..114babd101 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Module.h" -#include "llvm/DerivedTypes.h" #include "llvm/Constant.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -38,10 +38,6 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { New->setTargetTriple(M->getTargetTriple()); New->setModuleInlineAsm(M->getModuleInlineAsm()); - // Copy all of the dependent libraries over. - for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I) - New->addLibrary(*I); - // Loop over all of the global variables, making corresponding globals in the // new module. Here we add them to the VMap and to the new Module. We // don't worry about attributes or initializers, they will come later. diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 281714f4c1..a596df64fd 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -14,6 +14,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CodeExtractor.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" @@ -21,18 +28,11 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/RegionIterator.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <set> using namespace llvm; diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 99b5830163..f8a0cafadc 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Type.h" -#include "llvm/ADT/DenseMap.h" using namespace llvm; /// DemoteRegToStack - This function takes a virtual register computed by an diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 303de56d95..c176cf1075 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -13,8 +13,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Attributes.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/IRBuilder.h" @@ -22,12 +27,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Intrinsics.h" #include "llvm/Module.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/CallSite.h" -#include "llvm/DataLayout.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 55227e2714..67dcbe446b 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -15,11 +15,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "integer-division" +#include "llvm/Transforms/Utils/IntegerDivision.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" -#include "llvm/IRBuilder.h" -#include "llvm/Transforms/Utils/IntegerDivision.h" using namespace llvm; diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 5e05c83c35..5dddb6e28a 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -29,17 +29,17 @@ #define DEBUG_TYPE "lcssa" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Pass.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" #include "llvm/Support/PredIteratorCache.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; STATISTIC(NumLCSSA, "Number of live out of a loop variables"); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index a954d82c05..0e56817a1b 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -13,8 +13,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/DIBuilder.h" +#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalAlias.h" @@ -26,20 +34,12 @@ #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" #include "llvm/Operator.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 9d9e201665..6a68416a3d 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -39,26 +39,26 @@ #define DEBUG_TYPE "loop-simplify" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" -#include "llvm/Type.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Type.h" using namespace llvm; STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 20237500c3..d24b334681 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -18,12 +18,12 @@ #define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/BasicBlock.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 67e17f4ca8..242e7fa021 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -23,12 +23,12 @@ #define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/BasicBlock.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index 02bdcda391..8756d26ca4 100644 --- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lower-expect-intrinsic" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" #include "llvm/BasicBlock.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -21,8 +23,6 @@ #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" #include "llvm/Pass.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include <vector> diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 930555424d..7b89ffd401 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -36,6 +36,8 @@ #define DEBUG_TYPE "lowerinvoke" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" @@ -43,12 +45,10 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include <csetjmp> #include <set> using namespace llvm; diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 1547439b5c..74a457ce81 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -14,16 +14,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index f4ca81af6d..70fbf13b97 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -14,12 +14,12 @@ #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Instructions.h" #include "llvm/Function.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Instructions.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" using namespace llvm; STATISTIC(NumPromoted, "Number of alloca's promoted"); diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index ada8e3b11e..363e9367f3 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -13,9 +13,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Transforms/IPO.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Module.h" diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 558de9d12e..b41f433659 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -27,26 +27,26 @@ #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" +#include "llvm/DIBuilder.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" -#include "llvm/DIBuilder.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" -#include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <queue> using namespace llvm; diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 72d4199a2a..e1e7f4d668 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -12,12 +12,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ssaupdater" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CFG.h" @@ -25,7 +26,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 6c34eed13d..3cae77227c 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -13,6 +13,14 @@ #define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" @@ -25,15 +33,6 @@ #include "llvm/Metadata.h" #include "llvm/Module.h" #include "llvm/Operator.h" -#include "llvm/Type.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" @@ -42,9 +41,10 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Type.h" #include <algorithm> -#include <set> #include <map> +#include <set> using namespace llvm; static cl::opt<unsigned> @@ -3511,22 +3511,44 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const DataLayout *TD, + const TargetTransformInfo *TTI, const SmallDenseMap<PHINode*, Type*>& ResultTypes) { - // The table density should be at least 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) return false; // TableSize overflowed, or mul below might overflow. - if (SI->getNumCases() * 10 >= TableSize * 4) - return true; - // If each table would fit in a register, we should build it anyway. + bool AllTablesFitInRegister = true; + bool HasIllegalType = false; for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(), E = ResultTypes.end(); I != E; ++I) { - if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second)) - return false; + Type *Ty = I->second; + + // Saturate this flag to true. + HasIllegalType = HasIllegalType || + !TTI->getScalarTargetTransformInfo()->isTypeLegal(Ty); + + // Saturate this flag to false. + AllTablesFitInRegister = AllTablesFitInRegister && + SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty); + + // If both flags saturate, we're done. NOTE: This *only* works with + // saturating flags, and all flags have to saturate first due to the + // non-deterministic behavior of iterating over a dense map. + if (HasIllegalType && !AllTablesFitInRegister) + break; } - return true; + + // If each table would fit in a register, we should build it anyway. + if (AllTablesFitInRegister) + return true; + + // Don't build a table that doesn't fit in-register if it has illegal types. + if (HasIllegalType) + return false; + + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + return SI->getNumCases() * 10 >= TableSize * 4; } /// SwitchToLookupTable - If the switch is only used to initialize one or more @@ -3607,7 +3629,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes)) + if (!ShouldBuildLookupTable(SI, TableSize, TD, TTI, ResultTypes)) return false; // Create the BB that does the lookups. diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 110f380857..5883293a81 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -15,18 +15,18 @@ #define DEBUG_TYPE "indvars" -#include "llvm/Instructions.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/DataLayout.h" +#include "llvm/Instructions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/SimplifyIndVar.h" -#include "llvm/DataLayout.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 65353dc460..8b2eeb9928 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -15,18 +15,18 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" -#include "llvm/Function.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/DataLayout.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Type.h" using namespace llvm; STATISTIC(NumSimplified, "Number of redundant instructions removed"); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6ca1175c41..11a1ee5207 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -15,14 +15,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SimplifyLibCalls.h" -#include "llvm/DataLayout.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/DataLayout.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" -#include "llvm/Module.h" #include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" @@ -1409,6 +1409,254 @@ struct PrintFOpt : public LibCallOptimization { } }; +struct SPrintFOpt : public LibCallOptimization { + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return 0; + + // If we just have a format string (nothing else crazy) transform it. + if (CI->getNumArgOperands() == 2) { + // Make sure there's no % in the constant array. We could try to handle + // %% -> % in the future if we cared. + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') + return 0; // we found a format specifier, bail out. + + // These optimizations require DataLayout. + if (!TD) return 0; + + // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), // Copy the + FormatStr.size() + 1), 1); // nul byte. + return ConstantInt::get(CI->getType(), FormatStr.size()); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return 0; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); + Value *Ptr = CastToCStr(CI->getArgOperand(0), B); + B.CreateStore(V, Ptr); + Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul"); + B.CreateStore(B.getInt8(0), Ptr); + + return ConstantInt::get(CI->getType(), 1); + } + + if (FormatStr[1] == 's') { + // These optimizations require DataLayout. + if (!TD) return 0; + + // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; + + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + if (!Len) + return 0; + Value *IncLen = B.CreateAdd(Len, + ConstantInt::get(Len->getType(), 1), + "leninc"); + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); + + // The sprintf result is the unincremented number of bytes in the string. + return B.CreateIntCast(Len, CI->getType(), false); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed pointer arguments and an integer result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. + if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *SIPrintFFn = + M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct FPrintFOpt : public LibCallOptimization { + Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // All the optimizations depend on the format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return 0; + + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) + if (CI->getNumArgOperands() == 2) { + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') // Could handle %% -> % if we cared. + return 0; // We found a format specifier. + + // These optimizations require DataLayout. + if (!TD) return 0; + + Value *NewCI = EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return 0; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // fprintf(F, "%c", chr) --> fputc(chr, F) + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, + TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + } + + if (FormatStr[1] == 's') { + // fprintf(F, "%s", str) --> fputs(str, F) + if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) + return 0; + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed paramters as pointers and integer result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no + // floating point arguments. + if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *FIPrintFFn = + M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct FWriteOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require a pointer, an integer, an integer, a pointer, returning integer. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + !FT->getParamType(2)->isIntegerTy() || + !FT->getParamType(3)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + // Get the element size and count. + ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeC || !CountC) return 0; + uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); + + // If this is writing zero records, remove the call (it's a noop). + if (Bytes == 0) + return ConstantInt::get(CI->getType(), 0); + + // If this is writing one byte, turn it into fputc. + // This optimisation is only valid, if the return value is unused. + if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) + Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + } + + return 0; + } +}; + +struct FPutsOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + // Require two pointers. Also, we can't optimize if return value is used. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !CI->use_empty()) + return 0; + + // fputs(s,F) --> fwrite(s,1,strlen(s),F) + uint64_t Len = GetStringLength(CI->getArgOperand(0)); + if (!Len) return 0; + // Known to have no uses (see above). + return EmitFWrite(CI->getArgOperand(0), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, TD, TLI); + } +}; + +struct PutsOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + // Check for a constant string. + StringRef Str; + if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + return 0; + + if (Str.empty() && CI->use_empty()) { + // puts("") -> putchar('\n') + Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + return 0; + } +}; + } // End anonymous namespace. namespace llvm { @@ -1464,6 +1712,11 @@ class LibCallSimplifierImpl { // Formatting and IO library call optimizations. PrintFOpt PrintF; + SPrintFOpt SPrintF; + FPrintFOpt FPrintF; + FWriteOpt FWrite; + FPutsOpt FPuts; + PutsOpt Puts; void initOptimizations(); void addOpt(LibFunc::Func F, LibCallOptimization* Opt); @@ -1588,6 +1841,11 @@ void LibCallSimplifierImpl::initOptimizations() { // Formatting and IO library call optimizations. addOpt(LibFunc::printf, &PrintF); + addOpt(LibFunc::sprintf, &SPrintF); + addOpt(LibFunc::fprintf, &FPrintF); + addOpt(LibFunc::fwrite, &FWrite); + addOpt(LibFunc::fputs, &FPuts); + addOpt(LibFunc::puts, &Puts); } Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index b1cad06dff..8cf62196cc 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -15,12 +15,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/BasicBlock.h" #include "llvm/Function.h" #include "llvm/Instructions.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Type.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; char UnifyFunctionExitNodes::ID = 0; diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index dacbc7f242..a48229132b 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -16,22 +16,13 @@ #define BBV_NAME "bb-vectorize" #define DEBUG_TYPE BBV_NAME -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" +#include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" @@ -39,14 +30,23 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Constants.h" +#include "llvm/DataLayout.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/DataLayout.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Vectorize.h" +#include "llvm/Type.h" #include <algorithm> #include <map> using namespace llvm; @@ -2903,6 +2903,7 @@ namespace { K->mutateType(getVecTypeForPair(L->getType(), H->getType())); combineMetadata(K, H); + K->intersectOptionalDataWith(H); for (unsigned o = 0; o < NumOperands; ++o) K->setOperand(o, ReplacedOperands[o]); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 55733f7f8a..feeececedb 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6,432 +6,50 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops -// and generates target-independent LLVM-IR. Legalization of the IR is done -// in the codegen. However, the vectorizes uses (will use) the codegen -// interfaces to generate IR that is likely to result in an optimal binary. -// -// The loop vectorizer combines consecutive loop iteration into a single -// 'wide' iteration. After this transformation the index is incremented -// by the SIMD vector width, and not by one. -// -// This pass has three parts: -// 1. The main loop pass that drives the different parts. -// 2. LoopVectorizationLegality - A unit that checks for the legality -// of the vectorization. -// 3. SingleBlockLoopVectorizer - A unit that performs the actual -// widening of instructions. -// 4. LoopVectorizationCostModel - A unit that checks for the profitability -// of vectorization. It decides on the optimal vector width, which -// can be one, if vectorization is not profitable. -// -//===----------------------------------------------------------------------===// -// -// The reduction-variable vectorization is based on the paper: -// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. -// -// Variable uniformity checks are inspired by: -// Karrenberg, R. and Hack, S. Whole Function Vectorization. -// -// Other ideas/concepts are from: -// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. -// -// S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of -// Vectorizing Compilers. -// -//===----------------------------------------------------------------------===// -#define LV_NAME "loop-vectorize" -#define DEBUG_TYPE LV_NAME -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Pass.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Value.h" -#include "llvm/Function.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/ADT/SmallVector.h" +#include "LoopVectorize.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/TargetTransformInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Constants.h" +#include "llvm/DataLayout.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" +#include "llvm/TargetTransformInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include <algorithm> -using namespace llvm; +#include "llvm/Transforms/Vectorize.h" +#include "llvm/Type.h" +#include "llvm/Value.h" static cl::opt<unsigned> VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, - cl::desc("Set the default vectorization width. Zero is autoselect.")); - -/// We don't vectorize loops with a known constant trip count below this number. -const unsigned TinyTripCountThreshold = 16; + cl::desc("Sets the SIMD width. Zero is autoselect.")); -/// When performing a runtime memory check, do not check more than this -/// number of pointers. Notice that the check is quadratic! -const unsigned RuntimeMemoryCheckThreshold = 2; - -/// This is the highest vector width that we try to generate. -const unsigned MaxVectorSize = 8; +static cl::opt<bool> +EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, + cl::desc("Enable if-conversion during vectorization.")); namespace { -// Forward declarations. -class LoopVectorizationLegality; -class LoopVectorizationCostModel; - -/// SingleBlockLoopVectorizer vectorizes loops which contain only one basic -/// block to a specified vectorization factor (VF). -/// This class performs the widening of scalars into vectors, or multiple -/// scalars. This class also implements the following features: -/// * It inserts an epilogue loop for handling loops that don't have iteration -/// counts that are known to be a multiple of the vectorization factor. -/// * It handles the code generation for reduction variables. -/// * Scalarization (implementation using scalars) of un-vectorizable -/// instructions. -/// SingleBlockLoopVectorizer does not perform any vectorization-legality -/// checks, and relies on the caller to check for the different legality -/// aspects. The SingleBlockLoopVectorizer relies on the -/// LoopVectorizationLegality class to provide information about the induction -/// and reduction variables that were found to a given vectorization factor. -class SingleBlockLoopVectorizer { -public: - /// Ctor. - SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li, - DominatorTree *Dt, DataLayout *Dl, - LPPassManager *Lpm, - unsigned VecWidth): - OrigLoop(Orig), SE(Se), LI(Li), DT(Dt), DL(Dl), LPM(Lpm), VF(VecWidth), - Builder(Se->getContext()), Induction(0), OldInduction(0) { } - - // Perform the actual loop widening (vectorization). - void vectorize(LoopVectorizationLegality *Legal) { - // Create a new empty loop. Unlink the old loop and connect the new one. - createEmptyLoop(Legal); - // Widen each instruction in the old loop to a new one in the new loop. - // Use the Legality module to find the induction and reduction variables. - vectorizeLoop(Legal); - // Register the new loop and update the analysis passes. - updateAnalysis(); - } - -private: - /// Add code that checks at runtime if the accessed arrays overlap. - /// Returns the comperator value or NULL if no check is needed. - Value *addRuntimeCheck(LoopVectorizationLegality *Legal, - Instruction *Loc); - /// Create an empty loop, based on the loop ranges of the old loop. - void createEmptyLoop(LoopVectorizationLegality *Legal); - /// Copy and widen the instructions from the old loop. - void vectorizeLoop(LoopVectorizationLegality *Legal); - /// Insert the new loop to the loop hierarchy and pass manager - /// and update the analysis passes. - void updateAnalysis(); - - /// This instruction is un-vectorizable. Implement it as a sequence - /// of scalars. - void scalarizeInstruction(Instruction *Instr); - - /// Create a broadcast instruction. This method generates a broadcast - /// instruction (shuffle) for loop invariant values and for the induction - /// value. If this is the induction variable then we extend it to N, N+1, ... - /// this is needed because each iteration in the loop corresponds to a SIMD - /// element. - Value *getBroadcastInstrs(Value *V); - - /// This is a helper function used by getBroadcastInstrs. It adds 0, 1, 2 .. - /// for each element in the vector. Starting from zero. - Value *getConsecutiveVector(Value* Val); - - /// When we go over instructions in the basic block we rely on previous - /// values within the current basic block or on loop invariant values. - /// When we widen (vectorize) values we place them in the map. If the values - /// are not within the map, they have to be loop invariant, so we simply - /// broadcast them into a vector. - Value *getVectorValue(Value *V); - - /// Get a uniform vector of constant integers. We use this to get - /// vectors of ones and zeros for the reduction code. - Constant* getUniformVector(unsigned Val, Type* ScalarTy); - - typedef DenseMap<Value*, Value*> ValueMap; - - /// The original loop. - Loop *OrigLoop; - // Scev analysis to use. - ScalarEvolution *SE; - // Loop Info. - LoopInfo *LI; - // Dominator Tree. - DominatorTree *DT; - // Data Layout. - DataLayout *DL; - // Loop Pass Manager; - LPPassManager *LPM; - // The vectorization factor to use. - unsigned VF; - - // The builder that we use - IRBuilder<> Builder; - - // --- Vectorization state --- - - /// The vector-loop preheader. - BasicBlock *LoopVectorPreHeader; - /// The scalar-loop preheader. - BasicBlock *LoopScalarPreHeader; - /// Middle Block between the vector and the scalar. - BasicBlock *LoopMiddleBlock; - ///The ExitBlock of the scalar loop. - BasicBlock *LoopExitBlock; - ///The vector loop body. - BasicBlock *LoopVectorBody; - ///The scalar loop body. - BasicBlock *LoopScalarBody; - ///The first bypass block. - BasicBlock *LoopBypassBlock; - - /// The new Induction variable which was added to the new block. - PHINode *Induction; - /// The induction variable of the old basic block. - PHINode *OldInduction; - // Maps scalars to widened vectors. - ValueMap WidenMap; -}; - -/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and -/// to what vectorization factor. -/// This class does not look at the profitability of vectorization, only the -/// legality. This class has two main kinds of checks: -/// * Memory checks - The code in canVectorizeMemory checks if vectorization -/// will change the order of memory accesses in a way that will change the -/// correctness of the program. -/// * Scalars checks - The code in canVectorizeBlock checks for a number -/// of different conditions, such as the availability of a single induction -/// variable, that all types are supported and vectorize-able, etc. -/// This code reflects the capabilities of SingleBlockLoopVectorizer. -/// This class is also used by SingleBlockLoopVectorizer for identifying -/// induction variable and the different reduction variables. -class LoopVectorizationLegality { -public: - LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl): - TheLoop(Lp), SE(Se), DL(Dl), Induction(0) { } - - /// This represents the kinds of reductions that we support. - enum ReductionKind { - NoReduction, /// Not a reduction. - IntegerAdd, /// Sum of numbers. - IntegerMult, /// Product of numbers. - IntegerOr, /// Bitwise or logical OR of numbers. - IntegerAnd, /// Bitwise or logical AND of numbers. - IntegerXor /// Bitwise or logical XOR of numbers. - }; - - /// This POD struct holds information about reduction variables. - struct ReductionDescriptor { - // Default C'tor - ReductionDescriptor(): - StartValue(0), LoopExitInstr(0), Kind(NoReduction) {} - - // C'tor. - ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K): - StartValue(Start), LoopExitInstr(Exit), Kind(K) {} - - // The starting value of the reduction. - // It does not have to be zero! - Value *StartValue; - // The instruction who's value is used outside the loop. - Instruction *LoopExitInstr; - // The kind of the reduction. - ReductionKind Kind; - }; - - // This POD struct holds information about the memory runtime legality - // check that a group of pointers do not overlap. - struct RuntimePointerCheck { - RuntimePointerCheck(): Need(false) {} - - /// Reset the state of the pointer runtime information. - void reset() { - Need = false; - Pointers.clear(); - Starts.clear(); - Ends.clear(); - } - - /// Insert a pointer and calculate the start and end SCEVs. - void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr) { - const SCEV *Sc = SE->getSCEV(Ptr); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); - assert(AR && "Invalid addrec expression"); - const SCEV *Ex = SE->getExitCount(Lp, Lp->getHeader()); - const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); - Pointers.push_back(Ptr); - Starts.push_back(AR->getStart()); - Ends.push_back(ScEnd); - } - - /// This flag indicates if we need to add the runtime check. - bool Need; - /// Holds the pointers that we need to check. - SmallVector<Value*, 2> Pointers; - /// Holds the pointer value at the beginning of the loop. - SmallVector<const SCEV*, 2> Starts; - /// Holds the pointer value at the end of the loop. - SmallVector<const SCEV*, 2> Ends; - }; - - /// ReductionList contains the reduction descriptors for all - /// of the reductions that were found in the loop. - typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; - - /// InductionList saves induction variables and maps them to the initial - /// value entring the loop. - typedef DenseMap<PHINode*, Value*> InductionList; - - /// Returns true if it is legal to vectorize this loop. - /// This does not mean that it is profitable to vectorize this - /// loop, only that it is legal to do so. - bool canVectorize(); - - /// Returns the Induction variable. - PHINode *getInduction() {return Induction;} - - /// Returns the reduction variables found in the loop. - ReductionList *getReductionVars() { return &Reductions; } - - /// Returns the induction variables found in the loop. - InductionList *getInductionVars() { return &Inductions; } - - /// Check if this pointer is consecutive when vectorizing. This happens - /// when the last index of the GEP is the induction variable, or that the - /// pointer itself is an induction variable. - /// This check allows us to vectorize A[idx] into a wide load/store. - bool isConsecutivePtr(Value *Ptr); - - /// Returns true if the value V is uniform within the loop. - bool isUniform(Value *V); - - /// Returns true if this instruction will remain scalar after vectorization. - bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);} - - /// Returns the information that we collected about runtime memory check. - RuntimePointerCheck *getRuntimePointerCheck() {return &PtrRtCheck; } -private: - /// Check if a single basic block loop is vectorizable. - /// At this point we know that this is a loop with a constant trip count - /// and we only need to check individual instructions. - bool canVectorizeBlock(BasicBlock &BB); - - /// When we vectorize loops we may change the order in which - /// we read and write from memory. This method checks if it is - /// legal to vectorize the code, considering only memory constrains. - /// Returns true if BB is vectorizable - bool canVectorizeMemory(BasicBlock &BB); - - /// Returns True, if 'Phi' is the kind of reduction variable for type - /// 'Kind'. If this is a reduction variable, it adds it to ReductionList. - bool AddReductionVar(PHINode *Phi, ReductionKind Kind); - /// Returns true if the instruction I can be a reduction variable of type - /// 'Kind'. - bool isReductionInstr(Instruction *I, ReductionKind Kind); - /// Returns True, if 'Phi' is an induction variable. - bool isInductionVariable(PHINode *Phi); - /// Return true if can compute the address bounds of Ptr within the loop. - bool hasComputableBounds(Value *Ptr); - - /// The loop that we evaluate. - Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; - /// DataLayout analysis. - DataLayout *DL; - - // --- vectorization state --- // - - /// Holds the integer induction variable. This is the counter of the - /// loop. - PHINode *Induction; - /// Holds the reduction variables. - ReductionList Reductions; - /// Holds all of the induction variables that we found in the loop. - /// Notice that inductions don't need to start at zero and that induction - /// variables can be pointers. - InductionList Inductions; - - /// Allowed outside users. This holds the reduction - /// vars which can be accessed from outside the loop. - SmallPtrSet<Value*, 4> AllowedExit; - /// This set holds the variables which are known to be uniform after - /// vectorization. - SmallPtrSet<Instruction*, 4> Uniforms; - /// We need to check that all of the pointers in this list are disjoint - /// at runtime. - RuntimePointerCheck PtrRtCheck; -}; - -/// LoopVectorizationCostModel - estimates the expected speedups due to -/// vectorization. -/// In many cases vectorization is not profitable. This can happen because -/// of a number of reasons. In this class we mainly attempt to predict -/// the expected speedup/slowdowns due to the supported instruction set. -/// We use the VectorTargetTransformInfo to query the different backends -/// for the cost of different operations. -class LoopVectorizationCostModel { -public: - /// C'tor. - LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se, - LoopVectorizationLegality *Leg, - const VectorTargetTransformInfo *Vtti): - TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { } - - /// Returns the most profitable vectorization factor for the loop that is - /// smaller or equal to the VF argument. This method checks every power - /// of two up to VF. - unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize); - -private: - /// Returns the expected execution cost. The unit of the cost does - /// not matter because we use the 'cost' units to compare different - /// vector widths. The cost that is returned is *not* normalized by - /// the factor width. - unsigned expectedCost(unsigned VF); - - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - unsigned getInstructionCost(Instruction *I, unsigned VF); - - /// A helper function for converting Scalar types to vector types. - /// If the incoming type is void, we return void. If the VF is 1, we return - /// the scalar type. - static Type* ToVectorTy(Type *Scalar, unsigned VF); - - /// The loop that we evaluate. - Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; - - /// Vectorization legality. - LoopVectorizationLegality *Legal; - /// Vector target information. - const VectorTargetTransformInfo *VTTI; -}; - +/// The LoopVectorize Pass. struct LoopVectorize : public LoopPass { static char ID; // Pass identification, replacement for typeid @@ -460,7 +78,7 @@ struct LoopVectorize : public LoopPass { L->getHeader()->getParent()->getName() << "\"\n"); // Check if it is legal to vectorize the loop. - LoopVectorizationLegality LVL(L, SE, DL); + LoopVectorizationLegality LVL(L, SE, DL, DT); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing.\n"); return false; @@ -491,7 +109,7 @@ struct LoopVectorize : public LoopPass { "\n"); // If we decided that it is *legal* to vectorizer the loop then do it. - SingleBlockLoopVectorizer LB(L, SE, LI, DT, DL, &LPM, VF); + InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF); LB.vectorize(&LVL); DEBUG(verifyFunction(*L->getHeader()->getParent())); @@ -511,11 +129,44 @@ struct LoopVectorize : public LoopPass { }; -Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) { +}// namespace + +//===----------------------------------------------------------------------===// +// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and +// LoopVectorizationCostModel. +//===----------------------------------------------------------------------===// + +void +LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE, + Loop *Lp, Value *Ptr) { + const SCEV *Sc = SE->getSCEV(Ptr); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); + assert(AR && "Invalid addrec expression"); + const SCEV *Ex = SE->getExitCount(Lp, Lp->getLoopLatch()); + const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); + Pointers.push_back(Ptr); + Starts.push_back(AR->getStart()); + Ends.push_back(ScEnd); +} + +Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { // Create the types. LLVMContext &C = V->getContext(); Type *VTy = VectorType::get(V->getType(), VF); Type *I32 = IntegerType::getInt32Ty(C); + + // Save the current insertion location. + Instruction *Loc = Builder.GetInsertPoint(); + + // We need to place the broadcast of invariant variables outside the loop. + Instruction *Instr = dyn_cast<Instruction>(V); + bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody); + bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr; + + // Place the code for broadcasting invariant variables in the new preheader. + if (Invariant) + Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); + Constant *Zero = ConstantInt::get(I32, 0); Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32, VF)); Value *UndefVal = UndefValue::get(VTy); @@ -523,27 +174,28 @@ Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) { Value *SingleElem = Builder.CreateInsertElement(UndefVal, V, Zero); // Broadcast the scalar into all locations in the vector. Value *Shuf = Builder.CreateShuffleVector(SingleElem, UndefVal, Zeros, - "broadcast"); - // We are accessing the induction variable. Make sure to promote the - // index for each consecutive SIMD lane. This adds 0,1,2 ... to all lanes. - if (V == Induction) - return getConsecutiveVector(Shuf); + "broadcast"); + + // Restore the builder insertion point. + if (Invariant) + Builder.SetInsertPoint(Loc); + return Shuf; } -Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) { +Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, bool Negate) { assert(Val->getType()->isVectorTy() && "Must be a vector"); assert(Val->getType()->getScalarType()->isIntegerTy() && "Elem must be an integer"); // Create the types. Type *ITy = Val->getType()->getScalarType(); VectorType *Ty = cast<VectorType>(Val->getType()); - unsigned VLen = Ty->getNumElements(); + int VLen = Ty->getNumElements(); SmallVector<Constant*, 8> Indices; // Create a vector of consecutive numbers from zero to VF. - for (unsigned i = 0; i < VLen; ++i) - Indices.push_back(ConstantInt::get(ITy, i)); + for (int i = 0; i < VLen; ++i) + Indices.push_back(ConstantInt::get(ITy, Negate ? (-i): i )); // Add the consecutive indices to the vector value. Constant *Cv = ConstantVector::get(Indices); @@ -554,10 +206,13 @@ Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) { bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); - // If this pointer is an induction variable, return it. + // If this value is a pointer induction variable we know it is consecutive. PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr); - if (Phi && getInductionVars()->count(Phi)) - return true; + if (Phi && Inductions.count(Phi)) { + InductionInfo II = Inductions[Phi]; + if (PtrInduction == II.IK) + return true; + } GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr); if (!Gep) @@ -571,7 +226,7 @@ bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) return false; - // We can emit wide load/stores only of the last index is the induction + // We can emit wide load/stores only if the last index is the induction // variable. const SCEV *Last = SE->getSCEV(LastIndex); if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) { @@ -590,7 +245,8 @@ bool LoopVectorizationLegality::isUniform(Value *V) { return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); } -Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) { +Value *InnerLoopVectorizer::getVectorValue(Value *V) { + assert(V != Induction && "The new induction variable should not be used."); assert(!V->getType()->isVectorTy() && "Can't widen a vector"); // If we saved a vectorized copy of V, use it. Value *&MapEntry = WidenMap[V]; @@ -604,11 +260,11 @@ Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) { } Constant* -SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) { +InnerLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) { return ConstantVector::getSplat(VF, ConstantInt::get(ScalarTy, Val, true)); } -void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) { +void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); // Holds vector parameters or scalars, in case of uniform vals. SmallVector<Value*, 8> Params; @@ -619,7 +275,7 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) { // If we are accessing the old induction variable, use the new one. if (SrcOp == OldInduction) { - Params.push_back(getVectorValue(Induction)); + Params.push_back(getVectorValue(SrcOp)); continue; } @@ -679,10 +335,10 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) { } Value* -SingleBlockLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, - Instruction *Loc) { +InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, + Instruction *Loc) { LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck = - Legal->getRuntimePointerCheck(); + Legal->getRuntimePointerCheck(); if (!PtrRtCheck->Need) return NULL; @@ -695,22 +351,21 @@ SingleBlockLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, SCEVExpander Exp(*SE, "induction"); // Use this type for pointer arithmetic. - Type* PtrArithTy = PtrRtCheck->Pointers[0]->getType(); + Type* PtrArithTy = Type::getInt8PtrTy(Loc->getContext(), 0); - for (unsigned i=0; i < NumPointers; ++i) { + for (unsigned i = 0; i < NumPointers; ++i) { Value *Ptr = PtrRtCheck->Pointers[i]; const SCEV *Sc = SE->getSCEV(Ptr); if (SE->isLoopInvariant(Sc, OrigLoop)) { - DEBUG(dbgs() << "LV1: Adding RT check for a loop invariant ptr:" << + DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" << *Ptr <<"\n"); Starts.push_back(Ptr); Ends.push_back(Ptr); } else { DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n"); - Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], - PtrArithTy, Loc); + Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc); Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc); Starts.push_back(Start); Ends.push_back(End); @@ -719,10 +374,16 @@ SingleBlockLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i+1; j < NumPointers; ++j) { + Instruction::CastOps Op = Instruction::BitCast; + Value *Start0 = CastInst::Create(Op, Starts[i], PtrArithTy, "bc", Loc); + Value *Start1 = CastInst::Create(Op, Starts[j], PtrArithTy, "bc", Loc); + Value *End0 = CastInst::Create(Op, Ends[i], PtrArithTy, "bc", Loc); + Value *End1 = CastInst::Create(Op, Ends[j], PtrArithTy, "bc", Loc); + Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, - Starts[i], Ends[j], "bound0", Loc); + Start0, End1, "bound0", Loc); Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, - Starts[j], Ends[i], "bound1", Loc); + Start1, End0, "bound1", Loc); Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1, "found.conflict", Loc); if (MemoryRuntimeCheck) @@ -740,32 +401,32 @@ SingleBlockLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, } void -SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { +InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { /* In this function we generate a new loop. The new loop will contain the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- vector loop bypass. - / | - / v -| [ ] <-- vector pre header. -| | -| v -| [ ] \ -| [ ]_| <-- vector loop. -| | - \ v + [ ] <-- vector loop bypass. + / | + / v + | [ ] <-- vector pre header. + | | + | v + | [ ] \ + | [ ]_| <-- vector loop. + | | + \ v >[ ] <--- middle-block. - / | - / v -| [ ] <--- new preheader. -| | -| v -| [ ] \ -| [ ]_| <-- old scalar loop to handle remainder. - \ | - \ v + / | + / v + | [ ] <--- new preheader. + | | + | v + | [ ] \ + | [ ]_| <-- old scalar loop to handle remainder. + \ | + \ v >[ ] <-- exit block. ... */ @@ -781,10 +442,10 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // don't have a single induction variable. OldInduction = Legal->getInduction(); Type *IdxTy = OldInduction ? OldInduction->getType() : - DL->getIntPtrType(SE->getContext()); + DL->getIntPtrType(SE->getContext()); // Find the loop boundaries. - const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader()); + const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch()); assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); // Get the total trip count from the count by adding 1. @@ -803,10 +464,9 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // value from the induction PHI node. If we don't have an induction variable // then we know that it starts at zero. Value *StartIdx = OldInduction ? - OldInduction->getIncomingValueForBlock(BypassBlock): - ConstantInt::get(IdxTy, 0); + OldInduction->getIncomingValueForBlock(BypassBlock): + ConstantInt::get(IdxTy, 0); - assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop"); assert(BypassBlock && "Invalid loop structure"); // Generate the code that checks in runtime if arrays overlap. @@ -815,13 +475,13 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Split the single block loop into the two loop structure described above. BasicBlock *VectorPH = - BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph"); + BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph"); BasicBlock *VecBody = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); BasicBlock *MiddleBlock = - VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block"); + VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block"); BasicBlock *ScalarPH = - MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph"); + MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph"); // This is the location in which we add all of the logic for bypassing // the new vector loop. @@ -878,8 +538,8 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // PHIs that are left in the scalar version of the loop. // The starting values of PHI nodes depend on the counter of the last // iteration in the vectorized loop. - // If we come from a bypass edge then we need to start from the original start - // value. + // If we come from a bypass edge then we need to start from the original + // start value. // This variable saves the new starting index for the scalar loop. PHINode *ResumeIndex = 0; @@ -887,27 +547,54 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { LoopVectorizationLegality::InductionList *List = Legal->getInductionVars(); for (I = List->begin(), E = List->end(); I != E; ++I) { PHINode *OrigPhi = I->first; + LoopVectorizationLegality::InductionInfo II = I->second; PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val", - MiddleBlock->getTerminator()); + MiddleBlock->getTerminator()); Value *EndValue = 0; - if (OrigPhi->getType()->isIntegerTy()) { + switch (II.IK) { + case LoopVectorizationLegality::NoInduction: + llvm_unreachable("Unknown induction"); + case LoopVectorizationLegality::IntInduction: { // Handle the integer induction counter: + assert(OrigPhi->getType()->isIntegerTy() && "Invalid type"); assert(OrigPhi == OldInduction && "Unknown integer PHI"); // We know what the end value is. EndValue = IdxEndRoundDown; // We also know which PHI node holds it. ResumeIndex = ResumeVal; - } else { + break; + } + case LoopVectorizationLegality::ReverseIntInduction: { + // Convert the CountRoundDown variable to the PHI size. + unsigned CRDSize = CountRoundDown->getType()->getScalarSizeInBits(); + unsigned IISize = II.StartValue->getType()->getScalarSizeInBits(); + Value *CRD = CountRoundDown; + if (CRDSize > IISize) + CRD = CastInst::Create(Instruction::Trunc, CountRoundDown, + II.StartValue->getType(), + "tr.crd", BypassBlock->getTerminator()); + else if (CRDSize < IISize) + CRD = CastInst::Create(Instruction::SExt, CountRoundDown, + II.StartValue->getType(), + "sext.crd", BypassBlock->getTerminator()); + // Handle reverse integer induction counter: + EndValue = BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end", + BypassBlock->getTerminator()); + break; + } + case LoopVectorizationLegality::PtrInduction: { // For pointer induction variables, calculate the offset using // the end index. - EndValue = GetElementPtrInst::Create(I->second, CountRoundDown, + EndValue = GetElementPtrInst::Create(II.StartValue, CountRoundDown, "ptr.ind.end", BypassBlock->getTerminator()); + break; } + }// end of case // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. - ResumeVal->addIncoming(I->second, BypassBlock); + ResumeVal->addIncoming(II.StartValue, BypassBlock); ResumeVal->addIncoming(EndValue, VecBody); // Fix the scalar body counter (PHI node). @@ -956,19 +643,22 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Get ready to start creating new instructions into the vectorized body. Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); - // Register the new loop. + // Create and register the new vector loop. Loop* Lp = new Loop(); - LPM->insertLoop(Lp, OrigLoop->getParentLoop()); - - Lp->addBasicBlockToLoop(VecBody, LI->getBase()); - Loop *ParentLoop = OrigLoop->getParentLoop(); + + // Insert the new loop into the loop nest and register the new basic blocks. if (ParentLoop) { + ParentLoop->addChildLoop(Lp); ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase()); ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase()); ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase()); + } else { + LI->addTopLevelLoop(Lp); } + Lp->addBasicBlockToLoop(VecBody, LI->getBase()); + // Save the state. LoopVectorPreHeader = VectorPH; LoopScalarPreHeader = ScalarPH; @@ -1000,8 +690,37 @@ getReductionIdentity(LoopVectorizationLegality::ReductionKind K) { } } +static bool +isTriviallyVectorizableIntrinsic(Instruction *Inst) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); + if (!II) + return false; + switch (II->getIntrinsicID()) { + case Intrinsic::sqrt: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::log: + case Intrinsic::log10: + case Intrinsic::log2: + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + case Intrinsic::pow: + case Intrinsic::fma: + return true; + default: + return false; + } + return false; +} + void -SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { +InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { //===------------------------------------------------===// // // Notice: any optimization or new instruction that go @@ -1009,14 +728,13 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // the cost-model. // //===------------------------------------------------===// - typedef SmallVector<PHINode*, 4> PhiVector; BasicBlock &BB = *OrigLoop->getHeader(); - Constant *Zero = ConstantInt::get( - IntegerType::getInt32Ty(BB.getContext()), 0); + Constant *Zero = + ConstantInt::get(IntegerType::getInt32Ty(BB.getContext()), 0); // In order to support reduction variables we need to be able to vectorize // Phi nodes. Phi nodes have cycles, so we need to vectorize them in two - // steages. First, we create a new vector PHI node with no incoming edges. + // stages. First, we create a new vector PHI node with no incoming edges. // We use this value when we vectorize all of the instructions that use the // PHI. Next, after all of the instructions in the block are complete we // add the new incoming edges to the PHI. At this point all of the @@ -1024,245 +742,17 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // construct the PHI. PhiVector RdxPHIsToFix; - // For each instruction in the old loop. - for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { - Instruction *Inst = it; - - switch (Inst->getOpcode()) { - case Instruction::Br: - // Nothing to do for PHIs and BR, since we already took care of the - // loop control flow instructions. - continue; - case Instruction::PHI:{ - PHINode* P = cast<PHINode>(Inst); - // Handle reduction variables: - if (Legal->getReductionVars()->count(P)) { - // This is phase one of vectorizing PHIs. - Type *VecTy = VectorType::get(Inst->getType(), VF); - WidenMap[Inst] = PHINode::Create(VecTy, 2, "vec.phi", - LoopVectorBody->getFirstInsertionPt()); - RdxPHIsToFix.push_back(P); - continue; - } - - // This PHINode must be an induction variable. - // Make sure that we know about it. - assert(Legal->getInductionVars()->count(P) && - "Not an induction variable"); - - if (P->getType()->isIntegerTy()) { - assert(P == OldInduction && "Unexpected PHI"); - WidenMap[Inst] = getBroadcastInstrs(Induction); - continue; - } - - // Handle pointer inductions. - assert(P->getType()->isPointerTy() && "Unexpected type."); - Value *StartIdx = OldInduction ? - Legal->getInductionVars()->lookup(OldInduction) : - ConstantInt::get(Induction->getType(), 0); - - // This is the pointer value coming into the loop. - Value *StartPtr = Legal->getInductionVars()->lookup(P); - - // This is the normalized GEP that starts counting at zero. - Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, - "normalized.idx"); + // Scan the loop in a topological order to ensure that defs are vectorized + // before users. + LoopBlocksDFS DFS(OrigLoop); + DFS.perform(LI); - // This is the vector of results. Notice that we don't generate vector - // geps because scalar geps result in better code. - Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); - for (unsigned int i = 0; i < VF; ++i) { - Constant *Idx = ConstantInt::get(Induction->getType(), i); - Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); - Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep"); - VecVal = Builder.CreateInsertElement(VecVal, SclrGep, - Builder.getInt32(i), - "insert.gep"); - } + // Vectorize all of the blocks in the original loop. + for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), + be = DFS.endRPO(); bb != be; ++bb) + vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix); - WidenMap[Inst] = VecVal; - continue; - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - // Just widen binops. - BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); - Value *A = getVectorValue(Inst->getOperand(0)); - Value *B = getVectorValue(Inst->getOperand(1)); - - // Use this vector value for all users of the original instruction. - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); - WidenMap[Inst] = V; - - // Update the NSW, NUW and Exact flags. - BinaryOperator *VecOp = cast<BinaryOperator>(V); - if (isa<OverflowingBinaryOperator>(BinOp)) { - VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap()); - VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap()); - } - if (isa<PossiblyExactOperator>(VecOp)) - VecOp->setIsExact(BinOp->isExact()); - break; - } - case Instruction::Select: { - // Widen selects. - // If the selector is loop invariant we can create a select - // instruction with a scalar condition. Otherwise, use vector-select. - Value *Cond = Inst->getOperand(0); - bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop); - - // The condition can be loop invariant but still defined inside the - // loop. This means that we can't just use the original 'cond' value. - // We have to take the 'vectorized' value and pick the first lane. - // Instcombine will make this a no-op. - Cond = getVectorValue(Cond); - if (InvariantCond) - Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0)); - - Value *Op0 = getVectorValue(Inst->getOperand(1)); - Value *Op1 = getVectorValue(Inst->getOperand(2)); - WidenMap[Inst] = Builder.CreateSelect(Cond, Op0, Op1); - break; - } - - case Instruction::ICmp: - case Instruction::FCmp: { - // Widen compares. Generate vector compares. - bool FCmp = (Inst->getOpcode() == Instruction::FCmp); - CmpInst *Cmp = dyn_cast<CmpInst>(Inst); - Value *A = getVectorValue(Inst->getOperand(0)); - Value *B = getVectorValue(Inst->getOperand(1)); - if (FCmp) - WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B); - else - WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B); - break; - } - - case Instruction::Store: { - // Attempt to issue a wide store. - StoreInst *SI = dyn_cast<StoreInst>(Inst); - Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF); - Value *Ptr = SI->getPointerOperand(); - unsigned Alignment = SI->getAlignment(); - - assert(!Legal->isUniform(Ptr) && - "We do not allow storing to uniform addresses"); - - GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - - // This store does not use GEPs. - if (!Legal->isConsecutivePtr(Ptr)) { - scalarizeInstruction(Inst); - break; - } - - if (Gep) { - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1)); - LastIndex = Builder.CreateExtractElement(LastIndex, Zero); - - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); - Gep2->setOperand(NumOperands - 1, LastIndex); - Ptr = Builder.Insert(Gep2); - } else { - // Use the induction element ptr. - assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); - Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero); - } - Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo()); - Value *Val = getVectorValue(SI->getValueOperand()); - Builder.CreateStore(Val, Ptr)->setAlignment(Alignment); - break; - } - case Instruction::Load: { - // Attempt to issue a wide load. - LoadInst *LI = dyn_cast<LoadInst>(Inst); - Type *RetTy = VectorType::get(LI->getType(), VF); - Value *Ptr = LI->getPointerOperand(); - unsigned Alignment = LI->getAlignment(); - GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - - // If the pointer is loop invariant or if it is non consecutive, - // scalarize the load. - bool Con = Legal->isConsecutivePtr(Ptr); - if (Legal->isUniform(Ptr) || !Con) { - scalarizeInstruction(Inst); - break; - } - - if (Gep) { - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1)); - LastIndex = Builder.CreateExtractElement(LastIndex, Zero); - - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); - Gep2->setOperand(NumOperands - 1, LastIndex); - Ptr = Builder.Insert(Gep2); - } else { - // Use the induction element ptr. - assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); - Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero); - } - - Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo()); - LI = Builder.CreateLoad(Ptr); - LI->setAlignment(Alignment); - // Use this vector value for all users of the load. - WidenMap[Inst] = LI; - break; - } - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - /// Vectorize bitcasts. - CastInst *CI = dyn_cast<CastInst>(Inst); - Value *A = getVectorValue(Inst->getOperand(0)); - Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF); - WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy); - break; - } - - default: - /// All other instructions are unsupported. Scalarize them. - scalarizeInstruction(Inst); - break; - }// end of switch. - }// end of for_each instr. - - // At this point every instruction in the original loop is widended to + // At this point every instruction in the original loop is widened to // a vector form. We are almost done. Now, we need to fix the PHI nodes // that we vectorized. The PHI nodes are currently empty because we did // not want to introduce cycles. Notice that the remaining PHI nodes @@ -1281,7 +771,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { assert(Legal->getReductionVars()->count(RdxPhi) && "Unable to find the reduction variable"); LoopVectorizationLegality::ReductionDescriptor RdxDesc = - (*Legal->getReductionVars())[RdxPhi]; + (*Legal->getReductionVars())[RdxPhi]; // We need to generate a reduction vector from the incoming scalar. // To do so, we need to generate the 'identity' vector and overide @@ -1301,7 +791,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // This vector is the Identity vector where the first element is the // incoming scalar reduction. Value *VectorStart = Builder.CreateInsertElement(Identity, - RdxDesc.StartValue, Zero); + RdxDesc.StartValue, Zero); // Fix the vector-loop phi. // We created the induction variable so we know that the @@ -1311,8 +801,8 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Reductions do not have to start at zero. They can start with // any loop invariant values. VecRdxPhi->addIncoming(VectorStart, VecPreheader); - unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); - Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx)); + Value *Val = + getVectorValue(RdxPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch())); VecRdxPhi->addIncoming(Val, LoopVectorBody); // Before each round, move the insertion point right between @@ -1329,29 +819,29 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Extract the first scalar. Value *Scalar0 = - Builder.CreateExtractElement(NewPhi, Builder.getInt32(0)); + Builder.CreateExtractElement(NewPhi, Builder.getInt32(0)); // Extract and reduce the remaining vector elements. for (unsigned i=1; i < VF; ++i) { Value *Scalar1 = - Builder.CreateExtractElement(NewPhi, Builder.getInt32(i)); + Builder.CreateExtractElement(NewPhi, Builder.getInt32(i)); switch (RdxDesc.Kind) { - case LoopVectorizationLegality::IntegerAdd: - Scalar0 = Builder.CreateAdd(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerMult: - Scalar0 = Builder.CreateMul(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerOr: - Scalar0 = Builder.CreateOr(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerAnd: - Scalar0 = Builder.CreateAnd(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerXor: - Scalar0 = Builder.CreateXor(Scalar0, Scalar1); - break; - default: - llvm_unreachable("Unknown reduction operation"); + case LoopVectorizationLegality::IntegerAdd: + Scalar0 = Builder.CreateAdd(Scalar0, Scalar1, "add.rdx"); + break; + case LoopVectorizationLegality::IntegerMult: + Scalar0 = Builder.CreateMul(Scalar0, Scalar1, "mul.rdx"); + break; + case LoopVectorizationLegality::IntegerOr: + Scalar0 = Builder.CreateOr(Scalar0, Scalar1, "or.rdx"); + break; + case LoopVectorizationLegality::IntegerAnd: + Scalar0 = Builder.CreateAnd(Scalar0, Scalar1, "and.rdx"); + break; + case LoopVectorizationLegality::IntegerXor: + Scalar0 = Builder.CreateXor(Scalar0, Scalar1, "xor.rdx"); + break; + default: + llvm_unreachable("Unknown reduction operation"); } } @@ -1379,15 +869,373 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Fix the scalar loop reduction variable with the incoming reduction sum // from the vector body and from the backedge value. - int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); - int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block. + int IncomingEdgeBlockIdx = + (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch()); + assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); + // Pick the other block. + int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0); (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); }// end of for each redux variable. } -void SingleBlockLoopVectorizer::updateAnalysis() { - // The original basic block. +Value *InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { + assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) && + "Invalid edge"); + + Value *SrcMask = createBlockInMask(Src); + + // The terminator has to be a branch inst! + BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator()); + assert(BI && "Unexpected terminator found"); + + Value *EdgeMask = SrcMask; + if (BI->isConditional()) { + EdgeMask = getVectorValue(BI->getCondition()); + if (BI->getSuccessor(0) != Dst) + EdgeMask = Builder.CreateNot(EdgeMask); + } + + return Builder.CreateAnd(EdgeMask, SrcMask); +} + +Value *InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { + assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); + + // Loop incoming mask is all-one. + if (OrigLoop->getHeader() == BB) { + Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1); + return getVectorValue(C); + } + + // This is the block mask. We OR all incoming edges, and with zero. + Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0); + Value *BlockMask = getVectorValue(Zero); + + // For each pred: + for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) + BlockMask = Builder.CreateOr(BlockMask, createEdgeMask(*it, BB)); + + return BlockMask; +} + +void +InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, + BasicBlock *BB, PhiVector *PV) { + Constant *Zero = + ConstantInt::get(IntegerType::getInt32Ty(BB->getContext()), 0); + + // For each instruction in the old loop. + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + switch (it->getOpcode()) { + case Instruction::Br: + // Nothing to do for PHIs and BR, since we already took care of the + // loop control flow instructions. + continue; + case Instruction::PHI:{ + PHINode* P = cast<PHINode>(it); + // Handle reduction variables: + if (Legal->getReductionVars()->count(P)) { + // This is phase one of vectorizing PHIs. + Type *VecTy = VectorType::get(it->getType(), VF); + WidenMap[it] = + PHINode::Create(VecTy, 2, "vec.phi", + LoopVectorBody->getFirstInsertionPt()); + PV->push_back(P); + continue; + } + + // Check for PHI nodes that are lowered to vector selects. + if (P->getParent() != OrigLoop->getHeader()) { + // We know that all PHIs in non header blocks are converted into + // selects, so we don't have to worry about the insertion order and we + // can just use the builder. + + // At this point we generate the predication tree. There may be + // duplications since this is a simple recursive scan, but future + // optimizations will clean it up. + Value *Cond = createBlockInMask(P->getIncomingBlock(0)); + WidenMap[P] = + Builder.CreateSelect(Cond, + getVectorValue(P->getIncomingValue(0)), + getVectorValue(P->getIncomingValue(1)), + "predphi"); + continue; + } + + // This PHINode must be an induction variable. + // Make sure that we know about it. + assert(Legal->getInductionVars()->count(P) && + "Not an induction variable"); + + LoopVectorizationLegality::InductionInfo II = + Legal->getInductionVars()->lookup(P); + + switch (II.IK) { + case LoopVectorizationLegality::NoInduction: + llvm_unreachable("Unknown induction"); + case LoopVectorizationLegality::IntInduction: { + assert(P == OldInduction && "Unexpected PHI"); + Value *Broadcasted = getBroadcastInstrs(Induction); + // After broadcasting the induction variable we need to make the + // vector consecutive by adding 0, 1, 2 ... + Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted); + WidenMap[OldInduction] = ConsecutiveInduction; + continue; + } + case LoopVectorizationLegality::ReverseIntInduction: + case LoopVectorizationLegality::PtrInduction: + // Handle reverse integer and pointer inductions. + Value *StartIdx = 0; + // If we have a single integer induction variable then use it. + // Otherwise, start counting at zero. + if (OldInduction) { + LoopVectorizationLegality::InductionInfo OldII = + Legal->getInductionVars()->lookup(OldInduction); + StartIdx = OldII.StartValue; + } else { + StartIdx = ConstantInt::get(Induction->getType(), 0); + } + // This is the normalized GEP that starts counting at zero. + Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, + "normalized.idx"); + + // Handle the reverse integer induction variable case. + if (LoopVectorizationLegality::ReverseIntInduction == II.IK) { + IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType()); + Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy, + "resize.norm.idx"); + Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI, + "reverse.idx"); + + // This is a new value so do not hoist it out. + Value *Broadcasted = getBroadcastInstrs(ReverseInd); + // After broadcasting the induction variable we need to make the + // vector consecutive by adding ... -3, -2, -1, 0. + Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted, + true); + WidenMap[it] = ConsecutiveInduction; + continue; + } + + // Handle the pointer induction variable case. + assert(P->getType()->isPointerTy() && "Unexpected type."); + + // This is the vector of results. Notice that we don't generate + // vector geps because scalar geps result in better code. + Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); + for (unsigned int i = 0; i < VF; ++i) { + Constant *Idx = ConstantInt::get(Induction->getType(), i); + Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, + "gep.idx"); + Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, + "next.gep"); + VecVal = Builder.CreateInsertElement(VecVal, SclrGep, + Builder.getInt32(i), + "insert.gep"); + } + + WidenMap[it] = VecVal; + continue; + } + + }// End of PHI. + + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // Just widen binops. + BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it); + Value *A = getVectorValue(it->getOperand(0)); + Value *B = getVectorValue(it->getOperand(1)); + + // Use this vector value for all users of the original instruction. + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); + WidenMap[it] = V; + + // Update the NSW, NUW and Exact flags. + BinaryOperator *VecOp = cast<BinaryOperator>(V); + if (isa<OverflowingBinaryOperator>(BinOp)) { + VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap()); + VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap()); + } + if (isa<PossiblyExactOperator>(VecOp)) + VecOp->setIsExact(BinOp->isExact()); + break; + } + case Instruction::Select: { + // Widen selects. + // If the selector is loop invariant we can create a select + // instruction with a scalar condition. Otherwise, use vector-select. + Value *Cond = it->getOperand(0); + bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop); + + // The condition can be loop invariant but still defined inside the + // loop. This means that we can't just use the original 'cond' value. + // We have to take the 'vectorized' value and pick the first lane. + // Instcombine will make this a no-op. + Cond = getVectorValue(Cond); + if (InvariantCond) + Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0)); + + Value *Op0 = getVectorValue(it->getOperand(1)); + Value *Op1 = getVectorValue(it->getOperand(2)); + WidenMap[it] = Builder.CreateSelect(Cond, Op0, Op1); + break; + } + + case Instruction::ICmp: + case Instruction::FCmp: { + // Widen compares. Generate vector compares. + bool FCmp = (it->getOpcode() == Instruction::FCmp); + CmpInst *Cmp = dyn_cast<CmpInst>(it); + Value *A = getVectorValue(it->getOperand(0)); + Value *B = getVectorValue(it->getOperand(1)); + if (FCmp) + WidenMap[it] = Builder.CreateFCmp(Cmp->getPredicate(), A, B); + else + WidenMap[it] = Builder.CreateICmp(Cmp->getPredicate(), A, B); + break; + } + + case Instruction::Store: { + // Attempt to issue a wide store. + StoreInst *SI = dyn_cast<StoreInst>(it); + Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF); + Value *Ptr = SI->getPointerOperand(); + unsigned Alignment = SI->getAlignment(); + + assert(!Legal->isUniform(Ptr) && + "We do not allow storing to uniform addresses"); + + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); + + // This store does not use GEPs. + if (!Legal->isConsecutivePtr(Ptr)) { + scalarizeInstruction(it); + break; + } + + if (Gep) { + // The last index does not have to be the induction. It can be + // consecutive and be a function of the index. For example A[I+1]; + unsigned NumOperands = Gep->getNumOperands(); + Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1)); + LastIndex = Builder.CreateExtractElement(LastIndex, Zero); + + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); + Gep2->setOperand(NumOperands - 1, LastIndex); + Ptr = Builder.Insert(Gep2); + } else { + // Use the induction element ptr. + assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); + Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero); + } + Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo()); + Value *Val = getVectorValue(SI->getValueOperand()); + Builder.CreateStore(Val, Ptr)->setAlignment(Alignment); + break; + } + case Instruction::Load: { + // Attempt to issue a wide load. + LoadInst *LI = dyn_cast<LoadInst>(it); + Type *RetTy = VectorType::get(LI->getType(), VF); + Value *Ptr = LI->getPointerOperand(); + unsigned Alignment = LI->getAlignment(); + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); + + // If the pointer is loop invariant or if it is non consecutive, + // scalarize the load. + bool Con = Legal->isConsecutivePtr(Ptr); + if (Legal->isUniform(Ptr) || !Con) { + scalarizeInstruction(it); + break; + } + + if (Gep) { + // The last index does not have to be the induction. It can be + // consecutive and be a function of the index. For example A[I+1]; + unsigned NumOperands = Gep->getNumOperands(); + Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1)); + LastIndex = Builder.CreateExtractElement(LastIndex, Zero); + + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); + Gep2->setOperand(NumOperands - 1, LastIndex); + Ptr = Builder.Insert(Gep2); + } else { + // Use the induction element ptr. + assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); + Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero); + } + + Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo()); + LI = Builder.CreateLoad(Ptr); + LI->setAlignment(Alignment); + // Use this vector value for all users of the load. + WidenMap[it] = LI; + break; + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + /// Vectorize bitcasts. + CastInst *CI = dyn_cast<CastInst>(it); + Value *A = getVectorValue(it->getOperand(0)); + Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF); + WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy); + break; + } + + case Instruction::Call: { + assert(isTriviallyVectorizableIntrinsic(it)); + Module *M = BB->getParent()->getParent(); + IntrinsicInst *II = cast<IntrinsicInst>(it); + Intrinsic::ID ID = II->getIntrinsicID(); + SmallVector<Value*, 4> Args; + for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i) + Args.push_back(getVectorValue(II->getArgOperand(i))); + Type *Tys[] = { VectorType::get(II->getType()->getScalarType(), VF) }; + Function *F = Intrinsic::getDeclaration(M, ID, Tys); + WidenMap[it] = Builder.CreateCall(F, Args); + break; + } + + default: + // All other instructions are unsupported. Scalarize them. + scalarizeInstruction(it); + break; + }// end of switch. + }// end of for_each instr. +} + +void InnerLoopVectorizer::updateAnalysis() { + // Forget the original basic block. SE->forgetLoop(OrigLoop); // Update the dominator tree information. @@ -1404,45 +1252,90 @@ void SingleBlockLoopVectorizer::updateAnalysis() { DEBUG(DT->verifyAnalysis()); } -bool LoopVectorizationLegality::canVectorize() { - if (!TheLoop->getLoopPreheader()) { - assert(false && "No preheader!!"); - DEBUG(dbgs() << "LV: Loop not normalized." << "\n"); +bool LoopVectorizationLegality::canVectorizeWithIfConvert() { + if (!EnableIfConversion) return false; + + assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); + std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector(); + + // Collect the blocks that need predication. + for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) { + BasicBlock *BB = LoopBlocks[i]; + + // We must have at most two predecessors because we need to convert + // all PHIs to selects. + unsigned Preds = std::distance(pred_begin(BB), pred_end(BB)); + if (Preds > 2) + return false; + + // We must be able to predicate all blocks that need to be predicated. + if (blockNeedsPredication(BB) && !blockCanBePredicated(BB)) + return false; } - // We can only vectorize single basic block loops. + // We can if-convert this loop. + return true; +} + +bool LoopVectorizationLegality::canVectorize() { + assert(TheLoop->getLoopPreheader() && "No preheader!!"); + + // We can only vectorize innermost loops. + if (TheLoop->getSubLoopsVector().size()) + return false; + + // We must have a single backedge. + if (TheLoop->getNumBackEdges() != 1) + return false; + + // We must have a single exiting block. + if (!TheLoop->getExitingBlock()) + return false; + unsigned NumBlocks = TheLoop->getNumBlocks(); - if (NumBlocks != 1) { - DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n"); + + // Check if we can if-convert non single-bb loops. + if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { + DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); return false; } // We need to have a loop header. - BasicBlock *BB = TheLoop->getHeader(); - DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n"); + BasicBlock *Latch = TheLoop->getLoopLatch(); + DEBUG(dbgs() << "LV: Found a loop: " << + TheLoop->getHeader()->getName() << "\n"); // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getExitCount(TheLoop, BB); + const SCEV *ExitCount = SE->getExitCount(TheLoop, Latch); if (ExitCount == SE->getCouldNotCompute()) { DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); return false; } // Do not loop-vectorize loops with a tiny trip count. - unsigned TC = SE->getSmallConstantTripCount(TheLoop, BB); + unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch); if (TC > 0u && TC < TinyTripCountThreshold) { DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << "This loop is not worth vectorizing.\n"); return false; } + // Check if we can vectorize the instructions and CFG in this loop. + if (!canVectorizeInstrs()) { + DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); + return false; + } + // Go over each instruction and look at memory deps. - if (!canVectorizeBlock(*BB)) { - DEBUG(dbgs() << "LV: Can't vectorize this loop header\n"); + if (!canVectorizeMemory()) { + DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); return false; } + // Collect all of the variables that remain uniform after vectorization. + collectLoopUniforms(); + DEBUG(dbgs() << "LV: We can vectorize this loop" << (PtrRtCheck.Need ? " (with a runtime bound check)" : "") <<"!\n"); @@ -1453,137 +1346,152 @@ bool LoopVectorizationLegality::canVectorize() { return true; } -bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { - +bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); + BasicBlock *Header = TheLoop->getHeader(); - // Scan the instructions in the block and look for hazards. - for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { - Instruction *I = it; - - if (PHINode *Phi = dyn_cast<PHINode>(I)) { - // This should not happen because the loop should be normalized. - if (Phi->getNumIncomingValues() != 2) { - DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); - return false; - } + // For each block in the loop. + for (Loop::block_iterator bb = TheLoop->block_begin(), + be = TheLoop->block_end(); bb != be; ++bb) { - // This is the value coming from the preheader. - Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); + // Scan the instructions in the block and look for hazards. + for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; + ++it) { - // We only look at integer and pointer phi nodes. - if (Phi->getType()->isPointerTy() && isInductionVariable(Phi)) { - DEBUG(dbgs() << "LV: Found a pointer induction variable.\n"); - Inductions[Phi] = StartValue; - continue; - } else if (!Phi->getType()->isIntegerTy()) { - DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); - return false; - } + if (PHINode *Phi = dyn_cast<PHINode>(it)) { + // This should not happen because the loop should be normalized. + if (Phi->getNumIncomingValues() != 2) { + DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); + return false; + } - // Handle integer PHIs: - if (isInductionVariable(Phi)) { - if (Induction) { - DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n"); + // Check that this PHI type is allowed. + if (!Phi->getType()->isIntegerTy() && + !Phi->getType()->isPointerTy()) { + DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); return false; } - DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n"); - Induction = Phi; - Inductions[Phi] = StartValue; - continue; - } - if (AddReductionVar(Phi, IntegerAdd)) { - DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n"); - continue; - } - if (AddReductionVar(Phi, IntegerMult)) { - DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n"); - continue; - } - if (AddReductionVar(Phi, IntegerOr)) { - DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n"); - continue; - } - if (AddReductionVar(Phi, IntegerAnd)) { - DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n"); - continue; - } - if (AddReductionVar(Phi, IntegerXor)) { - DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n"); - continue; - } - DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); - return false; - }// end of PHI handling + // If this PHINode is not in the header block, then we know that we + // can convert it to select during if-conversion. No need to check if + // the PHIs in this block are induction or reduction variables. + if (*bb != Header) + continue; - // We still don't handle functions. - CallInst *CI = dyn_cast<CallInst>(I); - if (CI) { - DEBUG(dbgs() << "LV: Found a call site.\n"); - return false; - } + // This is the value coming from the preheader. + Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); + // Check if this is an induction variable. + InductionKind IK = isInductionVariable(Phi); + + if (NoInduction != IK) { + // Int inductions are special because we only allow one IV. + if (IK == IntInduction) { + if (Induction) { + DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n"); + return false; + } + Induction = Phi; + } + + DEBUG(dbgs() << "LV: Found an induction variable.\n"); + Inductions[Phi] = InductionInfo(StartValue, IK); + continue; + } - // We do not re-vectorize vectors. - if (!VectorType::isValidElementType(I->getType()) && - !I->getType()->isVoidTy()) { - DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n"); - return false; - } + if (AddReductionVar(Phi, IntegerAdd)) { + DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n"); + continue; + } + if (AddReductionVar(Phi, IntegerMult)) { + DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n"); + continue; + } + if (AddReductionVar(Phi, IntegerOr)) { + DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n"); + continue; + } + if (AddReductionVar(Phi, IntegerAnd)) { + DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n"); + continue; + } + if (AddReductionVar(Phi, IntegerXor)) { + DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n"); + continue; + } - // Reduction instructions are allowed to have exit users. - // All other instructions must not have external users. - if (!AllowedExit.count(I)) - //Check that all of the users of the loop are inside the BB. - for (Value::use_iterator it = I->use_begin(), e = I->use_end(); - it != e; ++it) { - Instruction *U = cast<Instruction>(*it); - // This user may be a reduction exit value. - BasicBlock *Parent = U->getParent(); - if (Parent != &BB) { - DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); - return false; + DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); + return false; + }// end of PHI handling + + // We still don't handle functions. + CallInst *CI = dyn_cast<CallInst>(it); + if (CI && !isTriviallyVectorizableIntrinsic(it)) { + DEBUG(dbgs() << "LV: Found a call site.\n"); + return false; + } + + // We do not re-vectorize vectors. + if (!VectorType::isValidElementType(it->getType()) && + !it->getType()->isVoidTy()) { + DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n"); + return false; + } + + // Reduction instructions are allowed to have exit users. + // All other instructions must not have external users. + if (!AllowedExit.count(it)) + //Check that all of the users of the loop are inside the BB. + for (Value::use_iterator I = it->use_begin(), E = it->use_end(); + I != E; ++I) { + Instruction *U = cast<Instruction>(*I); + // This user may be a reduction exit value. + if (!TheLoop->contains(U)) { + DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); + return false; + } } - } - } // next instr. + } // next instr. + + } if (!Induction) { DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); assert(getInductionVars()->size() && "No induction variables"); } - // Don't vectorize if the memory dependencies do not allow vectorization. - if (!canVectorizeMemory(BB)) - return false; + return true; +} +void LoopVectorizationLegality::collectLoopUniforms() { // We now know that the loop is vectorizable! // Collect variables that will remain uniform after vectorization. std::vector<Value*> Worklist; + BasicBlock *Latch = TheLoop->getLoopLatch(); // Start with the conditional branch and walk up the block. - Worklist.push_back(BB.getTerminator()->getOperand(0)); + Worklist.push_back(Latch->getTerminator()->getOperand(0)); while (Worklist.size()) { Instruction *I = dyn_cast<Instruction>(Worklist.back()); Worklist.pop_back(); - // Look at instructions inside this block. Stop when reaching PHI nodes. - if (!I || I->getParent() != &BB || isa<PHINode>(I)) + // Look at instructions inside this loop. + // Stop when reaching PHI nodes. + // TODO: we need to follow values all over the loop, not only in this block. + if (!I || !TheLoop->contains(I) || isa<PHINode>(I)) continue; // This is a known uniform. Uniforms.insert(I); // Insert all operands. - for (int i=0, Op = I->getNumOperands(); i < Op; ++i) { + for (int i = 0, Op = I->getNumOperands(); i < Op; ++i) { Worklist.push_back(I->getOperand(i)); } } - - return true; } -bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { +bool LoopVectorizationLegality::canVectorizeMemory() { typedef SmallVector<Value*, 16> ValueVector; typedef SmallPtrSet<Value*, 16> ValueSet; // Holds the Load and Store *instructions*. @@ -1592,35 +1500,40 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { PtrRtCheck.Pointers.clear(); PtrRtCheck.Need = false; - // Scan the BB and collect legal loads and stores. - for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { - Instruction *I = it; - - // If this is a load, save it. If this instruction can read from memory - // but is not a load, then we quit. Notice that we don't handle function - // calls that read or write. - if (I->mayReadFromMemory()) { - LoadInst *Ld = dyn_cast<LoadInst>(I); - if (!Ld) return false; - if (!Ld->isSimple()) { - DEBUG(dbgs() << "LV: Found a non-simple load.\n"); - return false; + // For each block. + for (Loop::block_iterator bb = TheLoop->block_begin(), + be = TheLoop->block_end(); bb != be; ++bb) { + + // Scan the BB and collect legal loads and stores. + for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; + ++it) { + + // If this is a load, save it. If this instruction can read from memory + // but is not a load, then we quit. Notice that we don't handle function + // calls that read or write. + if (it->mayReadFromMemory()) { + LoadInst *Ld = dyn_cast<LoadInst>(it); + if (!Ld) return false; + if (!Ld->isSimple()) { + DEBUG(dbgs() << "LV: Found a non-simple load.\n"); + return false; + } + Loads.push_back(Ld); + continue; } - Loads.push_back(Ld); - continue; - } - // Save store instructions. Abort if other instructions write to memory. - if (I->mayWriteToMemory()) { - StoreInst *St = dyn_cast<StoreInst>(I); - if (!St) return false; - if (!St->isSimple()) { - DEBUG(dbgs() << "LV: Found a non-simple store.\n"); - return false; + // Save 'store' instructions. Abort if other instructions write to memory. + if (it->mayWriteToMemory()) { + StoreInst *St = dyn_cast<StoreInst>(it); + if (!St) return false; + if (!St->isSimple()) { + DEBUG(dbgs() << "LV: Found a non-simple store.\n"); + return false; + } + Stores.push_back(St); } - Stores.push_back(St); - } - } // next instr. + } // next instr. + } // next block. // Now we have two lists that hold the loads and the stores. // Next, we find the pointers that they use. @@ -1628,8 +1541,8 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { // Check if we see any stores. If there are no stores, then we don't // care if the pointers are *restrict*. if (!Stores.size()) { - DEBUG(dbgs() << "LV: Found a read-only loop!\n"); - return true; + DEBUG(dbgs() << "LV: Found a read-only loop!\n"); + return true; } // Holds the read and read-write *pointers* that we find. @@ -1770,11 +1683,13 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, if (Phi->getNumIncomingValues() != 2) return false; - // Find the possible incoming reduction variable. - BasicBlock *BB = Phi->getParent(); - int SelfEdgeIdx = Phi->getBasicBlockIndex(BB); - int InEdgeBlockIdx = (SelfEdgeIdx ? 0 : 1); // The other entry. - Value *RdxStart = Phi->getIncomingValue(InEdgeBlockIdx); + // Reduction variables are only found in the loop header block. + if (Phi->getParent() != TheLoop->getHeader()) + return false; + + // Obtain the reduction start value from the value that comes from the loop + // preheader. + Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); // ExitInstruction is the single value which is used outside the loop. // We only allow for a single reduction value to be used outside the loop. @@ -1789,20 +1704,20 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // Also, we can't have multiple block-local users. Instruction *Iter = Phi; while (true) { + // If the instruction has no users then this is a broken + // chain and can't be a reduction variable. + if (Iter->use_empty()) + return false; + // Any reduction instr must be of one of the allowed kinds. if (!isReductionInstr(Iter, Kind)) return false; - // Did we found a user inside this block ? + // Did we find a user inside this block ? bool FoundInBlockUser = false; // Did we reach the initial PHI node ? bool FoundStartPHI = false; - // If the instruction has no users then this is a broken - // chain and can't be a reduction variable. - if (Iter->use_empty()) - return false; - // For each of the *users* of iter. for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end(); it != e; ++it) { @@ -1812,14 +1727,23 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, FoundStartPHI = true; continue; } + // Check if we found the exit user. BasicBlock *Parent = U->getParent(); - if (Parent != BB) { - // We must have a single exit instruction. + if (!TheLoop->contains(Parent)) { + // Exit if you find multiple outside users. if (ExitInstruction != 0) return false; ExitInstruction = Iter; } + + // We allow in-loop PHINodes which are not the original reduction PHI + // node. If this PHI is the only user of Iter (happens in IF w/ no ELSE + // structure) then don't skip this PHI. + if (isa<PHINode>(U) && U->getParent() != TheLoop->getHeader() && + TheLoop->contains(U) && Iter->getNumUses() > 1) + continue; + // We can't have multiple inside users. if (FoundInBlockUser) return false; @@ -1830,67 +1754,110 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // We found a reduction var if we have reached the original // phi node and we only have a single instruction with out-of-loop // users. - if (FoundStartPHI && ExitInstruction) { - // This instruction is allowed to have out-of-loop users. - AllowedExit.insert(ExitInstruction); - - // Save the description of this reduction variable. - ReductionDescriptor RD(RdxStart, ExitInstruction, Kind); - Reductions[Phi] = RD; - return true; - } + if (FoundStartPHI && ExitInstruction) { + // This instruction is allowed to have out-of-loop users. + AllowedExit.insert(ExitInstruction); + + // Save the description of this reduction variable. + ReductionDescriptor RD(RdxStart, ExitInstruction, Kind); + Reductions[Phi] = RD; + return true; + } + + // If we've reached the start PHI but did not find an outside user then + // this is dead code. Abort. + if (FoundStartPHI) + return false; } } bool LoopVectorizationLegality::isReductionInstr(Instruction *I, ReductionKind Kind) { - switch (I->getOpcode()) { - default: - return false; - case Instruction::PHI: - // possibly. - return true; - case Instruction::Add: - case Instruction::Sub: - return Kind == IntegerAdd; - case Instruction::Mul: - return Kind == IntegerMult; - case Instruction::And: - return Kind == IntegerAnd; - case Instruction::Or: - return Kind == IntegerOr; - case Instruction::Xor: - return Kind == IntegerXor; - } + switch (I->getOpcode()) { + default: + return false; + case Instruction::PHI: + // possibly. + return true; + case Instruction::Add: + case Instruction::Sub: + return Kind == IntegerAdd; + case Instruction::Mul: + return Kind == IntegerMult; + case Instruction::And: + return Kind == IntegerAnd; + case Instruction::Or: + return Kind == IntegerOr; + case Instruction::Xor: + return Kind == IntegerXor; + } } -bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { +LoopVectorizationLegality::InductionKind +LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer inductions variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) - return false; + return NoInduction; // Check that the PHI is consecutive and starts at zero. const SCEV *PhiScev = SE->getSCEV(Phi); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); if (!AR) { DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); - return false; + return NoInduction; } const SCEV *Step = AR->getStepRecurrence(*SE); // Integer inductions need to have a stride of one. - if (PhiTy->isIntegerTy()) - return Step->isOne(); + if (PhiTy->isIntegerTy()) { + if (Step->isOne()) + return IntInduction; + if (Step->isAllOnesValue()) + return ReverseIntInduction; + return NoInduction; + } // Calculate the pointer stride and check if it is consecutive. const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); - if (!C) return false; + if (!C) + return NoInduction; assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType()); - return (C->getValue()->equalsInt(Size)); + if (C->getValue()->equalsInt(Size)) + return PtrInduction; + + return NoInduction; +} + +bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { + assert(TheLoop->contains(BB) && "Unknown block used"); + + // Blocks that do not dominate the latch need predication. + BasicBlock* Latch = TheLoop->getLoopLatch(); + return !DT->dominates(BB, Latch); +} + +bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + // We don't predicate loads/stores at the moment. + if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow()) + return false; + + // The isntructions below can trap. + switch (it->getOpcode()) { + default: continue; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + return false; + } + } + + return true; } bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) { @@ -1930,19 +1897,29 @@ LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) { } unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { - // We can only estimate the cost of single basic block loops. - assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop"); - - BasicBlock *BB = TheLoop->getHeader(); unsigned Cost = 0; - // For each instruction in the old loop. - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - Instruction *Inst = it; - unsigned C = getInstructionCost(Inst, VF); - Cost += C; - DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF << - " For instruction: "<< *Inst << "\n"); + // For each block. + for (Loop::block_iterator bb = TheLoop->block_begin(), + be = TheLoop->block_end(); bb != be; ++bb) { + unsigned BlockCost = 0; + BasicBlock *BB = *bb; + + // For each instruction in the old loop. + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + unsigned C = getInstructionCost(it, VF); + Cost += C; + DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " << + VF << " For instruction: "<< *it << "\n"); + } + + // We assume that if-converted blocks have a 50% chance of being executed. + // When the code is scalar then some of the blocks are avoided due to CF. + // When the code is vectorized we execute all code paths. + if (Legal->blockNeedsPredication(*bb) && VF == 1) + BlockCost /= 2; + + Cost += BlockCost; } return Cost; @@ -1960,147 +1937,156 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { Type *RetTy = I->getType(); Type *VectorTy = ToVectorTy(RetTy, VF); - // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { - case Instruction::GetElementPtr: - // We mark this instruction as zero-cost because scalar GEPs are usually - // lowered to the intruction addressing mode. At the moment we don't - // generate vector geps. - return 0; - case Instruction::Br: { - return VTTI->getCFInstrCost(I->getOpcode()); - } - case Instruction::PHI: - return 0; - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy); - } - case Instruction::Select: { - SelectInst *SI = cast<SelectInst>(I); - const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); - bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); - Type *CondTy = SI->getCondition()->getType(); - if (ScalarCond) - CondTy = VectorType::get(CondTy, VF); - - return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); - } - case Instruction::ICmp: - case Instruction::FCmp: { - Type *ValTy = I->getOperand(0)->getType(); - VectorTy = ToVectorTy(ValTy, VF); - return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy); - } - case Instruction::Store: { - StoreInst *SI = cast<StoreInst>(I); - Type *ValTy = SI->getValueOperand()->getType(); - VectorTy = ToVectorTy(ValTy, VF); - - if (VF == 1) - return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, - SI->getAlignment(), SI->getPointerAddressSpace()); - - // Scalarized stores. - if (!Legal->isConsecutivePtr(SI->getPointerOperand())) { - unsigned Cost = 0; - unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, - ValTy); - // The cost of extracting from the value vector. - Cost += VF * (ExtCost); - // The cost of the scalar stores. - Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), - ValTy->getScalarType(), - SI->getAlignment(), - SI->getPointerAddressSpace()); - return Cost; - } - - // Wide stores. - return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(), + case Instruction::GetElementPtr: + // We mark this instruction as zero-cost because scalar GEPs are usually + // lowered to the intruction addressing mode. At the moment we don't + // generate vector geps. + return 0; + case Instruction::Br: { + return VTTI->getCFInstrCost(I->getOpcode()); + } + case Instruction::PHI: + //TODO: IF-converted IFs become selects. + return 0; + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy); + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); + bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); + Type *CondTy = SI->getCondition()->getType(); + if (ScalarCond) + CondTy = VectorType::get(CondTy, VF); + + return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *ValTy = I->getOperand(0)->getType(); + VectorTy = ToVectorTy(ValTy, VF); + return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy); + } + case Instruction::Store: { + StoreInst *SI = cast<StoreInst>(I); + Type *ValTy = SI->getValueOperand()->getType(); + VectorTy = ToVectorTy(ValTy, VF); + + if (VF == 1) + return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), SI->getPointerAddressSpace()); + + // Scalarized stores. + if (!Legal->isConsecutivePtr(SI->getPointerOperand())) { + unsigned Cost = 0; + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, + ValTy); + // The cost of extracting from the value vector. + Cost += VF * (ExtCost); + // The cost of the scalar stores. + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), + ValTy->getScalarType(), + SI->getAlignment(), + SI->getPointerAddressSpace()); + return Cost; } - case Instruction::Load: { - LoadInst *LI = cast<LoadInst>(I); - - if (VF == 1) - return VTTI->getMemoryOpCost(I->getOpcode(), RetTy, - LI->getAlignment(), - LI->getPointerAddressSpace()); - - // Scalarized loads. - if (!Legal->isConsecutivePtr(LI->getPointerOperand())) { - unsigned Cost = 0; - unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy); - // The cost of inserting the loaded value into the result vector. - Cost += VF * (InCost); - // The cost of the scalar stores. - Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), - RetTy->getScalarType(), - LI->getAlignment(), - LI->getPointerAddressSpace()); - return Cost; - } - // Wide loads. - return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(), + // Wide stores. + return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(), + SI->getPointerAddressSpace()); + } + case Instruction::Load: { + LoadInst *LI = cast<LoadInst>(I); + + if (VF == 1) + return VTTI->getMemoryOpCost(I->getOpcode(), RetTy, + LI->getAlignment(), LI->getPointerAddressSpace()); - } - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); - return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); - } - default: { - // We are scalarizing the instruction. Return the cost of the scalar - // instruction, plus the cost of insert and extract into vector - // elements, times the vector width. + + // Scalarized loads. + if (!Legal->isConsecutivePtr(LI->getPointerOperand())) { unsigned Cost = 0; + unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy); + // The cost of inserting the loaded value into the result vector. + Cost += VF * (InCost); + // The cost of the scalar stores. + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), + RetTy->getScalarType(), + LI->getAlignment(), + LI->getPointerAddressSpace()); + return Cost; + } + + // Wide loads. + return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(), + LI->getPointerAddressSpace()); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); + return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); + } + case Instruction::Call: { + assert(isTriviallyVectorizableIntrinsic(I)); + IntrinsicInst *II = cast<IntrinsicInst>(I); + Type *RetTy = ToVectorTy(II->getType(), VF); + SmallVector<Type*, 4> Tys; + for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i) + Tys.push_back(ToVectorTy(II->getArgOperand(i)->getType(), VF)); + return VTTI->getIntrinsicInstrCost(II->getIntrinsicID(), RetTy, Tys); + } + default: { + // We are scalarizing the instruction. Return the cost of the scalar + // instruction, plus the cost of insert and extract into vector + // elements, times the vector width. + unsigned Cost = 0; - bool IsVoid = RetTy->isVoidTy(); + bool IsVoid = RetTy->isVoidTy(); - unsigned InsCost = (IsVoid ? 0 : - VTTI->getInstrCost(Instruction::InsertElement, - VectorTy)); + unsigned InsCost = (IsVoid ? 0 : + VTTI->getInstrCost(Instruction::InsertElement, + VectorTy)); - unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, - VectorTy); + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, + VectorTy); - // The cost of inserting the results plus extracting each one of the - // operands. - Cost += VF * (InsCost + ExtCost * I->getNumOperands()); + // The cost of inserting the results plus extracting each one of the + // operands. + Cost += VF * (InsCost + ExtCost * I->getNumOperands()); - // The cost of executing VF copies of the scalar instruction. - Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy); - return Cost; - } + // The cost of executing VF copies of the scalar instruction. + Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy); + return Cost; + } }// end of switch. } @@ -2110,8 +2096,6 @@ Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) { return VectorType::get(Scalar, VF); } -} // namespace - char LoopVectorize::ID = 0; static const char lv_name[] = "Loop Vectorization"; INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) @@ -2126,3 +2110,4 @@ namespace llvm { } } + diff --git a/lib/Transforms/Vectorize/LoopVectorize.h b/lib/Transforms/Vectorize/LoopVectorize.h new file mode 100644 index 0000000000..9d6d80e22b --- /dev/null +++ b/lib/Transforms/Vectorize/LoopVectorize.h @@ -0,0 +1,458 @@ +//===- LoopVectorize.h --- A Loop Vectorizer ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops +// and generates target-independent LLVM-IR. Legalization of the IR is done +// in the codegen. However, the vectorizes uses (will use) the codegen +// interfaces to generate IR that is likely to result in an optimal binary. +// +// The loop vectorizer combines consecutive loop iteration into a single +// 'wide' iteration. After this transformation the index is incremented +// by the SIMD vector width, and not by one. +// +// This pass has three parts: +// 1. The main loop pass that drives the different parts. +// 2. LoopVectorizationLegality - A unit that checks for the legality +// of the vectorization. +// 3. InnerLoopVectorizer - A unit that performs the actual +// widening of instructions. +// 4. LoopVectorizationCostModel - A unit that checks for the profitability +// of vectorization. It decides on the optimal vector width, which +// can be one, if vectorization is not profitable. +// +//===----------------------------------------------------------------------===// +// +// The reduction-variable vectorization is based on the paper: +// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. +// +// Variable uniformity checks are inspired by: +// Karrenberg, R. and Hack, S. Whole Function Vectorization. +// +// Other ideas/concepts are from: +// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. +// +// S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of +// Vectorizing Compilers. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H +#define LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H + +#define LV_NAME "loop-vectorize" +#define DEBUG_TYPE LV_NAME + +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IRBuilder.h" + +#include <algorithm> +using namespace llvm; + +/// We don't vectorize loops with a known constant trip count below this number. +const unsigned TinyTripCountThreshold = 16; + +/// When performing a runtime memory check, do not check more than this +/// number of pointers. Notice that the check is quadratic! +const unsigned RuntimeMemoryCheckThreshold = 4; + +/// This is the highest vector width that we try to generate. +const unsigned MaxVectorSize = 8; + +namespace llvm { + +// Forward declarations. +class LoopVectorizationLegality; +class LoopVectorizationCostModel; +class VectorTargetTransformInfo; + +/// InnerLoopVectorizer vectorizes loops which contain only one basic +/// block to a specified vectorization factor (VF). +/// This class performs the widening of scalars into vectors, or multiple +/// scalars. This class also implements the following features: +/// * It inserts an epilogue loop for handling loops that don't have iteration +/// counts that are known to be a multiple of the vectorization factor. +/// * It handles the code generation for reduction variables. +/// * Scalarization (implementation using scalars) of un-vectorizable +/// instructions. +/// InnerLoopVectorizer does not perform any vectorization-legality +/// checks, and relies on the caller to check for the different legality +/// aspects. The InnerLoopVectorizer relies on the +/// LoopVectorizationLegality class to provide information about the induction +/// and reduction variables that were found to a given vectorization factor. +class InnerLoopVectorizer { +public: + /// Ctor. + InnerLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li, + DominatorTree *Dt, DataLayout *Dl, unsigned VecWidth): + OrigLoop(Orig), SE(Se), LI(Li), DT(Dt), DL(Dl), VF(VecWidth), + Builder(Se->getContext()), Induction(0), OldInduction(0) { } + + // Perform the actual loop widening (vectorization). + void vectorize(LoopVectorizationLegality *Legal) { + // Create a new empty loop. Unlink the old loop and connect the new one. + createEmptyLoop(Legal); + // Widen each instruction in the old loop to a new one in the new loop. + // Use the Legality module to find the induction and reduction variables. + vectorizeLoop(Legal); + // Register the new loop and update the analysis passes. + updateAnalysis(); + } + +private: + /// A small list of PHINodes. + typedef SmallVector<PHINode*, 4> PhiVector; + + /// Add code that checks at runtime if the accessed arrays overlap. + /// Returns the comparator value or NULL if no check is needed. + Value *addRuntimeCheck(LoopVectorizationLegality *Legal, + Instruction *Loc); + /// Create an empty loop, based on the loop ranges of the old loop. + void createEmptyLoop(LoopVectorizationLegality *Legal); + /// Copy and widen the instructions from the old loop. + void vectorizeLoop(LoopVectorizationLegality *Legal); + + /// A helper function that computes the predicate of the block BB, assuming + /// that the header block of the loop is set to True. It returns the *entry* + /// mask for the block BB. + Value *createBlockInMask(BasicBlock *BB); + /// A helper function that computes the predicate of the edge between SRC + /// and DST. + Value *createEdgeMask(BasicBlock *Src, BasicBlock *Dst); + + /// A helper function to vectorize a single BB within the innermost loop. + void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB, + PhiVector *PV); + + /// Insert the new loop to the loop hierarchy and pass manager + /// and update the analysis passes. + void updateAnalysis(); + + /// This instruction is un-vectorizable. Implement it as a sequence + /// of scalars. + void scalarizeInstruction(Instruction *Instr); + + /// Create a broadcast instruction. This method generates a broadcast + /// instruction (shuffle) for loop invariant values and for the induction + /// value. If this is the induction variable then we extend it to N, N+1, ... + /// this is needed because each iteration in the loop corresponds to a SIMD + /// element. + Value *getBroadcastInstrs(Value *V); + + /// This function adds 0, 1, 2 ... to each vector element, starting at zero. + /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). + Value *getConsecutiveVector(Value* Val, bool Negate = false); + + /// When we go over instructions in the basic block we rely on previous + /// values within the current basic block or on loop invariant values. + /// When we widen (vectorize) values we place them in the map. If the values + /// are not within the map, they have to be loop invariant, so we simply + /// broadcast them into a vector. + Value *getVectorValue(Value *V); + + /// Get a uniform vector of constant integers. We use this to get + /// vectors of ones and zeros for the reduction code. + Constant* getUniformVector(unsigned Val, Type* ScalarTy); + + typedef DenseMap<Value*, Value*> ValueMap; + + /// The original loop. + Loop *OrigLoop; + // Scev analysis to use. + ScalarEvolution *SE; + // Loop Info. + LoopInfo *LI; + // Dominator Tree. + DominatorTree *DT; + // Data Layout. + DataLayout *DL; + // The vectorization factor to use. + unsigned VF; + + // The builder that we use + IRBuilder<> Builder; + + // --- Vectorization state --- + + /// The vector-loop preheader. + BasicBlock *LoopVectorPreHeader; + /// The scalar-loop preheader. + BasicBlock *LoopScalarPreHeader; + /// Middle Block between the vector and the scalar. + BasicBlock *LoopMiddleBlock; + ///The ExitBlock of the scalar loop. + BasicBlock *LoopExitBlock; + ///The vector loop body. + BasicBlock *LoopVectorBody; + ///The scalar loop body. + BasicBlock *LoopScalarBody; + ///The first bypass block. + BasicBlock *LoopBypassBlock; + + /// The new Induction variable which was added to the new block. + PHINode *Induction; + /// The induction variable of the old basic block. + PHINode *OldInduction; + // Maps scalars to widened vectors. + ValueMap WidenMap; +}; + +/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and +/// to what vectorization factor. +/// This class does not look at the profitability of vectorization, only the +/// legality. This class has two main kinds of checks: +/// * Memory checks - The code in canVectorizeMemory checks if vectorization +/// will change the order of memory accesses in a way that will change the +/// correctness of the program. +/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory +/// checks for a number of different conditions, such as the availability of a +/// single induction variable, that all types are supported and vectorize-able, +/// etc. This code reflects the capabilities of InnerLoopVectorizer. +/// This class is also used by InnerLoopVectorizer for identifying +/// induction variable and the different reduction variables. +class LoopVectorizationLegality { +public: + LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl, + DominatorTree *Dt): + TheLoop(Lp), SE(Se), DL(Dl), DT(Dt), Induction(0) { } + + /// This enum represents the kinds of reductions that we support. + enum ReductionKind { + NoReduction, /// Not a reduction. + IntegerAdd, /// Sum of numbers. + IntegerMult, /// Product of numbers. + IntegerOr, /// Bitwise or logical OR of numbers. + IntegerAnd, /// Bitwise or logical AND of numbers. + IntegerXor /// Bitwise or logical XOR of numbers. + }; + + /// This enum represents the kinds of inductions that we support. + enum InductionKind { + NoInduction, /// Not an induction variable. + IntInduction, /// Integer induction variable. Step = 1. + ReverseIntInduction, /// Reverse int induction variable. Step = -1. + PtrInduction /// Pointer induction variable. Step = sizeof(elem). + }; + + /// This POD struct holds information about reduction variables. + struct ReductionDescriptor { + // Default C'tor + ReductionDescriptor(): + StartValue(0), LoopExitInstr(0), Kind(NoReduction) {} + + // C'tor. + ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K): + StartValue(Start), LoopExitInstr(Exit), Kind(K) {} + + // The starting value of the reduction. + // It does not have to be zero! + Value *StartValue; + // The instruction who's value is used outside the loop. + Instruction *LoopExitInstr; + // The kind of the reduction. + ReductionKind Kind; + }; + + // This POD struct holds information about the memory runtime legality + // check that a group of pointers do not overlap. + struct RuntimePointerCheck { + RuntimePointerCheck(): Need(false) {} + + /// Reset the state of the pointer runtime information. + void reset() { + Need = false; + Pointers.clear(); + Starts.clear(); + Ends.clear(); + } + + /// Insert a pointer and calculate the start and end SCEVs. + void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr); + + /// This flag indicates if we need to add the runtime check. + bool Need; + /// Holds the pointers that we need to check. + SmallVector<Value*, 2> Pointers; + /// Holds the pointer value at the beginning of the loop. + SmallVector<const SCEV*, 2> Starts; + /// Holds the pointer value at the end of the loop. + SmallVector<const SCEV*, 2> Ends; + }; + + /// A POD for saving information about induction variables. + struct InductionInfo { + /// Ctors. + InductionInfo(Value *Start, InductionKind K): + StartValue(Start), IK(K) {}; + InductionInfo(): StartValue(0), IK(NoInduction) {}; + /// Start value. + Value *StartValue; + /// Induction kind. + InductionKind IK; + }; + + /// ReductionList contains the reduction descriptors for all + /// of the reductions that were found in the loop. + typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; + + /// InductionList saves induction variables and maps them to the + /// induction descriptor. + typedef DenseMap<PHINode*, InductionInfo> InductionList; + + /// Returns true if it is legal to vectorize this loop. + /// This does not mean that it is profitable to vectorize this + /// loop, only that it is legal to do so. + bool canVectorize(); + + /// Returns the Induction variable. + PHINode *getInduction() {return Induction;} + + /// Returns the reduction variables found in the loop. + ReductionList *getReductionVars() { return &Reductions; } + + /// Returns the induction variables found in the loop. + InductionList *getInductionVars() { return &Inductions; } + + /// Return true if the block BB needs to be predicated in order for the loop + /// to be vectorized. + bool blockNeedsPredication(BasicBlock *BB); + + /// Check if this pointer is consecutive when vectorizing. This happens + /// when the last index of the GEP is the induction variable, or that the + /// pointer itself is an induction variable. + /// This check allows us to vectorize A[idx] into a wide load/store. + bool isConsecutivePtr(Value *Ptr); + + /// Returns true if the value V is uniform within the loop. + bool isUniform(Value *V); + + /// Returns true if this instruction will remain scalar after vectorization. + bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);} + + /// Returns the information that we collected about runtime memory check. + RuntimePointerCheck *getRuntimePointerCheck() {return &PtrRtCheck; } +private: + /// Check if a single basic block loop is vectorizable. + /// At this point we know that this is a loop with a constant trip count + /// and we only need to check individual instructions. + bool canVectorizeInstrs(); + + /// When we vectorize loops we may change the order in which + /// we read and write from memory. This method checks if it is + /// legal to vectorize the code, considering only memory constrains. + /// Returns true if the loop is vectorizable + bool canVectorizeMemory(); + + /// Return true if we can vectorize this loop using the IF-conversion + /// transformation. + bool canVectorizeWithIfConvert(); + + /// Collect the variables that need to stay uniform after vectorization. + void collectLoopUniforms(); + + /// Return true if all of the instructions in the block can be speculatively + /// executed. + bool blockCanBePredicated(BasicBlock *BB); + + /// Returns True, if 'Phi' is the kind of reduction variable for type + /// 'Kind'. If this is a reduction variable, it adds it to ReductionList. + bool AddReductionVar(PHINode *Phi, ReductionKind Kind); + /// Returns true if the instruction I can be a reduction variable of type + /// 'Kind'. + bool isReductionInstr(Instruction *I, ReductionKind Kind); + /// Returns the induction kind of Phi. This function may return NoInduction + /// if the PHI is not an induction variable. + InductionKind isInductionVariable(PHINode *Phi); + /// Return true if can compute the address bounds of Ptr within the loop. + bool hasComputableBounds(Value *Ptr); + + /// The loop that we evaluate. + Loop *TheLoop; + /// Scev analysis. + ScalarEvolution *SE; + /// DataLayout analysis. + DataLayout *DL; + // Dominators. + DominatorTree *DT; + + // --- vectorization state --- // + + /// Holds the integer induction variable. This is the counter of the + /// loop. + PHINode *Induction; + /// Holds the reduction variables. + ReductionList Reductions; + /// Holds all of the induction variables that we found in the loop. + /// Notice that inductions don't need to start at zero and that induction + /// variables can be pointers. + InductionList Inductions; + + /// Allowed outside users. This holds the reduction + /// vars which can be accessed from outside the loop. + SmallPtrSet<Value*, 4> AllowedExit; + /// This set holds the variables which are known to be uniform after + /// vectorization. + SmallPtrSet<Instruction*, 4> Uniforms; + /// We need to check that all of the pointers in this list are disjoint + /// at runtime. + RuntimePointerCheck PtrRtCheck; +}; + +/// LoopVectorizationCostModel - estimates the expected speedups due to +/// vectorization. +/// In many cases vectorization is not profitable. This can happen because +/// of a number of reasons. In this class we mainly attempt to predict +/// the expected speedup/slowdowns due to the supported instruction set. +/// We use the VectorTargetTransformInfo to query the different backends +/// for the cost of different operations. +class LoopVectorizationCostModel { +public: + /// C'tor. + LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se, + LoopVectorizationLegality *Leg, + const VectorTargetTransformInfo *Vtti): + TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { } + + /// Returns the most profitable vectorization factor for the loop that is + /// smaller or equal to the VF argument. This method checks every power + /// of two up to VF. + unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize); + +private: + /// Returns the expected execution cost. The unit of the cost does + /// not matter because we use the 'cost' units to compare different + /// vector widths. The cost that is returned is *not* normalized by + /// the factor width. + unsigned expectedCost(unsigned VF); + + /// Returns the execution time cost of an instruction for a given vector + /// width. Vector width of one means scalar. + unsigned getInstructionCost(Instruction *I, unsigned VF); + + /// A helper function for converting Scalar types to vector types. + /// If the incoming type is void, we return void. If the VF is 1, we return + /// the scalar type. + static Type* ToVectorTy(Type *Scalar, unsigned VF); + + /// The loop that we evaluate. + Loop *TheLoop; + /// Scev analysis. + ScalarEvolution *SE; + + /// Vectorization legality. + LoopVectorizationLegality *Legal; + /// Vector target information. + const VectorTargetTransformInfo *VTTI; +}; + +}// namespace llvm + +#endif //LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H + diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp index d26973a7b3..3fb36cadea 100644 --- a/lib/Transforms/Vectorize/Vectorize.cpp +++ b/lib/Transforms/Vectorize/Vectorize.cpp @@ -13,13 +13,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/Transforms/Vectorize.h" +#include "llvm/Transforms/Vectorize.h" #include "llvm-c/Initialization.h" -#include "llvm/InitializePasses.h" -#include "llvm/PassManager.h" +#include "llvm-c/Transforms/Vectorize.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" -#include "llvm/Transforms/Vectorize.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassManager.h" using namespace llvm; |