author     Matt Beaumont-Gay <matthewbg@google.com>   2012-12-14 17:55:15 +0000
committer  Matt Beaumont-Gay <matthewbg@google.com>   2012-12-14 17:55:15 +0000
commit     6aed25d93d1cfcde5809a73ffa7dc1b0d6396f66 (patch)
tree       57e2fdf1caf960d8d878e0289f32af6759832b49 /lib/Transforms
parent     7139cfb19b1cc28dfd5e274c07ec68835bc6d6d6 (diff)
parent     1ad9253c9d34ccbce3e7e4ea5d87c266cbf93410 (diff)
Updating branches/google/stable to r169803
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/stable@170212 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms')
110 files changed, 7014 insertions, 3615 deletions
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index b0e22de8d7..d0b146b4e9 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -13,10 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "hello"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Function.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(HelloCounter, "Counts number of functions greeted");
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 8a0274b5ff..2132e0a5fe 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -31,21 +31,21 @@
 #define DEBUG_TYPE "argpromotion"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/CallGraphSCCPass.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CallGraphSCCPass.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 #include <set>
 using namespace llvm;
@@ -515,12 +515,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // that we are *not* promoting. For the ones that we do promote, the parameter
   // attributes are lost
   SmallVector<AttributeWithIndex, 8> AttributesVec;
-  const AttrListPtr &PAL = F->getAttributes();
+  const AttributeSet &PAL = F->getAttributes();
 
   // Add any return attributes.
   Attributes attrs = PAL.getRetAttributes();
   if (attrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
                                                     attrs));
 
   // First, determine the new argument list
@@ -593,7 +593,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // Add any function attributes.
   attrs = PAL.getFnAttributes();
   if (attrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                     attrs));
 
   Type *RetTy = FTy->getReturnType();
@@ -611,7 +611,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
 
   // Recompute the parameter attributes list based on the new arguments for
   // the function.
-  NF->setAttributes(AttrListPtr::get(AttributesVec));
+  NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
   AttributesVec.clear();
 
   F->getParent()->getFunctionList().insert(F, NF);
@@ -636,12 +636,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     CallSite CS(F->use_back());
     assert(CS.getCalledFunction() == F);
     Instruction *Call = CS.getInstruction();
-    const AttrListPtr &CallPAL = CS.getAttributes();
+    const AttributeSet &CallPAL = CS.getAttributes();
 
     // Add any return attributes.
     Attributes attrs = CallPAL.getRetAttributes();
     if (attrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
                                                       attrs));
 
     // Loop over the operands, inserting GEP and loads in the caller as
@@ -723,7 +723,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
 
     // Add any function attributes.
     attrs = CallPAL.getFnAttributes();
     if (attrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                       attrs));
 
     Instruction *New;
@@ -731,11 +731,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
-      cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec));
+      cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
+                                                             AttributesVec));
     } else {
       New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
-      cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec));
+      cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
+                                                           AttributesVec));
       if (cast<CallInst>(Call)->isTailCall())
         cast<CallInst>(New)->setTailCall();
     }
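Most of this patch is a mechanical rename of AttrListPtr to AttributeSet; the behavioral difference is that AttributeSet::get() is uniqued per LLVMContext and therefore takes the context as its first argument. A minimal sketch of the before/after pattern, assuming the 3.2-era headers used in this tree (the function and vector names here are illustrative, not from the patch):

    // Rebuild a function's attribute list from an AttributeWithIndex vector.
    #include "llvm/Attributes.h"
    #include "llvm/Function.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    void rebuildAttrs(Function *F, SmallVector<AttributeWithIndex, 8> &Vec) {
      // Before this patch: F->setAttributes(AttrListPtr::get(Vec));
      // After: the list is uniqued in the LLVMContext that owns F, so
      // get() needs that context explicitly.
      F->setAttributes(AttributeSet::get(F->getContext(), Vec));
    }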
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index e2f012657f..d30eeaf7d3 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -19,15 +19,15 @@
 #define DEBUG_TYPE "constmerge"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 using namespace llvm;
 
 STATISTIC(NumMerged, "Number of global constants merged");
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index fc22548db7..6236a04fc2 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -19,11 +19,15 @@
 #define DEBUG_TYPE "deadargelim"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constant.h"
+#include "llvm/DIBuilder.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/DIBuilder.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
@@ -32,10 +36,6 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 #include <map>
 #include <set>
 using namespace llvm;
@@ -271,16 +271,16 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
   Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
 
   // Drop any attributes that were on the vararg arguments.
-  AttrListPtr PAL = CS.getAttributes();
+  AttributeSet PAL = CS.getAttributes();
   if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) {
     SmallVector<AttributeWithIndex, 8> AttributesVec;
     for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
       AttributesVec.push_back(PAL.getSlot(i));
     Attributes FnAttrs = PAL.getFnAttributes();
     if (FnAttrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                       FnAttrs));
-    PAL = AttrListPtr::get(AttributesVec);
+    PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
   }
 
   Instruction *New;
@@ -698,7 +698,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
 
   // Set up to build a new list of parameter attributes.
   SmallVector<AttributeWithIndex, 8> AttributesVec;
-  const AttrListPtr &PAL = F->getAttributes();
+  const AttributeSet &PAL = F->getAttributes();
 
   // The existing function return attributes.
   Attributes RAttrs = PAL.getRetAttributes();
@@ -773,7 +773,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
          "Return attributes no longer compatible?");
 
   if (RAttrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
                                                     RAttrs));
 
   // Remember which arguments are still alive.
@@ -802,11 +802,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   }
 
   if (FnAttrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                     FnAttrs));
 
   // Reconstruct the AttributesList based on the vector we constructed.
-  AttrListPtr NewPAL = AttrListPtr::get(AttributesVec);
+  AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
 
   // Create the new function type based on the recomputed parameters.
   FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
@@ -833,7 +833,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     Instruction *Call = CS.getInstruction();
 
     AttributesVec.clear();
-    const AttrListPtr &CallPAL = CS.getAttributes();
+    const AttributeSet &CallPAL = CS.getAttributes();
 
     // The call return attributes.
     Attributes RAttrs = CallPAL.getRetAttributes();
@@ -843,7 +843,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       Attributes::get(NF->getContext(), AttrBuilder(RAttrs).
         removeAttributes(Attributes::typeIncompatible(NF->getReturnType())));
     if (RAttrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
                                                       RAttrs));
 
     // Declare these outside of the loops, so we can reuse them for the second
@@ -870,11 +870,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     }
 
     if (FnAttrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                       FnAttrs));
 
     // Reconstruct the AttributesList based on the vector we constructed.
-    AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec);
+    AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
 
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 6716deb9e4..f8d0491bf8 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -11,13 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Constants.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/SetVector.h"
 #include <algorithm>
 using namespace llvm;
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 18409f77b3..685833da1a 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -20,17 +20,17 @@
 #define DEBUG_TYPE "functionattrs"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/InstIterator.h"
 using namespace llvm;
@@ -215,13 +215,13 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
     AttrBuilder B;
     B.addAttribute(Attributes::ReadOnly)
       .addAttribute(Attributes::ReadNone);
-    F->removeAttribute(AttrListPtr::FunctionIndex,
+    F->removeAttribute(AttributeSet::FunctionIndex,
                        Attributes::get(F->getContext(), B));
 
     // Add in the new attribute.
     B.clear();
     B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone);
-    F->addAttribute(AttrListPtr::FunctionIndex,
+    F->addAttribute(AttributeSet::FunctionIndex,
                     Attributes::get(F->getContext(), B));
 
     if (ReadsMemory)
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 18c1c7b000..b2c819de2b 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -17,11 +17,11 @@
 #define DEBUG_TYPE "globaldce"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumAliases , "Number of global aliases removed");
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 678189b3d6..20f9de5a83 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -15,29 +15,29 @@
 #define DEBUG_TYPE "globalopt"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include <algorithm>
 using namespace llvm;
@@ -148,17 +148,13 @@ struct GlobalStatus {
   /// an instruction (e.g. a constant expr or GV initializer).
   bool HasNonInstructionUser;
 
-  /// HasPHIUser - Set to true if this global has a user that is a PHI node.
-  bool HasPHIUser;
-
   /// AtomicOrdering - Set to the strongest atomic ordering requirement.
   AtomicOrdering Ordering;
 
   GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
                    StoredOnceValue(0), AccessingFunction(0),
                    HasMultipleAccessingFunctions(false),
-                   HasNonInstructionUser(false), HasPHIUser(false),
-                   Ordering(NotAtomic) {}
+                   HasNonInstructionUser(false), Ordering(NotAtomic) {}
 };
 
 }
@@ -200,11 +196,11 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
     const User *U = *UI;
     if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
       GS.HasNonInstructionUser = true;
-      
+
       // If the result of the constantexpr isn't pointer type, then we won't
       // know to expect it in various places.  Just reject early.
       if (!isa<PointerType>(CE->getType())) return true;
-      
+
       if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
       if (!GS.HasMultipleAccessingFunctions) {
@@ -225,6 +221,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
         // Don't hack on volatile stores.
         if (SI->isVolatile()) return true;
+
         GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
 
         // If this is a direct store to the global (i.e., the global is a scalar
@@ -234,6 +231,14 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
         if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
               SI->getOperand(1))) {
           Value *StoredVal = SI->getOperand(0);
+
+          if (Constant *C = dyn_cast<Constant>(StoredVal)) {
+            if (C->isThreadDependent()) {
+              // The stored value changes between threads; don't track it.
+              return true;
+            }
+          }
+
           if (StoredVal == GV->getInitializer()) {
             if (GS.StoredType < GlobalStatus::isInitializerStored)
               GS.StoredType = GlobalStatus::isInitializerStored;
@@ -265,7 +270,6 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
         // have to be careful about infinite recursion.
         if (PHIUsers.insert(PN))  // Not already visited.
           if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
-        GS.HasPHIUser = true;
       } else if (isa<CmpInst>(I)) {
         GS.isCompared = true;
       } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
@@ -2061,7 +2065,7 @@ static void ChangeCalleesToFastCall(Function *F) {
   }
 }
 
-static AttrListPtr StripNest(LLVMContext &C, const AttrListPtr &Attrs) {
+static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
   for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
     if (!Attrs.getSlot(i).Attrs.hasAttribute(Attributes::Nest))
       continue;
@@ -2148,7 +2152,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
 GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
   GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
   if (GV == 0) return 0;
-  
+
   // Verify that the initializer is simple enough for us to handle. We are
   // only allowed to optimize the initializer if it is unique.
   if (!GV->hasUniqueInitializer()) return 0;
@@ -2254,7 +2258,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
 }
 
 
-static inline bool 
+static inline bool
 isSimpleEnoughValueToCommit(Constant *C,
                             SmallPtrSet<Constant*, 8> &SimpleConstants,
                             const DataLayout *TD);
@@ -2276,7 +2280,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
   if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
       isa<GlobalValue>(C))
     return true;
-  
+
   // Aggregate values are safe if all their elements are.
   if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
       isa<ConstantVector>(C)) {
@@ -2287,7 +2291,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
     }
     return true;
   }
-  
+
   // We don't know exactly what relocations are allowed in constant expressions,
   // so we allow &global+constantoffset, which is safe and uniformly supported
   // across targets.
@@ -2305,14 +2309,14 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
         TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
       return false;
     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
-  
+
   // GEP is fine if it is simple + constant offset.
   case Instruction::GetElementPtr:
     for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
       if (!isa<ConstantInt>(CE->getOperand(i)))
        return false;
    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
-  
+
   case Instruction::Add:
     // We allow simple+cst.
     if (!isa<ConstantInt>(CE->getOperand(1)))
@@ -2322,7 +2326,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
   return false;
 }
 
-static inline bool 
+static inline bool
 isSimpleEnoughValueToCommit(Constant *C,
                             SmallPtrSet<Constant*, 8> &SimpleConstants,
                             const DataLayout *TD) {
@@ -2370,7 +2374,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
         return false;
 
       return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
-      
+
     // A constantexpr bitcast from a pointer to another pointer is a no-op,
     // and we know how to evaluate it by moving the bitcast from the pointer
     // operand to the value operand.
@@ -2381,7 +2385,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
       return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
     }
   }
-  
+
   return false;
 }
@@ -2411,7 +2415,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
     // Return the modified struct.
     return ConstantStruct::get(STy, Elts);
   }
-  
+
   ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
   SequentialType *InitTy = cast<SequentialType>(Init->getType());
@@ -2588,23 +2592,23 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
       if (!isSimpleEnoughPointerToCommit(Ptr))
         // If this is too complex for us to commit, reject it.
        return false;
-      
+
       Constant *Val = getVal(SI->getOperand(0));
 
       // If this might be too difficult for the backend to handle (e.g. the addr
      // of one global variable divided by another) then we can't commit it.
       if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD))
         return false;
-      
+
       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
         if (CE->getOpcode() == Instruction::BitCast) {
           // If we're evaluating a store through a bitcast, then we need
           // to pull the bitcast off the pointer type and push it onto the
           // stored value.
           Ptr = CE->getOperand(0);
-          
+
           Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
-          
+
           // In order to push the bitcast onto the stored value, a bitcast
           // from NewTy to Val's type must be legal.  If it's not, we can try
           // introspecting NewTy to find a legal conversion.
@@ -2629,12 +2633,12 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
             return false;
           }
         }
-        
+
         // If we found compatible types, go ahead and push the bitcast
         // onto the stored value.
         Val = ConstantExpr::getBitCast(Val, NewTy);
       }
-      
+
       MutatedMemory[Ptr] = Val;
     } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
       InstResult = ConstantExpr::get(BO->getOpcode(),
@@ -2796,7 +2800,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
     if (!CurInst->use_empty()) {
       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
         InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
-      
+
       setVal(CurInst, InstResult);
     }
@@ -2882,7 +2886,7 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD,
   Constant *RetValDummy;
   bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
                                            SmallVector<Constant*, 0>());
-  
+
   if (EvalSuccess) {
     // We succeeded at evaluation: commit the result.
     DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
@@ -3002,13 +3006,13 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
     return 0;
 
   Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
-  
+
   if (!Fn)
     return 0;
 
   FunctionType *FTy = Fn->getFunctionType();
-  
-  // Checking that the function has the right return type, the right number of 
+
+  // Checking that the function has the right return type, the right number of
   // parameters and that they all have pointer types should be enough.
   if (!FTy->getReturnType()->isIntegerTy() ||
       FTy->getNumParams() != 3 ||
@@ -3083,7 +3087,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
   // and remove them.
   bool Changed = false;
 
-  for (Function::use_iterator I = CXAAtExitFn->use_begin(), 
+  for (Function::use_iterator I = CXAAtExitFn->use_begin(),
          E = CXAAtExitFn->use_end(); I != E;) {
     // We're only interested in calls. Theoretically, we could handle invoke
     // instructions as well, but neither llvm-gcc nor clang generate invokes
@@ -3092,7 +3096,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
     if (!CI)
       continue;
 
-    Function *DtorFn = 
+    Function *DtorFn =
       dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
     if (!DtorFn)
       continue;
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index d757e1fdb1..252b5b0584 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -17,14 +17,14 @@
 #define DEBUG_TYPE "ipconstprop"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 STATISTIC(NumArgumentsProped, "Number of args turned into constants");
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index b1c36c15db..5b8832e5d7 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -13,18 +13,18 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "inline"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/CallingConv.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
@@ -32,6 +32,7 @@ namespace {
 
   // AlwaysInliner only inlines functions that are mark as "always inline".
   class AlwaysInliner : public Inliner {
+    InlineCostAnalyzer CA;
   public:
     // Use extremely low threshold.
     AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
@@ -43,6 +44,10 @@ namespace {
     }
     static char ID; // Pass identification, replacement for typeid
     virtual InlineCost getInlineCost(CallSite CS);
+
+    using llvm::Pass::doInitialization;
+    using llvm::Pass::doFinalization;
+
     virtual bool doFinalization(CallGraph &CG) {
       return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
     }
@@ -63,35 +68,6 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
   return new AlwaysInliner(InsertLifetime);
 }
 
-/// \brief Minimal filter to detect invalid constructs for inlining.
-static bool isInlineViable(Function &F) {
-  bool ReturnsTwice =F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice);
-  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
-    // Disallow inlining of functions which contain an indirect branch.
-    if (isa<IndirectBrInst>(BI->getTerminator()))
-      return false;
-
-    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
-         ++II) {
-      CallSite CS(II);
-      if (!CS)
-        continue;
-
-      // Disallow recursive calls.
-      if (&F == CS.getCalledFunction())
-        return false;
-
-      // Disallow calls which expose returns-twice to a function not previously
-      // attributed as such.
-      if (!ReturnsTwice && CS.isCall() &&
-          cast<CallInst>(CS.getInstruction())->canReturnTwice())
-        return false;
-    }
-  }
-
-  return true;
-}
-
 /// \brief Get the inline cost for the always-inliner.
 ///
 /// The always inliner *only* handles functions which are marked with the
@@ -106,27 +82,21 @@ static bool isInlineViable(Function &F) {
 /// likely not worth it in practice.
 InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
   Function *Callee = CS.getCalledFunction();
-  // We assume indirect calls aren't calling an always-inline function.
-  if (!Callee) return InlineCost::getNever();
-
-  // We can't inline calls to external functions.
-  // FIXME: We shouldn't even get here.
-  if (Callee->isDeclaration()) return InlineCost::getNever();
-
-  // Return never for anything not marked as always inline.
-  if (!Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline))
-    return InlineCost::getNever();
 
-  // Do some minimal analysis to preclude non-viable functions.
-  if (!isInlineViable(*Callee))
-    return InlineCost::getNever();
+  // Only inline direct calls to functions with always-inline attributes
+  // that are viable for inlining. FIXME: We shouldn't even get here for
+  // declarations.
+  if (Callee && !Callee->isDeclaration() &&
+      Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline) &&
+      CA.isInlineViable(*Callee))
+    return InlineCost::getAlways();
 
-  // Otherwise, force inlining.
-  return InlineCost::getAlways();
+  return InlineCost::getNever();
 }
 
 // doInitialization - Initializes the vector of functions that have not
 // been annotated with the "always inline" attribute.
 bool AlwaysInliner::doInitialization(CallGraph &CG) {
+  CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
   return false;
 }
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index bf0b1f91a2..9c5feba08b 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -12,17 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "inline"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/CallingConv.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
@@ -42,6 +42,7 @@ namespace {
     InlineCost getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, getInlineThreshold(CS));
     }
+    using llvm::Pass::doInitialization;
     virtual bool doInitialization(CallGraph &CG);
   };
 }
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index abcb25fd45..bd8fa66d52 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -14,22 +14,22 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "inline"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 STATISTIC(NumInlined, "Number of functions inlined");
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index aa629cc0c6..b2cd3a765a 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -14,14 +14,14 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "internalize"
-#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
 #include <fstream>
 #include <set>
 using namespace llvm;
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 97d7cdced0..af04d054ed 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -16,16 +16,16 @@
 
 #define DEBUG_TYPE "loop-extract"
#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/ADT/Statistic.h" #include <fstream> #include <set> using namespace llvm; diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 44283ddce7..70345b8334 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -45,7 +45,13 @@ #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" @@ -53,17 +59,11 @@ #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include <vector> using namespace llvm; diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 9c9910bd5c..6bd9c8372e 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -14,14 +14,14 @@ #define DEBUG_TYPE "partialinlining" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CFG.h" using namespace llvm; STATISTIC(NumPartialInlined, "Number of functions partially inlined"); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 05253fcdda..a9a9f2eece 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -14,21 +14,19 @@ #include "llvm/Transforms/IPO/PassManagerBuilder.h" - #include "llvm-c/Transforms/PassManagerBuilder.h" - -#include "llvm/PassManager.h" -#include "llvm/DefaultPasses.h" -#include "llvm/PassManager.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/DefaultPasses.h" +#include "llvm/PassManager.h" +#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Vectorize.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ManagedStatic.h" using namespace llvm; @@ -190,10 +188,8 @@ void 
@@ -190,10 +188,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
   MPM.add(createLoopDeletionPass());          // Delete dead loops
 
-  if (LoopVectorize) {
+  if (LoopVectorize && OptLevel > 1)
     MPM.add(createLoopVectorizePass());
-    MPM.add(createLICMPass());
-  }
 
   if (!DisableUnrollLoops)
     MPM.add(createLoopUnrollPass());          // Unroll small loops
@@ -323,7 +319,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
   PM.add(createGlobalDCEPass());
 }
 
-LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void) {
+LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
   PassManagerBuilder *PMB = new PassManagerBuilder();
   return wrap(PMB);
 }
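The PassManagerBuilder hunk above also changes when the loop vectorizer runs: it is now added only when OptLevel > 1, so setting LoopVectorize alone is no longer enough at -O0/-O1, and the extra LICM run after it is gone. A minimal client sketch, assuming the 3.2-era C++ API and that LoopVectorize is a public PassManagerBuilder flag as the hunk suggests:

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    void buildPipeline(PassManager &PM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;          // with OptLevel <= 1 the vectorizer is skipped
      PMB.LoopVectorize = true;  // assumed public flag, per the hunk above
      PMB.populateModulePassManager(PM);
    }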
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index fb4ecbfe7b..19f34837c7 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -16,16 +16,16 @@
 #define DEBUG_TYPE "prune-eh"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/CallGraphSCCPass.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CFG.h"
 #include <algorithm>
 using namespace llvm;
@@ -145,8 +145,8 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
         NewAttributes.addAttribute(Attributes::NoReturn);
 
       Function *F = (*I)->getFunction();
-      const AttrListPtr &PAL = F->getAttributes();
-      const AttrListPtr &NPAL = PAL.addAttr(F->getContext(), ~0,
+      const AttributeSet &PAL = F->getAttributes();
+      const AttributeSet &NPAL = PAL.addAttr(F->getContext(), ~0,
                                             Attributes::get(F->getContext(),
                                                             NewAttributes));
       if (PAL != NPAL) {
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index b5f09eccca..80cb869f02 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -16,9 +16,9 @@
 #define DEBUG_TYPE "strip-dead-prototypes"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 using namespace llvm;
 
 STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 80bfc1cdb2..ad915d716f 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -21,17 +21,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Constants.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/TypeFinder.h"
 #include "llvm/ValueSymbolTable.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 7467eca7ab..0570104205 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -11,12 +11,12 @@
 #define INSTCOMBINE_INSTCOMBINE_H
 
 #include "InstCombineWorklist.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IRBuilder.h"
+#include "llvm/InstVisitor.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Operator.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/TargetFolder.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
@@ -327,6 +327,11 @@ private:
   bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
                             APInt& KnownZero, APInt& KnownOne,
                             unsigned Depth=0);
+  /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
+  /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence.
+  Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
+                                    APInt DemandedMask, APInt &KnownZero,
+                                    APInt &KnownOne);
 
   /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
   /// SimplifyDemandedBits knows about.  See if the instruction has any
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 7d0af0d802..f5c42a7983 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -269,7 +269,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
 /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
 /// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more efficient
-/// (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi. isSigned indicates
+/// (V-Lo) \<u Hi-Lo.  This method expects that Lo <= Hi. isSigned indicates
 /// whether to treat the V, Lo and HI as signed or not. IB is the location to
 /// insert new instructions.
 Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
@@ -2159,6 +2159,27 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
           I.setOperand(1, NewRHS);
           return &I;
         }
+      } else if (Op0I->getOpcode() == Instruction::LShr) {
+        // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+        // E1 = "X ^ C1"
+        BinaryOperator *E1;
+        ConstantInt *C1;
+        if (Op0I->hasOneUse() &&
+            (E1 = dyn_cast<BinaryOperator>(Op0I->getOperand(0))) &&
+            E1->getOpcode() == Instruction::Xor &&
+            (C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) {
+          // fold (C1 >> C2) ^ C3
+          ConstantInt *C2 = Op0CI, *C3 = RHS;
+          APInt FoldConst = C1->getValue().lshr(C2->getValue());
+          FoldConst ^= C3->getValue();
+          // Prepare the two operands.
+          Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2);
+          Opnd0->takeName(Op0I);
+          cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc());
+          Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst);
+
+          return BinaryOperator::CreateXor(Opnd0, FoldVal);
+        }
       }
     }
   }
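The new visitXor case is justified by lshr distributing over xor: ((x ^ c1) >> c2) ^ c3 equals (x >> c2) ^ ((c1 >> c2) ^ c3), which lets the shifted constant fold away. A standalone check of the identity in plain C++ (illustrative values, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t c1 = 0xDEADBEEF, c2 = 7, c3 = 0x1234;
      for (uint32_t x = 0; x < 1024; ++x) {
        uint32_t lhs = ((x ^ c1) >> c2) ^ c3;         // original form
        uint32_t rhs = (x >> c2) ^ ((c1 >> c2) ^ c3); // folded form
        assert(lhs == rhs); // logical shift drops the same bits on both sides
      }
      return 0;
    }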
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5ad6f9111c..784742f274 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -12,13 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
+STATISTIC(NumSimplified, "Number of library calls simplified");
+
 /// getPromotedType - Return the specified type promoted as it would be to pass
 /// though a va_arg area.
 static Type *getPromotedType(Type *Ty) {
@@ -785,8 +788,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
   if (CI->getCalledFunction() == 0) return 0;
 
-  if (Value *With = Simplifier->optimizeCall(CI))
-    return ReplaceInstUsesWith(*CI, With);
+  if (Value *With = Simplifier->optimizeCall(CI)) {
+    ++NumSimplified;
+    return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
+  }
 
   return 0;
 }
@@ -977,7 +982,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   if (Callee == 0)
     return false;
   Instruction *Caller = CS.getInstruction();
-  const AttrListPtr &CallerPAL = CS.getAttributes();
+  const AttributeSet &CallerPAL = CS.getAttributes();
 
   // Okay, this is a cast from a function to a different type.  Unless doing so
   // would cause a type conversion of one of our arguments, change this call to
@@ -1118,7 +1123,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   // Add the new return attributes.
   if (RAttrs.hasAttributes())
     attrVec.push_back(
-      AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+      AttributeWithIndex::get(AttributeSet::ReturnIndex,
                               Attributes::get(FT->getContext(), RAttrs)));
 
   AI = CS.arg_begin();
@@ -1171,13 +1176,14 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   Attributes FnAttrs = CallerPAL.getFnAttributes();
   if (FnAttrs.hasAttributes())
-    attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+    attrVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                               FnAttrs));
 
   if (NewRetTy->isVoidTy())
     Caller->setName("");   // Void type should not have a name.
 
-  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec);
+  const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
+                                                       attrVec);
 
   Instruction *NC;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -1237,7 +1243,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
   Value *Callee = CS.getCalledValue();
   PointerType *PTy = cast<PointerType>(Callee->getType());
   FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
-  const AttrListPtr &Attrs = CS.getAttributes();
+  const AttributeSet &Attrs = CS.getAttributes();
 
   // If the call already has the 'nest' attribute somewhere then give up -
   // otherwise 'nest' would occur twice after splicing in the chain.
@@ -1252,7 +1258,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
     PointerType *NestFPTy = cast<PointerType>(NestF->getType());
     FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
 
-    const AttrListPtr &NestAttrs = NestF->getAttributes();
+    const AttributeSet &NestAttrs = NestF->getAttributes();
     if (!NestAttrs.isEmpty()) {
       unsigned NestIdx = 1;
       Type *NestTy = 0;
@@ -1282,7 +1288,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
       // Add any result attributes.
       Attributes Attr = Attrs.getRetAttributes();
       if (Attr.hasAttributes())
-        NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+        NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
                                                    Attr));
 
       {
@@ -1315,7 +1321,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
       // Add any function attributes.
       Attr = Attrs.getFnAttributes();
       if (Attr.hasAttributes())
-        NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+        NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                    Attr));
 
       // The trampoline may have been bitcast to a bogus type (FTy).
@@ -1355,7 +1361,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
     NestF->getType() == PointerType::getUnqual(NewFTy) ?
     NestF : ConstantExpr::getBitCast(NestF,
                                      PointerType::getUnqual(NewFTy));
-  const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs);
+  const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
 
   Instruction *NewCaller;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index bb59db8e7b..19de62c81f 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,8 +14,8 @@
 #include "InstCombine.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 8cb4a59cba..1b96c3cca4 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -12,15 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -2356,8 +2356,25 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       // Try not to increase register pressure.
       BO0->hasOneUse() && BO1->hasOneUse()) {
     // Determine Y and Z in the form icmp (X+Y), (X+Z).
-    Value *Y = (A == C || A == D) ? B : A;
-    Value *Z = (C == A || C == B) ? D : C;
+    Value *Y, *Z;
+    if (A == C) {
+      // C + B == C + D  ->  B == D
+      Y = B;
+      Z = D;
+    } else if (A == D) {
+      // D + B == C + D  ->  B == C
+      Y = B;
+      Z = C;
+    } else if (B == C) {
+      // A + C == C + D  ->  A == D
+      Y = A;
+      Z = D;
+    } else {
+      assert(B == D);
+      // A + D == C + D  ->  A == C
+      Y = A;
+      Z = C;
+    }
     return new ICmpInst(Pred, Y, Z);
   }
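The rewritten case analysis above is sound because machine addition is a group mod 2^N, so a shared addend cancels even when the sums wrap. A small check of the "A == D" case in plain C++ (values are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t a = 0xFFFFFFF0u, d = a; // shared operand, near overflow
      for (uint32_t b = 0; b < 8; ++b)
        for (uint32_t c = 0; c < 8; ++c)
          // icmp (A+B), (C+D) with A == D reduces to icmp B, C.
          assert(((a + b) == (c + d)) == (b == c));
      return 0;
    }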
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4ab5b6e4a0..5726d3a91d 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/DataLayout.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumDeadStore, "Number of dead stores eliminated");
@@ -150,25 +150,6 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
   return 0;
 }
 
-/// getPointeeAlignment - Compute the minimum alignment of the value pointed
-/// to by the given pointer.
-static unsigned getPointeeAlignment(Value *V, const DataLayout &TD) {
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-    if (CE->getOpcode() == Instruction::BitCast ||
-        (CE->getOpcode() == Instruction::GetElementPtr &&
-         cast<GEPOperator>(CE)->hasAllZeroIndices()))
-      return getPointeeAlignment(CE->getOperand(0), TD);
-
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
-    if (!GV->isDeclaration())
-      return TD.getPreferredAlignment(GV);
-
-  if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
-    return TD.getABITypeAlignment(PT->getElementType());
-
-  return 0;
-}
-
 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
   // Ensure that the alloca array size argument has type intptr_t, so that
   // any casting is exposed early.
@@ -264,7 +245,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
     }
   }
 
-  if (TD) {
+  if (AI.getAlignment()) {
     // Check to see if this allocation is only modified by a memcpy/memmove from
     // a constant global whose alignment is equal to or exceeds that of the
     // allocation.  If this is the case, we can change all users to use
@@ -273,7 +254,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
     // is only subsequently read.
     SmallVector<Instruction *, 4> ToDelete;
     if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
-      if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
+                                                        AI.getAlignment(), TD);
+      if (AI.getAlignment() <= SourceAlign) {
         DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
         DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
         for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cefe45ec86..5cd611c420 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -252,6 +252,46 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   return Changed ? &I : 0;
 }
 
+//
+// Detect pattern:
+//
+// log2(Y*0.5)
+//
+// And check for corresponding fast math flags
+//
+static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
+
+  if (!Op->hasOneUse())
+    return;
+
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op);
+  if (!II)
+    return;
+  if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra())
+    return;
+  Log2 = II;
+
+  Value *OpLog2Of = II->getArgOperand(0);
+  if (!OpLog2Of->hasOneUse())
+    return;
+
+  Instruction *I = dyn_cast<Instruction>(OpLog2Of);
+  if (!I)
+    return;
+  if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+    return;
+
+  ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
+  if (CFP && CFP->isExactlyValue(0.5)) {
+    Y = I->getOperand(1);
+    return;
+  }
+  CFP = dyn_cast<ConstantFP>(I->getOperand(1));
+  if (CFP && CFP->isExactlyValue(0.5))
+    Y = I->getOperand(0);
+}
+
 Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
   bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -284,6 +324,33 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
     if (Value *Op1v = dyn_castFNegVal(Op1))
       return BinaryOperator::CreateFMul(Op0v, Op1v);
 
+  // Under unsafe algebra do:
+  // X * log2(0.5*Y) = X*log2(Y) - X
+  if (I.hasUnsafeAlgebra()) {
+    Value *OpX = NULL;
+    Value *OpY = NULL;
+    IntrinsicInst *Log2;
+    detectLog2OfHalf(Op0, OpY, Log2);
+    if (OpY) {
+      OpX = Op1;
+    } else {
+      detectLog2OfHalf(Op1, OpY, Log2);
+      if (OpY) {
+        OpX = Op0;
+      }
+    }
+    // if pattern detected emit alternate sequence
+    if (OpX && OpY) {
+      Log2->setArgOperand(0, OpY);
+      Value *FMulVal = Builder->CreateFMul(OpX, Log2);
+      Instruction *FMul = cast<Instruction>(FMulVal);
+      FMul->copyFastMathFlags(Log2);
+      Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX);
+      FSub->copyFastMathFlags(Log2);
+      return FSub;
+    }
+  }
+
   return Changed ? &I : 0;
 }
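The fast-math fold in visitFMul above follows from one line of algebra, valid only at the rewrite level and only under the unsafe-algebra flags the code checks:

    x \cdot \log_2\!\left(\tfrac{1}{2}y\right)
      = x \cdot \left(\log_2 y - 1\right)
      = x \cdot \log_2 y - x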
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index de9c77e600..ea127e9f53 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
 /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index a2d4c888f2..a262d711d3 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/Support/PatternMatch.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 57021f1bef..8a28d8eaa2 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -49,7 +49,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
       I.setOperand(1, Rem);
       return &I;
     }
-  
+
   return 0;
 }
@@ -70,10 +70,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
   // We can always evaluate constants shifted.
   if (isa<Constant>(V))
     return true;
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
-  
+
   // If this is the opposite shift, we can directly reuse the input of the shift
   // if the needed bits are already zero in the input.  This allows us to reuse
   // the value which means that we don't care if the shift has multiple uses.
@@ -95,14 +95,14 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
         return CanEvaluateTruncated(I->getOperand(0), Ty);
       }
 #endif
-      
+
     }
   }
-  
+
   // We can't mutate something that has multiple uses: doing so would
   // require duplicating the instruction in general, which isn't profitable.
  if (!I->hasOneUse()) return false;
-  
+
   switch (I->getOpcode()) {
   default: return false;
   case Instruction::And:
@@ -111,7 +111,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
     // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
     return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
            CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
-    
+
   case Instruction::Shl: {
     // We can often fold the shift into shifts-by-a-constant.
CI = dyn_cast<ConstantInt>(I->getOperand(1)); @@ -119,10 +119,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). if (isLeftShift) return true; - + // We can always turn shl(c)+shr(c) -> and(c2). if (CI->getValue() == NumBits) return true; - + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't @@ -133,20 +133,20 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) return true; } - + return false; } case Instruction::LShr: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast<ConstantInt>(I->getOperand(1)); if (CI == 0) return false; - + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) return true; - + // We can always turn lshr(c)+shl(c) -> and(c2). if (CI->getValue() == NumBits) return true; - + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't @@ -157,7 +157,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) return true; } - + return false; } case Instruction::Select: { @@ -175,7 +175,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, return false; return true; } - } + } } /// GetShiftedValue - When CanEvaluateShifted returned true for an expression, @@ -194,7 +194,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, IC.getTargetLibraryInfo()); return V; } - + Instruction *I = cast<Instruction>(V); IC.Worklist.Add(I); @@ -207,7 +207,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC)); I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); return I; - + case Instruction::Shl: { BinaryOperator *BO = cast<BinaryOperator>(I); unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); @@ -227,7 +227,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, BO->setHasNoSignedWrap(false); return I; } - + // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have // zeros. if (CI->getValue() == NumBits) { @@ -240,7 +240,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, } return V; } - + // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that // the and won't be needed. assert(CI->getZExtValue() > NumBits); @@ -255,19 +255,19 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); // We only accept shifts-by-a-constant in CanEvaluateShifted. ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); - + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) { // If this is oversized composite shift, then unsigned shifts get 0. unsigned NewShAmt = NumBits+CI->getZExtValue(); if (NewShAmt >= TypeWidth) return Constant::getNullValue(BO->getType()); - + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); BO->setIsExact(false); return I; } - + // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have // zeros. 
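The shl/lshr cases of CanEvaluateShifted rest on three bit identities that are easy to spot-check on unsigned integers; a small sanity test (plain C++, 32-bit unsigned, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
    // shl(c1)+shl(c2) -> shl(c1+c2), as long as c1+c2 < bit width.
    assert(((X << 3) << 5) == (X << 8));
    // shl(c)+lshr(c) -> and: only the low bits survive the round trip.
    assert(((X << 7) >> 7) == (X & (0xFFFFFFFFu >> 7)));
    // lshr(c)+shl(c) -> and: only the high bits survive the round trip.
    assert(((X >> 7) << 7) == (X & (0xFFFFFFFFu << 7)));
  }
  return 0;
}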
if (CI->getValue() == NumBits) { @@ -280,7 +280,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, } return V; } - + // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that // the and won't be needed. assert(CI->getZExtValue() > NumBits); @@ -289,7 +289,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, BO->setIsExact(false); return BO; } - + case Instruction::Select: I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC)); @@ -304,7 +304,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, NumBits, isLeftShift, IC)); return PN; } - } + } } @@ -312,24 +312,24 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; - - + + // See if we can propagate this shift into the input, this covers the trivial // cast of lshr(shl(x,c1),c2) as well as other more complex cases. if (I.getOpcode() != Instruction::AShr && CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); - - return ReplaceInstUsesWith(I, + + return ReplaceInstUsesWith(I, GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); } - - - // See if we can simplify any instructions used by the instruction whose sole + + + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); - + // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate // a signed shift. // @@ -340,14 +340,14 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); return &I; } - + // ((X*C1) << C2) == (X * (C1 << C2)) if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) if (BO->getOpcode() == Instruction::Mul && isLeftShift) if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) return BinaryOperator::CreateMul(BO->getOperand(0), ConstantExpr::getShl(BOOp, Op1)); - + // Try to fold constant and into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -355,7 +355,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (isa<PHINode>(Op0)) if (Instruction *NV = FoldOpIntoPhi(I)) return NV; - + // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); @@ -364,7 +364,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // require that the input operand is a shift-by-constant so that we have // confidence that the shifts will get folded together. We could do this // xform in more cases, but it is unlikely to be profitable. - if (TrOp && I.isLogicalShift() && TrOp->isShift() && + if (TrOp && I.isLogicalShift() && TrOp->isShift() && isa<ConstantInt>(TrOp->getOperand(1))) { // Okay, we'll do this xform. Make the shift of shift. 
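The ((X*C1) << C2) == (X * (C1 << C2)) rewrite above holds because a left shift by C2 is a multiplication by 2^C2, and multiplication modulo 2^32 is associative. A quick unsigned check (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 0x01234567u;
  const unsigned C2 = 5;
  for (uint32_t X : {0u, 3u, 0x89ABCDEFu}) {
    // Both sides are computed modulo 2^32, so overflow is harmless here.
    assert(((X * C1) << C2) == (X * (C1 << C2)));
  }
  return 0;
}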
Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); @@ -378,7 +378,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); unsigned DstSize = TI->getType()->getScalarSizeInBits(); APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); - + // The mask we constructed says what the trunc would do if occurring // between the shifts. We want to know the effect *after* the second // shift. We know that it is a logical shift by a constant, so adjust the @@ -399,7 +399,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return new TruncInst(And, I.getType()); } } - + if (Op0->hasOneUse()) { if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) @@ -425,14 +425,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } - + // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) Value *Op0BOOp1 = Op0BO->getOperand(1); if (isLeftShift && Op0BOOp1->hasOneUse() && - match(Op0BOOp1, - m_And(m_Shr(m_Value(V1), m_Specific(Op1)), - m_ConstantInt(CC))) && - cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) { + match(Op0BOOp1, + m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), + m_ConstantInt(CC)))) { Value *YS = // (Y << C) Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); @@ -442,7 +441,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } } - + // FALL THROUGH. case Instruction::Sub: { // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) @@ -458,34 +457,32 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } - + // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && match(Op0BO->getOperand(0), - m_And(m_Shr(m_Value(V1), m_Value(V2)), - m_ConstantInt(CC))) && V2 == Op1 && - cast<BinaryOperator>(Op0BO->getOperand(0)) - ->getOperand(0)->hasOneUse()) { + m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))), + m_ConstantInt(CC))) && V2 == Op1) { Value *YS = // (Y << C) Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // X & (CC << C) Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), V1->getName()+".mask"); + return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } + break; } } - - + + // If the operand is a bitwise operator with a constant RHS, and the // shift is the only use, we can pull it out of the shift. if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) { bool isValid = true; // Valid only for And, Or, Xor bool highBitSet = false; // Transform if high bit of constant set? - + switch (Op0BO->getOpcode()) { default: isValid = false; break; // Do not perform transform! case Instruction::Add: @@ -499,7 +496,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, highBitSet = true; break; } - + // If this is a signed shift right, and the high bit is modified // by the logical operation, do not perform the transformation. 
// The highBitSet boolean indicates the value of the high bit of @@ -508,26 +505,26 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // if (isValid && I.getOpcode() == Instruction::AShr) isValid = Op0C->getValue()[TypeBits-1] == highBitSet; - + if (isValid) { Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); - + Value *NewShift = Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); NewShift->takeName(Op0BO); - + return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, NewRHS); } } } } - + // Find out if this is a shift of a shift by a constant. BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); if (ShiftOp && !ShiftOp->isShift()) ShiftOp = 0; - + if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { // This is a constant shift of a constant shift. Be careful about hiding @@ -548,9 +545,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. Value *X = ShiftOp->getOperand(0); - + IntegerType *Ty = cast<IntegerType>(I.getType()); - + // Check for (X << c1) << c2 and (X >> c1) >> c2 if (I.getOpcode() == ShiftOp->getOpcode()) { uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. @@ -561,11 +558,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. } - + return BinaryOperator::Create(I.getOpcode(), X, ConstantInt::get(Ty, AmtSum)); } - + if (ShiftAmt1 == ShiftAmt2) { // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). if (I.getOpcode() == Instruction::LShr && @@ -605,7 +602,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return NewLShr; } Value *Shift = Builder->CreateLShr(X, ShiftDiffCst); - + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); @@ -653,12 +650,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return NewShl; } Value *Shift = Builder->CreateShl(X, ShiftDiffCst); - + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); } - + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However, // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits. if (I.getOpcode() == Instruction::AShr && @@ -682,21 +679,21 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), TD)) return ReplaceInstUsesWith(I, V); - + if (Instruction *V = commonShiftTransforms(I)) return V; - + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) { unsigned ShAmt = Op1C->getZExtValue(); - + // If the shifted-out value is known-zero, then this is a NUW shift. - if (!I.hasNoUnsignedWrap() && + if (!I.hasNoUnsignedWrap() && MaskedValueIsZero(I.getOperand(0), APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) { I.setHasNoUnsignedWrap(); return &I; } - + // If the shifted out value is all signbits, this is a NSW shift. 
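The NUW detection at the end of this hunk relies on a simple fact: a left shift by ShAmt cannot wrap when the top ShAmt bits of the operand are already known zero, which is exactly what the MaskedValueIsZero query proves. A hedged illustration in plain C++, widening to 64 bits to observe the absence of overflow (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned ShAmt = 5;
  const uint32_t HighBits = 0xFFFFFFFFu << (32 - ShAmt); // top ShAmt bits
  for (uint32_t X : {0u, 1u, 0x07000000u, 0x03FFFFFFu, 0xF0000000u}) {
    if ((X & HighBits) != 0)
      continue; // MaskedValueIsZero would fail; no NUW claim is made.
    // The 64-bit product fits in 32 bits, so the 32-bit shl does not wrap.
    uint64_t Wide = (uint64_t)X << ShAmt;
    assert(Wide == (uint32_t)(X << ShAmt));
  }
  return 0;
}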
if (!I.hasNoSignedWrap() && ComputeNumSignBits(I.getOperand(0)) > ShAmt) { @@ -712,7 +709,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { match(I.getOperand(1), m_Constant(C2))) return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); - return 0; + return 0; } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { @@ -722,9 +719,9 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { if (Instruction *R = commonShiftTransforms(I)) return R; - + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { unsigned ShAmt = Op1C->getZExtValue(); @@ -743,15 +740,15 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return new ZExtInst(Cmp, II->getType()); } } - + // If the shifted-out value is known-zero, then this is an exact shift. - if (!I.isExact() && + if (!I.isExact() && MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ I.setIsExact(); return &I; - } + } } - + return 0; } @@ -762,12 +759,12 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { if (Instruction *R = commonShiftTransforms(I)) return R; - + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { unsigned ShAmt = Op1C->getZExtValue(); - + // If the input is a SHL by the same constant (ashr (shl X, C), C), then we // have a sign-extend idiom. Value *X; @@ -791,23 +788,23 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { } // If the shifted-out value is known-zero, then this is an exact shift. - if (!I.isExact() && + if (!I.isExact() && MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ I.setIsExact(); return &I; } - } - + } + // See if we can turn a signed shr into an unsigned shr. if (MaskedValueIsZero(Op0, APInt::getSignBit(I.getType()->getScalarSizeInBits()))) return BinaryOperator::CreateLShr(Op0, Op1); - + // Arithmetic shifting an all-sign-bit value is a no-op. unsigned NumSignBits = ComputeNumSignBits(Op0); if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return ReplaceInstUsesWith(I, Op0); - + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 602b203371..08aedb3200 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -16,9 +16,10 @@ #include "InstCombine.h" #include "llvm/DataLayout.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; - +using namespace llvm::PatternMatch; /// ShrinkDemandedConstant - Check to see if the specified operand of the /// specified instruction is a constant integer. If so, check to see if there @@ -199,8 +200,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == (DemandedMask & (~LHSKnownZero))) return I->getOperand(1); + } else if (I->getOpcode() == Instruction::Xor) { + // We can simplify (X^Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known zero on one side, return the + // other. 
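The new Xor case mirrors the existing And/Or logic: if every demanded bit is known zero in one operand, the xor leaves the other operand's demanded bits untouched, so the whole expression can be replaced by that operand. The underlying bit identity, checked in plain C++ (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t DemandedMask = 0x0000FFFFu;
  const uint32_t Y = 0xABCD0000u; // all demanded bits of Y are zero
  assert((Y & DemandedMask) == 0);
  for (uint32_t X : {0u, 0x12345678u, 0xFFFFFFFFu}) {
    // X ^ Y agrees with X on every demanded bit, so a user that only
    // reads those bits may take X directly.
    assert(((X ^ Y) & DemandedMask) == (X & DemandedMask));
  }
  return 0;
}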
+ if ((DemandedMask & RHSKnownZero) == DemandedMask) + return I->getOperand(0); + if ((DemandedMask & LHSKnownZero) == DemandedMask) + return I->getOperand(1); } - + // Compute the KnownZero/KnownOne bits to simplify things downstream. ComputeMaskedBits(I, KnownZero, KnownOne, Depth); return 0; @@ -580,6 +594,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + { + Value *VarX; ConstantInt *C1; + if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) { + Instruction *Shr = cast<Instruction>(I->getOperand(0)); + Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask, + KnownZero, KnownOne); + if (R) + return R; + } + } + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); @@ -800,6 +825,79 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return 0; } +/// Helper routine of SimplifyDemandedUseBits. It tries to simplify +/// "E1 = (X lsr C1) << C2", where C1 and C2 are constants, into +/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign +/// of "C2-C1". +/// +/// Suppose E1 and E2 may differ in the bits S={bm, bm+1, +/// ..., bn}, regardless of the specific value X holds. +/// This transformation is legal iff one of the following conditions holds: +/// 1) All the bits in S are 0; in this case E1 == E2. +/// 2) We don't care about those bits in S, per the input DemandedMask. +/// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care about +/// the remaining bits. +/// +/// Currently we only test condition 2). +/// +/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was +/// not successful. +Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, + Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) { + + unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue(); + unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue(); + + KnownOne.clearAllBits(); + KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1); + KnownZero &= DemandedMask; + + if (ShlAmt == 0 || ShrAmt == 0) + return 0; + + Value *VarX = Shr->getOperand(0); + Type *Ty = VarX->getType(); + + APInt BitMask1(Ty->getIntegerBitWidth(), (uint64_t)-1); + APInt BitMask2(Ty->getIntegerBitWidth(), (uint64_t)-1); + + bool isLshr = (Shr->getOpcode() == Instruction::LShr); + BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) : + (BitMask1.ashr(ShrAmt) << ShlAmt); + + if (ShrAmt <= ShlAmt) { + BitMask2 <<= (ShlAmt - ShrAmt); + } else { + BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt): + BitMask2.ashr(ShrAmt - ShlAmt); + } + + // Check if condition-2 (see the comment to this function) is satisfied. + if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) { + if (ShrAmt == ShlAmt) + return VarX; + + if (!Shr->hasOneUse()) + return 0; + + BinaryOperator *New; + if (ShrAmt < ShlAmt) { + Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt); + New = BinaryOperator::CreateShl(VarX, Amt); + BinaryOperator *Orig = cast<BinaryOperator>(Shl); + New->setHasNoSignedWrap(Orig->hasNoSignedWrap()); + New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap()); + } else { + Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt); + New = isLshr ? 
BinaryOperator::CreateLShr(VarX, Amt) : + BinaryOperator::CreateAShr(VarX, Amt); + } + + return InsertNewInstWith(New, *Shl); + } + + return 0; +} /// SimplifyDemandedVectorElts - The specified value produces a vector with /// any number of elements. DemandedElts contains the set of elements that are diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index ea654ae9ed..b1a4966920 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -11,11 +11,11 @@ #define INSTCOMBINE_WORKLIST_H #define DEBUG_TYPE "instcombine" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Instruction.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" namespace llvm { diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 7f8c3ae558..9da58d0e71 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -36,22 +36,23 @@ #define DEBUG_TYPE "instcombine" #include "llvm/Transforms/Scalar.h" #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" +#include "llvm-c/Initialization.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm-c/Initialization.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <climits> using namespace llvm; @@ -65,6 +66,11 @@ STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc , "Number of reassociations"); +static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden, + cl::init(false), + cl::desc("Enable unsafe double to float " + "shrinking for math lib calls")); + // Initialization Routines void llvm::initializeInstCombine(PassRegistry &Registry) { initializeInstCombinerPass(Registry); @@ -1055,7 +1061,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // by multiples of a zero size type with zero. if (TD) { bool MadeChange = false; - Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); + Type *IntPtrTy = TD->getIntPtrType(GEP.getPointerOperandType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); @@ -1074,7 +1080,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } Type *IndexTy = (*I)->getType(); - if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) { + if (IndexTy != IntPtrTy) { // If we are using a wider index than needed for this platform, shrink // it to what we need. 
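Returning to SimplifyShrShlDemandedBits: for C1 < C2 the original E1 = (X lshr C1) shl C2 and the replacement E2 = X shl (C2-C1) can only disagree in bit positions [C2-C1, C2), which E1 forces to zero, so condition 2) holds whenever the demanded mask avoids that window. A plain-C++ spot check with illustrative constants (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned C1 = 3, C2 = 8; // C1 < C2, so the replacement is a shl
  // Demand only bits >= C2, staying clear of the window [C2-C1, C2).
  const uint32_t DemandedMask = 0xFFFFFFFFu << C2;
  for (uint32_t X : {0u, 0x1Fu, 0x12345678u, 0xFFFFFFFFu}) {
    uint32_t E1 = (X >> C1) << C2;
    uint32_t E2 = X << (C2 - C1);
    assert((E1 & DemandedMask) == (E2 & DemandedMask));
  }
  return 0;
}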
If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. @@ -2367,6 +2373,24 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { return MadeIRChange; } +namespace { +class InstCombinerLibCallSimplifier : public LibCallSimplifier { + InstCombiner *IC; +public: + InstCombinerLibCallSimplifier(const DataLayout *TD, + const TargetLibraryInfo *TLI, + InstCombiner *IC) + : LibCallSimplifier(TD, TLI, UnsafeFPShrink) { + this->IC = IC; + } + + /// replaceAllUsesWith - override so that instruction replacement + /// can be defined in terms of the instruction combiner framework. + virtual void replaceAllUsesWith(Instruction *I, Value *With) const { + IC->ReplaceInstUsesWith(*I, With); + } +}; +} bool InstCombiner::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<DataLayout>(); @@ -2379,7 +2403,7 @@ bool InstCombiner::runOnFunction(Function &F) { InstCombineIRInserter(Worklist)); Builder = &TheBuilder; - LibCallSimplifier TheSimplifier(TD, TLI); + InstCombinerLibCallSimplifier TheSimplifier(TD, TLI, this); Simplifier = &TheSimplifier; bool EverMadeChange = false; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 0c6a406203..f095cff33c 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -15,14 +15,8 @@ #define DEBUG_TYPE "asan" +#include "llvm/Transforms/Instrumentation.h" #include "BlackList.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/InlineAsm.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Type.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallSet.h" @@ -30,19 +24,24 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/DataLayout.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/InlineAsm.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" - -#include <string> +#include "llvm/Type.h" #include <algorithm> +#include <string> using namespace llvm; @@ -69,6 +68,10 @@ static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; static const char *kAsanStackMallocName = "__asan_stack_malloc"; static const char *kAsanStackFreeName = "__asan_stack_free"; +static const char *kAsanGenPrefix = "__asan_gen_"; +static const char *kAsanPoisonStackMemoryName = "__asan_poison_stack_memory"; +static const char *kAsanUnpoisonStackMemoryName = + "__asan_unpoison_stack_memory"; static const int kAsanStackLeftRedzoneMagic = 0xf1; static const int kAsanStackMidRedzoneMagic = 0xf2; @@ -112,9 +115,10 @@ static cl::opt<bool> ClInitializers("asan-initialization-order", cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt<bool> ClMemIntrin("asan-memintrin", cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true)); -// This flag 
may need to be replaced with -fasan-blacklist. -static cl::opt<std::string> ClBlackListFile("asan-blacklist", - cl::desc("File containing the list of functions to ignore " +static cl::opt<bool> ClRealignStack("asan-realign-stack", + cl::desc("Realign stack to 32"), cl::Hidden, cl::init(true)); +static cl::opt<std::string> ClBlacklistFile("asan-blacklist", + cl::desc("File containing the list of objects to ignore " "during instrumentation"), cl::Hidden); // These flags allow to change the shadow mapping. @@ -135,6 +139,10 @@ static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp", static cl::opt<bool> ClOptGlobals("asan-opt-globals", cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClCheckLifetime("asan-check-lifetime", + cl::desc("Use llvm.lifetime intrinsics to insert extra checks"), + cl::Hidden, cl::init(false)); + // Debug flags. static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, cl::init(0)); @@ -148,10 +156,56 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), cl::Hidden, cl::init(-1)); namespace { +/// A set of dynamically initialized globals extracted from metadata. +class SetOfDynamicallyInitializedGlobals { + public: + void Init(Module& M) { + // Clang generates metadata identifying all dynamically initialized globals. + NamedMDNode *DynamicGlobals = + M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); + if (!DynamicGlobals) + return; + for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) { + MDNode *MDN = DynamicGlobals->getOperand(i); + assert(MDN->getNumOperands() == 1); + Value *VG = MDN->getOperand(0); + // The optimizer may optimize away a global entirely, in which case we + // cannot instrument access to it. + if (!VG) + continue; + DynInitGlobals.insert(cast<GlobalVariable>(VG)); + } + } + bool Contains(GlobalVariable *G) { return DynInitGlobals.count(G) != 0; } + private: + SmallSet<GlobalValue*, 32> DynInitGlobals; +}; + +static int MappingScale() { + return ClMappingScale ? ClMappingScale : kDefaultShadowScale; +} + +static size_t RedzoneSize() { + // Redzone used for stack and globals is at least 32 bytes. + // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. + return std::max(32U, 1U << MappingScale()); +} + /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { - AddressSanitizer(); - virtual const char *getPassName() const; + AddressSanitizer(bool CheckInitOrder = false, + bool CheckUseAfterReturn = false, + bool CheckLifetime = false, + StringRef BlacklistFile = StringRef()) + : FunctionPass(ID), + CheckInitOrder(CheckInitOrder || ClInitializers), + CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn), + CheckLifetime(CheckLifetime || ClCheckLifetime), + BlacklistFile(BlacklistFile.empty() ? 
ClBlacklistFile + : BlacklistFile) {} + virtual const char *getPassName() const { + return "AddressSanitizerFunctionPass"; + } void instrumentMop(Instruction *I); void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite); @@ -170,52 +224,86 @@ struct AddressSanitizer : public FunctionPass { bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool poisonStackInFunction(Function &F); virtual bool doInitialization(Module &M); - virtual bool doFinalization(Module &M); - bool insertGlobalRedzones(Module &M); static char ID; // Pass identification, replacement for typeid private: + void initializeCallbacks(Module &M); uint64_t getAllocaSizeInBytes(AllocaInst *AI) { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); return SizeInBytes; } uint64_t getAlignedSize(uint64_t SizeInBytes) { - return ((SizeInBytes + RedzoneSize - 1) - / RedzoneSize) * RedzoneSize; + size_t RZ = RedzoneSize(); + return ((SizeInBytes + RZ - 1) / RZ) * RZ; } uint64_t getAlignedAllocaSize(AllocaInst *AI) { uint64_t SizeInBytes = getAllocaSizeInBytes(AI); return getAlignedSize(SizeInBytes); } - Function *checkInterfaceFunction(Constant *FuncOrBitcast); bool ShouldInstrumentGlobal(GlobalVariable *G); void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, bool DoPoison); bool LooksLikeCodeInBug11395(Instruction *I); void FindDynamicInitializers(Module &M); - bool HasDynamicInitializer(GlobalVariable *G); - + /// Analyze lifetime intrinsics for a given alloca. Use Value* instead of + /// AllocaInst* here, as we call this method after we merge all allocas into a + /// single one. Returns true if ASan added some instrumentation. + bool handleAllocaLifetime(Value *Alloca); + /// Analyze lifetime intrinsics for a specific value, cast from an alloca. + /// Returns true if ASan added some instrumentation. + bool handleValueLifetime(Value *V); + void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, bool DoPoison); + + bool CheckInitOrder; + bool CheckUseAfterReturn; + bool CheckLifetime; LLVMContext *C; DataLayout *TD; uint64_t MappingOffset; - int MappingScale; - size_t RedzoneSize; int LongSize; Type *IntptrTy; Type *IntptrPtrTy; Function *AsanCtorFunction; Function *AsanInitFunction; Function *AsanStackMallocFunc, *AsanStackFreeFunc; + Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc; Function *AsanHandleNoReturnFunc; - Instruction *CtorInsertBefore; + SmallString<64> BlacklistFile; OwningPtr<BlackList> BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; InlineAsm *EmptyAsm; - SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals; - SmallSet<GlobalValue*, 32> GlobalsCreatedByAsan; + SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; +}; + +class AddressSanitizerModule : public ModulePass { + public: + AddressSanitizerModule(bool CheckInitOrder = false, + StringRef BlacklistFile = StringRef()) + : ModulePass(ID), + CheckInitOrder(CheckInitOrder || ClInitializers), + BlacklistFile(BlacklistFile.empty() ? 
ClBlacklistFile + : BlacklistFile) {} + bool runOnModule(Module &M); + static char ID; // Pass identification, replacement for typeid + virtual const char *getPassName() const { + return "AddressSanitizerModule"; + } + + private: + bool ShouldInstrumentGlobal(GlobalVariable *G); + void createInitializerPoisonCalls(Module &M, Value *FirstAddr, + Value *LastAddr); + + bool CheckInitOrder; + SmallString<64> BlacklistFile; + OwningPtr<BlackList> BL; + SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; + Type *IntptrTy; + LLVMContext *C; + DataLayout *TD; }; } // namespace @@ -224,13 +312,20 @@ char AddressSanitizer::ID = 0; INITIALIZE_PASS(AddressSanitizer, "asan", "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) -AddressSanitizer::AddressSanitizer() : FunctionPass(ID) { } -FunctionPass *llvm::createAddressSanitizerPass() { - return new AddressSanitizer(); +FunctionPass *llvm::createAddressSanitizerFunctionPass( + bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime, + StringRef BlacklistFile) { + return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn, + CheckLifetime, BlacklistFile); } -const char *AddressSanitizer::getPassName() const { - return "AddressSanitizer"; +char AddressSanitizerModule::ID = 0; +INITIALIZE_PASS(AddressSanitizerModule, "asan-module", + "AddressSanitizer: detects use-after-free and out-of-bounds bugs." + "ModulePass", false, false) +ModulePass *llvm::createAddressSanitizerModulePass( + bool CheckInitOrder, StringRef BlacklistFile) { + return new AddressSanitizerModule(CheckInitOrder, BlacklistFile); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -243,12 +338,17 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); return new GlobalVariable(M, StrConst->getType(), true, - GlobalValue::PrivateLinkage, StrConst, ""); + GlobalValue::PrivateLinkage, StrConst, + kAsanGenPrefix); +} + +static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { + return G->getName().find(kAsanGenPrefix) == 0; } Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // Shadow >> scale - Shadow = IRB.CreateLShr(Shadow, MappingScale); + Shadow = IRB.CreateLShr(Shadow, MappingScale()); if (MappingOffset == 0) return Shadow; // (Shadow >> scale) | offset @@ -328,30 +428,6 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) { return NULL; } -void AddressSanitizer::FindDynamicInitializers(Module& M) { - // Clang generates metadata identifying all dynamically initialized globals. - NamedMDNode *DynamicGlobals = - M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); - if (!DynamicGlobals) - return; - for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) { - MDNode *MDN = DynamicGlobals->getOperand(i); - assert(MDN->getNumOperands() == 1); - Value *VG = MDN->getOperand(0); - // The optimizer may optimize away a global entirely, in which case we - // cannot instrument access to it. - if (!VG) - continue; - - GlobalVariable *G = cast<GlobalVariable>(VG); - DynamicallyInitializedGlobals.insert(G); - } -} -// Returns true if a global variable is initialized dynamically in this TU. 
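memToShadow above is the classic ASan address mapping: shadow = (addr >> scale) | offset, where the or acts as an add because the offsets are chosen not to collide with the shifted address bits. A tiny model with the default scale of 3 and an offset in the style of kDefaultShadowOffset32 (values illustrative only; not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned Scale = 3;            // 8 application bytes per shadow byte
  const uint64_t Offset = 1ULL << 29;  // illustrative 32-bit-style offset
  const uint64_t Addr = 0x12345678ULL;
  uint64_t Shadow = (Addr >> Scale) | Offset;
  // For addresses whose shifted form stays below the offset bit, the
  // or-based mapping coincides with the additive one.
  assert(Shadow == (Addr >> Scale) + Offset);
  return 0;
}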
-bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) { - return DynamicallyInitializedGlobals.count(G); -} - void AddressSanitizer::instrumentMop(Instruction *I) { bool IsWrite = false; Value *Addr = isInterestingMemoryAccess(I, &IsWrite); @@ -360,14 +436,12 @@ void AddressSanitizer::instrumentMop(Instruction *I) { if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { // If initialization order checking is disabled, a simple access to a // dynamically initialized global is always valid. - if (!ClInitializers) + if (!CheckInitOrder) return; // If a global variable does not have dynamic initialization we don't - // have to instrument it. However, if a global has external linkage, we - // assume it has dynamic initialization, as it may have an initializer - // in a different TU. - if (G->getLinkage() != GlobalVariable::ExternalLinkage && - !HasDynamicInitializer(G)) + // have to instrument it. However, if a global does not have an initializer + // at all, we assume it has a dynamic initializer (in another TU). + if (G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G)) return; } } @@ -392,7 +466,7 @@ void AddressSanitizer::instrumentMop(Instruction *I) { // function of AddressSanitizer. If the instrumented module defines a function // with the same name, their prototypes must match, otherwise // getOrInsertFunction returns a bitcast. -Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) { +static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast); FuncOrBitcast->dump(); report_fatal_error("trying to redefine an AddressSanitizer " @@ -415,7 +489,7 @@ Instruction *AddressSanitizer::generateCrashCode( Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize) { - size_t Granularity = 1 << MappingScale; + size_t Granularity = 1 << MappingScale(); // Addr & (Granularity - 1) Value *LastAccessedByte = IRB.CreateAnd( AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); @@ -436,7 +510,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); Type *ShadowTy = IntegerType::get( - *C, std::max(8U, TypeSize >> MappingScale)); + *C, std::max(8U, TypeSize >> MappingScale())); Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); Value *ShadowPtr = memToShadow(AddrLong, IRB); Value *CmpVal = Constant::getNullValue(ShadowTy); @@ -445,7 +519,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); - size_t Granularity = 1 << MappingScale; + size_t Granularity = 1 << MappingScale(); TerminatorInst *CrashTerm = 0; if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { @@ -469,9 +543,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Crash->setDebugLoc(OrigIns->getDebugLoc()); }
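The instrumentation emitted by instrumentAddress and createSlowPathCmp reduces to: load the shadow byte covering the access; zero means the whole 8-byte granule is addressable, and a small positive value k means only the first k bytes are. A hedged C++ model of the predicate the generated IR evaluates (a sketch of the logic, not the IR; not part of the patch):

#include <cassert>
#include <cstdint>

// True when an AccessSize-byte access at Addr must be reported, given the
// shadow byte for its granule (granularity 8, i.e. mapping scale 3).
static bool ShadowSaysPoisoned(uint64_t Addr, unsigned AccessSize,
                               int8_t ShadowValue) {
  if (ShadowValue == 0)
    return false; // the whole granule is addressable
  int64_t LastAccessedByte = (Addr & 7) + AccessSize - 1;
  return LastAccessedByte >= ShadowValue; // the slow-path comparison
}

int main() {
  assert(!ShadowSaysPoisoned(0x1000, 8, 0));  // fully addressable granule
  assert(!ShadowSaysPoisoned(0x1000, 4, 4));  // bytes 0..3 valid, access 0..3
  assert(ShadowSaysPoisoned(0x1004, 4, 4));   // access reaches bytes 4..7
  assert(ShadowSaysPoisoned(0x1000, 1, -15)); // negative magic: a redzone
  return 0;
}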
-void AddressSanitizer::createInitializerPoisonCalls(Module &M, - Value *FirstAddr, - Value *LastAddr) { +void AddressSanitizerModule::createInitializerPoisonCalls( + Module &M, Value *FirstAddr, Value *LastAddr) { // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); // If that function is not present, this TU contains no globals, or they have @@ -502,14 +575,14 @@ void AddressSanitizer::createInitializerPoisonCalls(Module &M, } } -bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { +bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { Type *Ty = cast<PointerType>(G->getType())->getElementType(); DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); if (BL->isIn(*G)) return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; - if (GlobalsCreatedByAsan.count(G)) return false; // Our own global. + if (GlobalWasGeneratedByAsan(G)) return false; // Our own global. // Touch only those globals that will not be defined in other modules. // Don't handle ODR type linkages since other modules may be built w/o asan. if (G->getLinkage() != GlobalVariable::ExternalLinkage && @@ -522,7 +595,7 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { if (G->isThreadLocal()) return false; // For now, just ignore this global if the alignment is large. - if (G->getAlignment() > RedzoneSize) return false; + if (G->getAlignment() > RedzoneSize()) return false; // Ignore all the globals with the names starting with "\01L_OBJC_". // Many of those are put into the .cstring section. The linker compresses @@ -564,7 +637,17 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. -bool AddressSanitizer::insertGlobalRedzones(Module &M) { +bool AddressSanitizerModule::runOnModule(Module &M) { + if (!ClGlobals) return false; + TD = getAnalysisIfAvailable<DataLayout>(); + if (!TD) + return false; + BL.reset(new BlackList(BlacklistFile)); + if (BL->isIn(M)) return false; + DynamicallyInitializedGlobals.Init(M); + C = &(M.getContext()); + IntptrTy = Type::getIntNTy(*C, TD->getPointerSizeInBits()); + SmallVector<GlobalVariable *, 16> GlobalsToChange; for (Module::GlobalListType::iterator G = M.global_begin(), @@ -588,10 +671,10 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { IntptrTy, NULL); SmallVector<Constant *, 16> Initializers(n), DynamicInit; - IRBuilder<> IRB(CtorInsertBefore); - if (ClInitializers) - FindDynamicInitializers(M); + Function *CtorFunc = M.getFunction(kAsanModuleCtorName); + assert(CtorFunc); + IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); // The addresses of the first and last dynamically initialized globals in // this TU. Used in initialization order checking. @@ -602,11 +685,12 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { PointerType *PtrTy = cast<PointerType>(G->getType()); Type *Ty = PtrTy->getElementType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); - uint64_t RightRedzoneSize = RedzoneSize + - (RedzoneSize - (SizeInBytes % RedzoneSize)); + size_t RZ = RedzoneSize(); + uint64_t RightRedzoneSize = RZ + (RZ - (SizeInBytes % RZ)); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); // Determine whether this global should be poisoned in initialization. - bool GlobalHasDynamicInitializer = HasDynamicInitializer(G); + bool GlobalHasDynamicInitializer = + DynamicallyInitializedGlobals.Contains(G); // Don't check initialization order if this global is blacklisted. 
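The RightRedzoneSize computation just above guarantees two properties at once: at least RZ bytes of redzone behind every global, and a padded total size that is a multiple of RZ. Checking the formula exhaustively over small sizes (RZ = 32, the minimum redzone; not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t RZ = 32; // RedzoneSize() at the default mapping scale
  for (uint64_t SizeInBytes = 1; SizeInBytes <= 4 * RZ; ++SizeInBytes) {
    uint64_t RightRedzoneSize = RZ + (RZ - (SizeInBytes % RZ));
    assert(RightRedzoneSize >= RZ);                     // never thinner than RZ
    assert((SizeInBytes + RightRedzoneSize) % RZ == 0); // padded size aligned
  }
  return 0;
}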
GlobalHasDynamicInitializer &= !BL->isInInit(*G); @@ -626,7 +710,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { M, NewTy, G->isConstant(), G->getLinkage(), NewInitializer, "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); - NewGlobal->setAlignment(RedzoneSize); + NewGlobal->setAlignment(RZ); Value *Indices2[2]; Indices2[0] = IRB.getInt32(0); @@ -647,7 +731,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { NULL); // Populate the first and last globals declared in this TU. - if (ClInitializers && GlobalHasDynamicInitializer) { + if (CheckInitOrder && GlobalHasDynamicInitializer) { LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); if (FirstDynamic == 0) FirstDynamic = LastDynamic; @@ -662,7 +746,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); // Create calls for poisoning before initializers run and unpoisoning after. - if (ClInitializers && FirstDynamic && LastDynamic) + if (CheckInitOrder && FirstDynamic && LastDynamic) createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( @@ -696,33 +780,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { return true; } -// virtual -bool AddressSanitizer::doInitialization(Module &M) { - // Initialize the private fields. No one has accessed them before. - TD = getAnalysisIfAvailable<DataLayout>(); - - if (!TD) - return false; - BL.reset(new BlackList(ClBlackListFile)); - - C = &(M.getContext()); - LongSize = TD->getPointerSizeInBits(); - IntptrTy = Type::getIntNTy(*C, LongSize); - IntptrPtrTy = PointerType::get(IntptrTy, 0); - - AsanCtorFunction = Function::Create( - FunctionType::get(Type::getVoidTy(*C), false), - GlobalValue::InternalLinkage, kAsanModuleCtorName, &M); - BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction); - CtorInsertBefore = ReturnInst::Create(*C, AsanCtorBB); - - // call __asan_init in the module ctor. - IRBuilder<> IRB(CtorInsertBefore); - AsanInitFunction = checkInterfaceFunction( - M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL)); - AsanInitFunction->setLinkage(Function::ExternalLinkage); - IRB.CreateCall(AsanInitFunction); - +void AddressSanitizer::initializeCallbacks(Module &M) { + IRBuilder<> IRB(*C); // Create __asan_report* callbacks. for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; @@ -731,8 +790,9 @@ bool AddressSanitizer::doInitialization(Module &M) { std::string FunctionName = std::string(kAsanReportErrorTemplate) + (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex); // If we are merging crash callbacks, they have two parameters. 
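The loop above manufactures the ten report entry points, __asan_report_{load,store}{1,2,4,8,16}, from a common template. Reproducing the name construction standalone, assuming the template string is "__asan_report_" (which is what the generated names imply; not part of the patch):

#include <cstdio>
#include <string>

int main() {
  const char *kAsanReportErrorTemplate = "__asan_report_"; // assumed value
  const int kNumberOfAccessSizes = 5; // accesses of 1, 2, 4, 8, 16 bytes
  for (int IsWrite = 0; IsWrite <= 1; IsWrite++) {
    for (int SizeIndex = 0; SizeIndex < kNumberOfAccessSizes; SizeIndex++) {
      std::string Name = std::string(kAsanReportErrorTemplate) +
                         (IsWrite ? "store" : "load") +
                         std::to_string(1 << SizeIndex);
      std::printf("%s\n", Name.c_str()); // __asan_report_load1 ... store16
    }
  }
  return 0;
}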
- AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( - M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); + AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = + checkInterfaceFunction(M.getOrInsertFunction( + FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); } } @@ -743,11 +803,42 @@ bool AddressSanitizer::doInitialization(Module &M) { IntptrTy, IntptrTy, IntptrTy, NULL)); AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction( kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); + AsanPoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction( + kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction( + kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), /*hasSideEffects=*/true); +} + +// virtual +bool AddressSanitizer::doInitialization(Module &M) { + // Initialize the private fields. No one has accessed them before. + TD = getAnalysisIfAvailable<DataLayout>(); + + if (!TD) + return false; + BL.reset(new BlackList(BlacklistFile)); + DynamicallyInitializedGlobals.Init(M); + + C = &(M.getContext()); + LongSize = TD->getPointerSizeInBits(); + IntptrTy = Type::getIntNTy(*C, LongSize); + IntptrPtrTy = PointerType::get(IntptrTy, 0); + + AsanCtorFunction = Function::Create( + FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleCtorName, &M); + BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction); + // call __asan_init in the module ctor. + IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB)); + AsanInitFunction = checkInterfaceFunction( + M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL)); + AsanInitFunction->setLinkage(Function::ExternalLinkage); + IRB.CreateCall(AsanInitFunction); llvm::Triple targetTriple(M.getTargetTriple()); bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android; @@ -762,13 +853,6 @@ bool AddressSanitizer::doInitialization(Module &M) { MappingOffset = 1ULL << ClMappingOffsetLog; } } - MappingScale = kDefaultShadowScale; - if (ClMappingScale) { - MappingScale = ClMappingScale; - } - // Redzone used for stack and globals is at least 32 bytes. - // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. - RedzoneSize = std::max(32, (int)(1 << MappingScale)); if (ClMappingOffsetLog >= 0) { @@ -783,7 +867,7 @@ bool AddressSanitizer::doInitialization(Module &M) { if (ClMappingScale) { GlobalValue *asan_mapping_scale = new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, - ConstantInt::get(IntptrTy, MappingScale), + ConstantInt::get(IntptrTy, MappingScale()), kAsanMappingScaleName); // Read the global, otherwise it may be optimized away. IRB.CreateLoad(asan_mapping_scale, true); @@ -794,15 +878,6 @@ bool AddressSanitizer::doInitialization(Module &M) { return true; } -bool AddressSanitizer::doFinalization(Module &M) { - // We transform the globals at the very end so that the optimization analysis - // works on the original globals. 
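The deleted MappingScale and RedzoneSize members are replaced by the file-level MappingScale() and RedzoneSize() helpers introduced near the top of this patch: the redzone stays at 32 bytes through scale 5 and grows to 64 and 128 bytes for scales 6 and 7, so one shadow value can always describe a whole redzone. A spot check of that relation, mirroring RedzoneSize() (not part of the patch):

#include <algorithm>
#include <cassert>

int main() {
  for (unsigned Scale = 3; Scale <= 7; ++Scale) {
    unsigned Redzone = std::max(32u, 1u << Scale); // as in RedzoneSize()
    if (Scale <= 5)
      assert(Redzone == 32);
    else
      assert(Redzone == (1u << Scale)); // 64 at scale 6, 128 at scale 7
  }
  return 0;
}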
- if (ClGlobals) - return insertGlobalRedzones(M); - return false; -} - - bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // For each NSObject descendant having a +load method, this method is invoked // by the ObjC runtime before any of the static constructors is called. @@ -823,6 +898,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); + initializeCallbacks(*F.getParent()); // If needed, insert __asan_init before checking for AddressSafety attr. maybeInsertAsanInitAtFunctionEntry(F); @@ -911,10 +987,10 @@ static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) { static void PoisonShadowPartialRightRedzone(uint8_t *Shadow, size_t Size, - size_t RedzoneSize, + size_t RZSize, size_t ShadowGranularity, uint8_t Magic) { - for (size_t i = 0; i < RedzoneSize; + for (size_t i = 0; i < RZSize; i+= ShadowGranularity, Shadow++) { if (i + ShadowGranularity <= Size) { *Shadow = 0; // fully addressable @@ -929,7 +1005,7 @@ static void PoisonShadowPartialRightRedzone(uint8_t *Shadow, void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, bool DoPoison) { - size_t ShadowRZSize = RedzoneSize >> MappingScale; + size_t ShadowRZSize = RedzoneSize() >> MappingScale(); assert(ShadowRZSize >= 1 && ShadowRZSize <= 4); Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8); Type *RZPtrTy = PointerType::get(RZTy, 0); @@ -945,12 +1021,12 @@ void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy)); // poison all other red zones. - uint64_t Pos = RedzoneSize; + uint64_t Pos = RedzoneSize(); for (size_t i = 0, n = AllocaVec.size(); i < n; i++) { AllocaInst *AI = AllocaVec[i]; uint64_t SizeInBytes = getAllocaSizeInBytes(AI); uint64_t AlignedSize = getAlignedAllocaSize(AI); - assert(AlignedSize - SizeInBytes < RedzoneSize); + assert(AlignedSize - SizeInBytes < RedzoneSize()); Value *Ptr = NULL; Pos += AlignedSize; @@ -960,13 +1036,13 @@ void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, // Poison the partial redzone at right Ptr = IRB.CreateAdd( ShadowBase, ConstantInt::get(IntptrTy, - (Pos >> MappingScale) - ShadowRZSize)); - size_t AddressableBytes = RedzoneSize - (AlignedSize - SizeInBytes); + (Pos >> MappingScale()) - ShadowRZSize)); + size_t AddressableBytes = RedzoneSize() - (AlignedSize - SizeInBytes); uint32_t Poison = 0; if (DoPoison) { PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes, - RedzoneSize, - 1ULL << MappingScale, + RedzoneSize(), + 1ULL << MappingScale(), kAsanStackPartialRedzoneMagic); } Value *PartialPoison = ConstantInt::get(RZTy, Poison); @@ -975,11 +1051,11 @@ void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, // Poison the full redzone at right. Ptr = IRB.CreateAdd(ShadowBase, - ConstantInt::get(IntptrTy, Pos >> MappingScale)); + ConstantInt::get(IntptrTy, Pos >> MappingScale())); Value *Poison = i == AllocaVec.size() - 1 ? PoisonRight : PoisonMid; IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy)); - Pos += RedzoneSize; + Pos += RedzoneSize(); } } @@ -995,6 +1071,74 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) { return true; } +// Handling llvm.lifetime intrinsics for a given %alloca: +// (1) collect all llvm.lifetime.xxx(%size, %value) describing the alloca. 
+// (2) if %size is constant, poison memory for llvm.lifetime.end (to detect +// invalid accesses) and unpoison it for llvm.lifetime.start (the memory +// could be poisoned by a previous llvm.lifetime.end instruction, as the +// variable may go in and out of scope several times, e.g. in loops). +// (3) if we poisoned at least one %alloca in a function, +// unpoison the whole stack frame at function exit. +bool AddressSanitizer::handleAllocaLifetime(Value *Alloca) { + assert(CheckLifetime); + Type *AllocaType = Alloca->getType(); + Type *Int8PtrTy = Type::getInt8PtrTy(AllocaType->getContext()); + + bool Res = false; + // Typical code looks like this: + // %alloca = alloca <type>, <alignment> + // ... some code ... + // %val1 = bitcast <type>* %alloca to i8* + // call void @llvm.lifetime.start(i64 <size>, i8* %val1) + // ... more code ... + // %val2 = bitcast <type>* %alloca to i8* + // call void @llvm.lifetime.end(i64 <size>, i8* %val2) + // That is, to handle %alloca we must find all its casts to + // i8* values, and find lifetime instructions for these values. + if (AllocaType == Int8PtrTy) + Res |= handleValueLifetime(Alloca); + for (Value::use_iterator UI = Alloca->use_begin(), UE = Alloca->use_end(); + UI != UE; ++UI) { + if (UI->getType() != Int8PtrTy) continue; + if (UI->stripPointerCasts() != Alloca) continue; + Res |= handleValueLifetime(*UI); + } + return Res; +} + +bool AddressSanitizer::handleValueLifetime(Value *V) { + assert(CheckLifetime); + bool Res = false; + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; + ++UI) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI); + if (!II) continue; + Intrinsic::ID ID = II->getIntrinsicID(); + if (ID != Intrinsic::lifetime_start && + ID != Intrinsic::lifetime_end) + continue; + if (V != II->getArgOperand(1)) + continue; + // Found lifetime intrinsic, add ASan instrumentation if necessary. + ConstantInt *Size = dyn_cast<ConstantInt>(II->getArgOperand(0)); + // If the size argument is undefined (-1), don't do anything. + if (Size->isMinusOne()) + continue; + // Check that size doesn't saturate uint64_t and can + // be stored in IntptrTy. + const uint64_t SizeValue = Size->getValue().getLimitedValue(); + if (SizeValue == ~0ULL || + !ConstantInt::isValueValidForType(IntptrTy, SizeValue)) { + continue; + } + IRBuilder<> IRB(II); + bool DoPoison = (ID == Intrinsic::lifetime_end); + poisonAlloca(V, SizeValue, IRB, DoPoison); + Res = true; + } + return Res; +} + // Find all static Alloca instructions and put // poisoned red zones around all of them. // Then unpoison everything back before the function returns. @@ -1013,9 +1157,11 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { SmallVector<AllocaInst*, 16> AllocaVec; SmallVector<Instruction*, 8> RetVec; uint64_t TotalSize = 0; + bool HavePoisonedAllocas = false; // Filter out Alloca instructions we want (and can) handle. // Collect Ret instructions. 
+ unsigned ResultAlignment = 1 << MappingScale(); for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { BasicBlock &BB = *FI; @@ -1031,7 +1177,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { if (AI->isArrayAllocation()) continue; if (!AI->isStaticAlloca()) continue; if (!AI->getAllocatedType()->isSized()) continue; - if (AI->getAlignment() > RedzoneSize) continue; + ResultAlignment = std::max(ResultAlignment, AI->getAlignment()); AllocaVec.push_back(AI); uint64_t AlignedSize = getAlignedAllocaSize(AI); TotalSize += AlignedSize; @@ -1040,9 +1186,9 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { if (AllocaVec.empty()) return false; - uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize; + uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize(); - bool DoStackMalloc = ClUseAfterReturn + bool DoStackMalloc = CheckUseAfterReturn && LocalStackSize <= kMaxStackMallocSize; Instruction *InsBefore = AllocaVec[0]; @@ -1052,7 +1198,9 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize); AllocaInst *MyAlloca = new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore); - MyAlloca->setAlignment(RedzoneSize); + if (ClRealignStack && ResultAlignment < RedzoneSize()) + ResultAlignment = RedzoneSize(); + MyAlloca->setAlignment(ResultAlignment); assert(MyAlloca->isStaticAlloca()); Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy); Value *LocalStackBase = OrigStackBase; @@ -1067,7 +1215,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { raw_svector_ostream StackDescription(StackDescriptionStorage); StackDescription << F.getName() << " " << AllocaVec.size() << " "; - uint64_t Pos = RedzoneSize; + uint64_t Pos = RedzoneSize(); // Replace Alloca instructions with base+offset. for (size_t i = 0, n = AllocaVec.size(); i < n; i++) { AllocaInst *AI = AllocaVec[i]; @@ -1076,12 +1224,15 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { StackDescription << Pos << " " << SizeInBytes << " " << Name.size() << " " << Name << " "; uint64_t AlignedSize = getAlignedAllocaSize(AI); - assert((AlignedSize % RedzoneSize) == 0); - AI->replaceAllUsesWith( - IRB.CreateIntToPtr( + assert((AlignedSize % RedzoneSize()) == 0); + Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)), - AI->getType())); - Pos += AlignedSize + RedzoneSize; + AI->getType()); + AI->replaceAllUsesWith(NewAllocaPtr); + // Analyze lifetime intrinsics only for static allocas we handle. 
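The replacement loop above hands each alloca a base+offset address inside a single frame laid out as RZ | var1 | RZ | var2 | ... | RZ, which is where LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize() comes from. A toy recomputation of that layout with made-up aligned sizes (RZ = 32; not part of the patch):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const uint64_t RZ = 32;
  const std::vector<uint64_t> AlignedSizes = {32, 64, 32}; // multiples of RZ
  uint64_t TotalSize = 0;
  for (uint64_t S : AlignedSizes)
    TotalSize += S;
  const uint64_t LocalStackSize = TotalSize + (AlignedSizes.size() + 1) * RZ;

  uint64_t Pos = RZ; // the first variable sits after the left redzone
  std::vector<uint64_t> Offsets;
  for (uint64_t S : AlignedSizes) {
    Offsets.push_back(Pos);
    Pos += S + RZ; // skip the variable and the redzone that follows it
  }
  // Matches the assert(Pos == LocalStackSize) in poisonStackInFunction.
  assert(Pos == LocalStackSize);
  assert(Offsets[0] == 32 && Offsets[1] == 96 && Offsets[2] == 192);
  return 0;
}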
+ if (CheckLifetime) + HavePoisonedAllocas |= handleAllocaLifetime(NewAllocaPtr); + Pos += AlignedSize + RedzoneSize(); } assert(Pos == LocalStackSize); @@ -1092,9 +1243,8 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { Value *BasePlus1 = IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, LongSize/8)); BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy); - GlobalVariable *StackDescriptionGlobal = + GlobalVariable *StackDescriptionGlobal = createPrivateGlobalForString(*F.getParent(), StackDescription.str()); - GlobalsCreatedByAsan.insert(StackDescriptionGlobal); Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); @@ -1114,9 +1264,15 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRBRet, ShadowBase, false); if (DoStackMalloc) { + // In use-after-return mode, mark the whole stack frame unaddressable. IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase, ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); + } else if (HavePoisonedAllocas) { + // If we poisoned some allocas in llvm.lifetime analysis, + // unpoison whole stack frame now. + assert(LocalStackBase == OrigStackBase); + poisonAlloca(LocalStackBase, LocalStackSize, IRBRet, false); } } @@ -1130,3 +1286,13 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { return true; } + +void AddressSanitizer::poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, + bool DoPoison) { + // For now just insert the call to ASan runtime. + Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy); + Value *SizeArg = ConstantInt::get(IntptrTy, Size); + IRB.CreateCall2(DoPoison ? AsanPoisonStackMemoryFunc + : AsanUnpoisonStackMemoryFunc, + AddrArg, SizeArg); +} diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp index ef34b8a56d..0bfb186562 100644 --- a/lib/Transforms/Instrumentation/BlackList.cpp +++ b/lib/Transforms/Instrumentation/BlackList.cpp @@ -13,13 +13,11 @@ // //===----------------------------------------------------------------------===// -#include <utility> -#include <string> - #include "BlackList.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Module.h" @@ -27,12 +25,14 @@ #include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" +#include <string> +#include <utility> namespace llvm { BlackList::BlackList(const StringRef Path) { // Validate and open blacklist file. - if (!Path.size()) return; + if (Path.empty()) return; OwningPtr<MemoryBuffer> File; if (error_code EC = MemoryBuffer::getFile(Path, File)) { report_fatal_error("Can't open blacklist file: " + Path + ": " + @@ -52,6 +52,10 @@ BlackList::BlackList(const StringRef Path) { std::pair<StringRef, StringRef> SplitLine = I->split(":"); StringRef Prefix = SplitLine.first; std::string Regexp = SplitLine.second; + if (Regexp.empty()) { + // Missing ':' in the line. + report_fatal_error("malformed blacklist line: " + SplitLine.first); + } // Replace * with .* for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; @@ -68,7 +72,7 @@ BlackList::BlackList(const StringRef Path) { } // Add this regexp into the proper group by its prefix. 
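    // E.g. the two entries "fun:*bar*" and "fun:foo*" become ".*bar.*" and
    // "foo.*" after the '*' rewriting above, and are then joined under
    // Regexps["fun"] as ".*bar.*|foo.*".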
- if (Regexps[Prefix].size()) + if (!Regexps[Prefix].empty()) Regexps[Prefix] += "|"; Regexps[Prefix] += Regexp; } @@ -92,14 +96,29 @@ bool BlackList::isIn(const Module &M) { return inSection("src", M.getModuleIdentifier()); } +static StringRef GetGVTypeString(const GlobalVariable &G) { + // Types of GlobalVariables are always pointer types. + Type *GType = G.getType()->getElementType(); + // For now we support blacklisting struct types only. + if (StructType *SGType = dyn_cast<StructType>(GType)) { + if (!SGType->isLiteral()) + return SGType->getName(); + } + return "<unknown type>"; +} + bool BlackList::isInInit(const GlobalVariable &G) { - return isIn(*G.getParent()) || inSection("global-init", G.getName()); + return (isIn(*G.getParent()) || + inSection("global-init", G.getName()) || + inSection("global-init-type", GetGVTypeString(G))); } -bool BlackList::inSection(const StringRef Section, - const StringRef Query) { - Regex *FunctionRegex = Entries[Section]; - return FunctionRegex ? FunctionRegex->match(Query) : false; +bool BlackList::inSection(const StringRef Section, const StringRef Query) { + StringMap<Regex*>::iterator I = Entries.find(Section); + if (I == Entries.end()) return false; + + Regex *FunctionRegex = I->getValue(); + return FunctionRegex->match(Query); } } // namespace llvm diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h index f3c05a5058..ee18a98567 100644 --- a/lib/Transforms/Instrumentation/BlackList.h +++ b/lib/Transforms/Instrumentation/BlackList.h @@ -18,6 +18,7 @@ // fun:*_ZN4base6subtle* // global:*global_with_bad_access_or_initialization* // global-init:*global_with_initialization_issues* +// global-init-type:*Namespace::ClassName* // src:file_with_tricky_code.cc // --- // Note that the wild card is in fact an llvm::Regex, but * is automatically diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 7810b1b8a3..303e04ac16 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -13,19 +13,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "bounds-checking" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/DataLayout.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" #include "llvm/Pass.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/TargetFolder.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Instrumentation.h" using namespace llvm; static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap", @@ -41,7 +41,7 @@ namespace { struct BoundsChecking : public FunctionPass { static char ID; - BoundsChecking(unsigned _Penalty = 5) : FunctionPass(ID), Penalty(_Penalty){ + BoundsChecking() : FunctionPass(ID) { initializeBoundsCheckingPass(*PassRegistry::getPassRegistry()); } @@ -59,7 +59,6 @@ namespace { BuilderTy *Builder; Instruction *Inst; BasicBlock *TrapBB; - unsigned Penalty; BasicBlock *getTrapBB(); void emitBranchToTrap(Value *Cmp = 0); @@ -109,6 +108,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) { else Cmp = 0; // unconditional branch } + ++ChecksAdded; Instruction *Inst 
= Builder->GetInsertPoint();
   BasicBlock *OldBB = Inst->getParent();
@@ -163,7 +163,6 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
   }
 
   emitBranchToTrap(Or);
-  ++ChecksAdded;
   return true;
 }
 
@@ -208,6 +207,6 @@ bool BoundsChecking::runOnFunction(Function &F) {
   return MadeChange;
 }
 
-FunctionPass *llvm::createBoundsCheckingPass(unsigned Penalty) {
-  return new BoundsChecking(Penalty);
+FunctionPass *llvm::createBoundsCheckingPass() {
+  return new BoundsChecking();
 }
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 058f68c7ce..1c9e053679 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMInstrumentation
   BoundsChecking.cpp
   EdgeProfiling.cpp
   GCOVProfiling.cpp
+  MemorySanitizer.cpp
   Instrumentation.cpp
   OptimalEdgeProfiling.cpp
   PathProfiling.cpp
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index e8ef2654d2..41e42aff49 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -18,13 +18,13 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "insert-edge-profiling"
+#include "llvm/Transforms/Instrumentation.h"
 #include "ProfilingUtils.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/ADT/Statistic.h"
 #include <set>
 using namespace llvm;
 
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index e9192e5cdd..5e064cd70d 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -16,19 +16,19 @@
 
 #define DEBUG_TYPE "insert-gcov-profiling"
 
-#include "ProfilingUtils.h"
 #include "llvm/Transforms/Instrumentation.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "ProfilingUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/UniqueVector.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/DebugLoc.h"
 #include "llvm/Support/InstIterator.h"
@@ -45,11 +45,12 @@ namespace {
     static char ID;
     GCOVProfiler()
         : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
-          UseExtraChecksum(false) {
+          UseExtraChecksum(false), NoRedZone(false) {
       initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
     }
     GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false,
-                 bool useExtraChecksum = false)
+                 bool useExtraChecksum = false, bool NoRedZone = false)
       : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
-        Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) {
+        Use402Format(use402Format), UseExtraChecksum(useExtraChecksum),
+        NoRedZone(NoRedZone) {
       assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
@@ -98,6 +98,7 @@ namespace {
     bool EmitData;
     bool Use402Format;
     bool UseExtraChecksum;
+    bool NoRedZone;
 
     Module *M;
     LLVMContext *Ctx;
@@ -110,8 +111,10 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
 ModulePass
*llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, bool Use402Format, - bool UseExtraChecksum) { - return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum); + bool UseExtraChecksum, + bool NoRedZone) { + return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum, + NoRedZone); } namespace { @@ -540,13 +543,13 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( // read it. Threads and invoke make this untrue. // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]]. + size_t TableSize = Succs.size() * Preds.size(); Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); - ArrayType *EdgeTableTy = ArrayType::get( - Int64PtrTy, Succs.size() * Preds.size()); + ArrayType *EdgeTableTy = ArrayType::get(Int64PtrTy, TableSize); - Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()]; + OwningArrayPtr<Constant *> EdgeTable(new Constant*[TableSize]); Constant *NullValue = Constant::getNullValue(Int64PtrTy); - for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i) + for (size_t i = 0; i != TableSize; ++i) EdgeTable[i] = NullValue; unsigned Edge = 0; @@ -566,7 +569,7 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( Edge += Successors; } - ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size()); + ArrayRef<Constant*> V(&EdgeTable[0], TableSize); GlobalVariable *EdgeTableGV = new GlobalVariable( *M, EdgeTableTy, true, GlobalValue::InternalLinkage, @@ -638,6 +641,9 @@ void GCOVProfiler::insertCounterWriteout( WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage, "__llvm_gcov_writeout", M); WriteoutF->setUnnamedAddr(true); + WriteoutF->addFnAttr(Attributes::NoInline); + if (NoRedZone) + WriteoutF->addFnAttr(Attributes::NoRedZone); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); IRBuilder<> Builder(BB); @@ -683,6 +689,8 @@ void GCOVProfiler::insertCounterWriteout( F->setUnnamedAddr(true); F->setLinkage(GlobalValue::InternalLinkage); F->addFnAttr(Attributes::NoInline); + if (NoRedZone) + F->addFnAttr(Attributes::NoRedZone); BB = BasicBlock::Create(*Ctx, "entry", F); Builder.SetInsertPoint(BB); @@ -702,6 +710,8 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Fn->setUnnamedAddr(true); Fn->setLinkage(GlobalValue::InternalLinkage); Fn->addFnAttr(Attributes::NoInline); + if (NoRedZone) + Fn->addFnAttr(Attributes::NoRedZone); Type *Int32Ty = Type::getInt32Ty(*Ctx); Type *Int64Ty = Type::getInt64Ty(*Ctx); @@ -758,6 +768,9 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { else FlushF->setLinkage(GlobalValue::InternalLinkage); FlushF->setUnnamedAddr(true); + FlushF->addFnAttr(Attributes::NoInline); + if (NoRedZone) + FlushF->addFnAttr(Attributes::NoRedZone); BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 1e0b4a348a..8ba102559b 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -21,11 +21,13 @@ using namespace llvm; /// library. 
 void llvm::initializeInstrumentation(PassRegistry &Registry) {
   initializeAddressSanitizerPass(Registry);
+  initializeAddressSanitizerModulePass(Registry);
   initializeBoundsCheckingPass(Registry);
   initializeEdgeProfilerPass(Registry);
   initializeGCOVProfilerPass(Registry);
   initializeOptimalEdgeProfilerPass(Registry);
   initializePathProfilerPass(Registry);
+  initializeMemorySanitizerPass(Registry);
   initializeThreadSanitizerPass(Registry);
 }
 
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index a4bb5a66af..50226db8c2 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -15,10 +15,10 @@
 #ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
 #define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
 
-#include "llvm/BasicBlock.h"
 #include "llvm/ADT/EquivalenceClasses.h"
-#include <vector>
+#include "llvm/BasicBlock.h"
 #include <algorithm>
+#include <vector>
 
 namespace llvm {
 
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
new file mode 100644
index 0000000000..947a2e3b12
--- /dev/null
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -0,0 +1,1579 @@
+//===-- MemorySanitizer.cpp - detector of uninitialized reads -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file is a part of MemorySanitizer, a detector of uninitialized
+/// reads.
+///
+/// Status: early prototype.
+///
+/// The algorithm of the tool is similar to Memcheck
+/// (http://goo.gl/QKbem). We associate a few shadow bits with every
+/// byte of the application memory, poison the shadow of the malloc-ed
+/// or alloca-ed memory, load the shadow bits on every memory read,
+/// propagate the shadow bits through some of the arithmetic
+/// instructions (including MOV), store the shadow bits on every memory
+/// write, report a bug on some other instructions (e.g. JMP) if the
+/// associated shadow is poisoned.
+///
+/// But there are differences too. The first and the major one:
+/// compiler instrumentation instead of binary instrumentation. This
+/// gives us much better register allocation, possible compiler
+/// optimizations and a fast start-up. But this brings the major issue
+/// as well: msan needs to see all program events, including system
+/// calls and reads/writes in system libraries, so we either need to
+/// compile *everything* with msan or use a binary translation
+/// component (e.g. DynamoRIO) to instrument pre-built libraries.
+/// Another difference from Memcheck is that we use 8 shadow bits per
+/// byte of application memory and use a direct shadow mapping. This
+/// greatly simplifies the instrumentation code and avoids races on
+/// shadow updates (Memcheck is single-threaded so races are not a
+/// concern there. Memcheck uses 2 shadow bits per byte with a slow
+/// path storage that uses 8 bits per byte).
+///
+/// The default value of shadow is 0, which means "clean" (not poisoned).
+///
+/// Every module initializer should call __msan_init to ensure that the
+/// shadow memory is ready. On error, __msan_warning is called. Since
+/// parameters and return values may be passed via registers, we have a
+/// specialized thread-local shadow for return values
+/// (__msan_retval_tls) and parameters (__msan_param_tls).
+//===----------------------------------------------------------------------===//
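A minimal standalone sketch of the direct shadow mapping described in the
header above, mirroring the constants and the getShadowPtr/getOriginPtr
arithmetic that appear further down in this file (64-bit case):

    #include <cstdint>

    const uint64_t kShadowMask64   = 1ULL << 46;
    const uint64_t kOriginOffset64 = 1ULL << 45;

    // Shadow = Addr & ~ShadowMask: clearing one address bit maps every
    // application byte to a dedicated shadow byte.
    uint64_t shadowFor(uint64_t AppAddr) {
      return AppAddr & ~kShadowMask64;
    }

    // OriginAddr = (ShadowAddr + OriginOffset) & ~3ULL: one 4-byte-aligned
    // origin slot covers each 4 bytes of application memory.
    uint64_t originFor(uint64_t AppAddr) {
      return (shadowFor(AppAddr) + kOriginOffset64) & ~3ULL;
    }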
+
+#define DEBUG_TYPE "msan"
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "BlackList.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/MDBuilder.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+static const uint64_t kShadowMask32 = 1ULL << 31;
+static const uint64_t kShadowMask64 = 1ULL << 46;
+static const uint64_t kOriginOffset32 = 1ULL << 30;
+static const uint64_t kOriginOffset64 = 1ULL << 45;
+
+// This is an important flag that makes the reports much more
+// informative at the cost of greater slowdown. Not fully implemented
+// yet.
+// FIXME: this should be a top-level clang flag, e.g.
+// -fmemory-sanitizer-full.
+static cl::opt<bool> ClTrackOrigins("msan-track-origins",
+       cl::desc("Track origins (allocation sites) of poisoned memory"),
+       cl::Hidden, cl::init(false));
+static cl::opt<bool> ClKeepGoing("msan-keep-going",
+       cl::desc("keep going after reporting a UMR"),
+       cl::Hidden, cl::init(false));
+static cl::opt<bool> ClPoisonStack("msan-poison-stack",
+       cl::desc("poison uninitialized stack variables"),
+       cl::Hidden, cl::init(true));
+static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
+       cl::desc("poison uninitialized stack variables with a call"),
+       cl::Hidden, cl::init(false));
+static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
+       cl::desc("poison uninitialized stack variables with the given pattern"),
+       cl::Hidden, cl::init(0xff));
+
+static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
+       cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
+       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin",
+       cl::desc("store origin for clean (fully initialized) values"),
+       cl::Hidden, cl::init(false));
+
+// This flag controls whether we check the shadow of the address
+// operand of load or store. Such bugs are very rare, since load from
+// a garbage address typically results in SEGV, but still happen
+// (e.g. only lower bits of address are garbage, or the access happens
+// early at program startup where malloc-ed memory is more likely to
+// be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
+static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address", + cl::desc("report accesses through a pointer which has poisoned shadow"), + cl::Hidden, cl::init(true)); + +static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions", + cl::desc("print out instructions with default strict semantics"), + cl::Hidden, cl::init(false)); + +static cl::opt<std::string> ClBlackListFile("msan-blacklist", + cl::desc("File containing the list of functions where MemorySanitizer " + "should not report bugs"), cl::Hidden); + +namespace { + +/// \brief An instrumentation pass implementing detection of uninitialized +/// reads. +/// +/// MemorySanitizer: instrument the code in module to find +/// uninitialized reads. +class MemorySanitizer : public FunctionPass { +public: + MemorySanitizer() : FunctionPass(ID), TD(0), WarningFn(0) { } + const char *getPassName() const { return "MemorySanitizer"; } + bool runOnFunction(Function &F); + bool doInitialization(Module &M); + static char ID; // Pass identification, replacement for typeid. + +private: + void initializeCallbacks(Module &M); + + DataLayout *TD; + LLVMContext *C; + Type *IntptrTy; + Type *OriginTy; + /// \brief Thread-local shadow storage for function parameters. + GlobalVariable *ParamTLS; + /// \brief Thread-local origin storage for function parameters. + GlobalVariable *ParamOriginTLS; + /// \brief Thread-local shadow storage for function return value. + GlobalVariable *RetvalTLS; + /// \brief Thread-local origin storage for function return value. + GlobalVariable *RetvalOriginTLS; + /// \brief Thread-local shadow storage for in-register va_arg function + /// parameters (x86_64-specific). + GlobalVariable *VAArgTLS; + /// \brief Thread-local shadow storage for va_arg overflow area + /// (x86_64-specific). + GlobalVariable *VAArgOverflowSizeTLS; + /// \brief Thread-local space used to pass origin value to the UMR reporting + /// function. + GlobalVariable *OriginTLS; + + /// \brief The run-time callback to print a warning. + Value *WarningFn; + /// \brief Run-time helper that copies origin info for a memory range. + Value *MsanCopyOriginFn; + /// \brief Run-time helper that generates a new origin value for a stack + /// allocation. + Value *MsanSetAllocaOriginFn; + /// \brief Run-time helper that poisons stack on function entry. + Value *MsanPoisonStackFn; + /// \brief MSan runtime replacements for memmove, memcpy and memset. + Value *MemmoveFn, *MemcpyFn, *MemsetFn; + + /// \brief Address mask used in application-to-shadow address calculation. + /// ShadowAddr is computed as ApplicationAddr & ~ShadowMask. + uint64_t ShadowMask; + /// \brief Offset of the origin shadow from the "normal" shadow. + /// OriginAddr is computed as (ShadowAddr + OriginOffset) & ~3ULL + uint64_t OriginOffset; + /// \brief Branch weights for error reporting. + MDNode *ColdCallWeights; + /// \brief Branch weights for origin store. + MDNode *OriginStoreWeights; + /// \brief The blacklist. + OwningPtr<BlackList> BL; + /// \brief An empty volatile inline asm that prevents callback merge. + InlineAsm *EmptyAsm; + + friend struct MemorySanitizerVisitor; + friend struct VarArgAMD64Helper; +}; +} // namespace + +char MemorySanitizer::ID = 0; +INITIALIZE_PASS(MemorySanitizer, "msan", + "MemorySanitizer: detects uninitialized reads.", + false, false) + +FunctionPass *llvm::createMemorySanitizerPass() { + return new MemorySanitizer(); +} + +/// \brief Create a non-const global initialized with the given string. 
+/// +/// Creates a writable global for Str so that we can pass it to the +/// run-time lib. Runtime uses first 4 bytes of the string to store the +/// frame ID, so the string needs to be mutable. +static GlobalVariable *createPrivateNonConstGlobalForString(Module &M, + StringRef Str) { + Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); + return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false, + GlobalValue::PrivateLinkage, StrConst, ""); +} + + +/// \brief Insert extern declaration of runtime-provided functions and globals. +void MemorySanitizer::initializeCallbacks(Module &M) { + // Only do this once. + if (WarningFn) + return; + + IRBuilder<> IRB(*C); + // Create the callback. + // FIXME: this function should have "Cold" calling conv, + // which is not yet implemented. + StringRef WarningFnName = ClKeepGoing ? "__msan_warning" + : "__msan_warning_noreturn"; + WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL); + + MsanCopyOriginFn = M.getOrInsertFunction( + "__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, NULL); + MsanSetAllocaOriginFn = M.getOrInsertFunction( + "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, + IRB.getInt8PtrTy(), NULL); + MsanPoisonStackFn = M.getOrInsertFunction( + "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL); + MemmoveFn = M.getOrInsertFunction( + "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IntptrTy, NULL); + MemcpyFn = M.getOrInsertFunction( + "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IntptrTy, NULL); + MemsetFn = M.getOrInsertFunction( + "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), + IntptrTy, NULL); + + // Create globals. + RetvalTLS = new GlobalVariable( + M, ArrayType::get(IRB.getInt64Ty(), 8), false, + GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0, + GlobalVariable::GeneralDynamicTLSModel); + RetvalOriginTLS = new GlobalVariable( + M, OriginTy, false, GlobalVariable::ExternalLinkage, 0, + "__msan_retval_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel); + + ParamTLS = new GlobalVariable( + M, ArrayType::get(IRB.getInt64Ty(), 1000), false, + GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0, + GlobalVariable::GeneralDynamicTLSModel); + ParamOriginTLS = new GlobalVariable( + M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage, + 0, "__msan_param_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel); + + VAArgTLS = new GlobalVariable( + M, ArrayType::get(IRB.getInt64Ty(), 1000), false, + GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0, + GlobalVariable::GeneralDynamicTLSModel); + VAArgOverflowSizeTLS = new GlobalVariable( + M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0, + "__msan_va_arg_overflow_size_tls", 0, + GlobalVariable::GeneralDynamicTLSModel); + OriginTLS = new GlobalVariable( + M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0, + "__msan_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel); + + // We insert an empty inline asm after __msan_report* to avoid callback merge. + EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), + StringRef(""), StringRef(""), + /*hasSideEffects=*/true); +} + +/// \brief Module-level initialization. +/// +/// inserts a call to __msan_init to the module's constructor list. 
+bool MemorySanitizer::doInitialization(Module &M) { + TD = getAnalysisIfAvailable<DataLayout>(); + if (!TD) + return false; + BL.reset(new BlackList(ClBlackListFile)); + C = &(M.getContext()); + unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0); + switch (PtrSize) { + case 64: + ShadowMask = kShadowMask64; + OriginOffset = kOriginOffset64; + break; + case 32: + ShadowMask = kShadowMask32; + OriginOffset = kOriginOffset32; + break; + default: + report_fatal_error("unsupported pointer size"); + break; + } + + IRBuilder<> IRB(*C); + IntptrTy = IRB.getIntPtrTy(TD); + OriginTy = IRB.getInt32Ty(); + + ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000); + OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000); + + // Insert a call to __msan_init/__msan_track_origins into the module's CTORs. + appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction( + "__msan_init", IRB.getVoidTy(), NULL)), 0); + + new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage, + IRB.getInt32(ClTrackOrigins), "__msan_track_origins"); + + return true; +} + +namespace { + +/// \brief A helper class that handles instrumentation of VarArg +/// functions on a particular platform. +/// +/// Implementations are expected to insert the instrumentation +/// necessary to propagate argument shadow through VarArg function +/// calls. Visit* methods are called during an InstVisitor pass over +/// the function, and should avoid creating new basic blocks. A new +/// instance of this class is created for each instrumented function. +struct VarArgHelper { + /// \brief Visit a CallSite. + virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0; + + /// \brief Visit a va_start call. + virtual void visitVAStartInst(VAStartInst &I) = 0; + + /// \brief Visit a va_copy call. + virtual void visitVACopyInst(VACopyInst &I) = 0; + + /// \brief Finalize function instrumentation. + /// + /// This method is called after visiting all interesting (see above) + /// instructions in a function. + virtual void finalizeInstrumentation() = 0; + + virtual ~VarArgHelper() {} +}; + +struct MemorySanitizerVisitor; + +VarArgHelper* +CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + MemorySanitizerVisitor &Visitor); + +/// This class does all the work for a given function. Store and Load +/// instructions store and load corresponding shadow and origin +/// values. Most instructions propagate shadow from arguments to their +/// return values. Certain instructions (most importantly, BranchInst) +/// test their argument shadow and print reports (with a runtime call) if it's +/// non-zero. +struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { + Function &F; + MemorySanitizer &MS; + SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes; + ValueMap<Value*, Value*> ShadowMap, OriginMap; + bool InsertChecks; + OwningPtr<VarArgHelper> VAHelper; + + // An unfortunate workaround for asymmetric lowering of va_arg stuff. + // See a comment in visitCallSite for more details. 
+  static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
+  static const unsigned AMD64FpEndOffset = 176;
+
+  struct ShadowOriginAndInsertPoint {
+    Instruction *Shadow;
+    Instruction *Origin;
+    Instruction *OrigIns;
+    ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I)
+      : Shadow(S), Origin(O), OrigIns(I) { }
+    ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { }
+  };
+  SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
+  SmallVector<Instruction*, 16> StoreList;
+
+  MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
+    : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
+    InsertChecks = !MS.BL->isIn(F);
+    DEBUG(if (!InsertChecks)
+            dbgs() << "MemorySanitizer is not inserting checks into '"
+                   << F.getName() << "'\n");
+  }
+
+  void materializeStores() {
+    for (size_t i = 0, n = StoreList.size(); i < n; i++) {
+      StoreInst &I = *cast<StoreInst>(StoreList[i]);
+
+      IRBuilder<> IRB(&I);
+      Value *Val = I.getValueOperand();
+      Value *Addr = I.getPointerOperand();
+      Value *Shadow = getShadow(Val);
+      Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
+
+      StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
+      DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
+      (void)NewSI;
+      // If the store is volatile, add a check.
+      if (I.isVolatile())
+        insertCheck(Val, &I);
+      if (ClCheckAccessAddress)
+        insertCheck(Addr, &I);
+
+      if (ClTrackOrigins) {
+        if (ClStoreCleanOrigin || isa<StructType>(Shadow->getType())) {
+          IRB.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRB), I.getAlignment());
+        } else {
+          Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+
+          Constant *Cst = dyn_cast_or_null<Constant>(ConvertedShadow);
+          // TODO(eugenis): handle non-zero constant shadow by inserting an
+          // unconditional check (cannot simply fail compilation as this could
+          // be in the dead code).
+          if (Cst)
+            continue;
+
+          Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
+              getCleanShadow(ConvertedShadow), "_mscmp");
+          Instruction *CheckTerm =
+            SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false, MS.OriginStoreWeights);
+          IRBuilder<> IRBNewBlock(CheckTerm);
+          IRBNewBlock.CreateAlignedStore(getOrigin(Val),
+              getOriginPtr(Addr, IRBNewBlock), I.getAlignment());
+        }
+      }
+    }
+  }
+
+  void materializeChecks() {
+    for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
+      Instruction *Shadow = InstrumentationList[i].Shadow;
+      Instruction *OrigIns = InstrumentationList[i].OrigIns;
+      IRBuilder<> IRB(OrigIns);
+      DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
+      Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+      DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
+      Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
+          getCleanShadow(ConvertedShadow), "_mscmp");
+      Instruction *CheckTerm =
+        SplitBlockAndInsertIfThen(cast<Instruction>(Cmp),
+                                  /* Unreachable */ !ClKeepGoing,
+                                  MS.ColdCallWeights);
+
+      IRB.SetInsertPoint(CheckTerm);
+      if (ClTrackOrigins) {
+        Instruction *Origin = InstrumentationList[i].Origin;
+        IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
+                        MS.OriginTLS);
+      }
+      CallInst *Call = IRB.CreateCall(MS.WarningFn);
+      Call->setDebugLoc(OrigIns->getDebugLoc());
+      IRB.CreateCall(MS.EmptyAsm);
+      DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
+    }
+    DEBUG(dbgs() << "DONE:\n" << F);
+  }
+
+  /// \brief Add MemorySanitizer instrumentation to a function.
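+  ///
+  /// This runs in two phases: the InstVisitor traversal below first creates
+  /// shadow and origin computations for every instruction, and the
+  /// materialize* helpers above then emit the delayed shadow stores and
+  /// value checks.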
+  bool runOnFunction() {
+    MS.initializeCallbacks(*F.getParent());
+    if (!MS.TD) return false;
+    // Iterate all BBs in depth-first order and create shadow instructions
+    // for all instructions (where applicable).
+    // For PHI nodes we create dummy shadow PHIs which will be finalized later.
+    for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+         DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+      BasicBlock *BB = *DI;
+      visit(*BB);
+    }
+
+    // Finalize PHI nodes.
+    for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) {
+      PHINode *PN = ShadowPHINodes[i];
+      PHINode *PNS = cast<PHINode>(getShadow(PN));
+      PHINode *PNO = ClTrackOrigins ? cast<PHINode>(getOrigin(PN)) : 0;
+      size_t NumValues = PN->getNumIncomingValues();
+      for (size_t v = 0; v < NumValues; v++) {
+        PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
+        if (PNO)
+          PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
+      }
+    }
+
+    VAHelper->finalizeInstrumentation();
+
+    // Delayed instrumentation of StoreInst.
+    // This may add new checks to be inserted later.
+    materializeStores();
+
+    // Insert shadow value checks.
+    materializeChecks();
+
+    return true;
+  }
+
+  /// \brief Compute the shadow type that corresponds to a given Value.
+  Type *getShadowTy(Value *V) {
+    return getShadowTy(V->getType());
+  }
+
+  /// \brief Compute the shadow type that corresponds to a given Type.
+  Type *getShadowTy(Type *OrigTy) {
+    if (!OrigTy->isSized()) {
+      return 0;
+    }
+    // For integer type, shadow is the same as the original type.
+    // This may return weird-sized types like i1.
+    if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
+      return IT;
+    if (VectorType *VT = dyn_cast<VectorType>(OrigTy))
+      return VectorType::getInteger(VT);
+    if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
+      SmallVector<Type*, 4> Elements;
+      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+        Elements.push_back(getShadowTy(ST->getElementType(i)));
+      StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
+      DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
+      return Res;
+    }
+    uint32_t TypeSize = MS.TD->getTypeStoreSizeInBits(OrigTy);
+    return IntegerType::get(*MS.C, TypeSize);
+  }
+
+  /// \brief Flatten a vector type.
+  Type *getShadowTyNoVec(Type *ty) {
+    if (VectorType *vt = dyn_cast<VectorType>(ty))
+      return IntegerType::get(*MS.C, vt->getBitWidth());
+    return ty;
+  }
+
+  /// \brief Convert a shadow value to its flattened variant.
+  Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
+    Type *Ty = V->getType();
+    Type *NoVecTy = getShadowTyNoVec(Ty);
+    if (Ty == NoVecTy) return V;
+    return IRB.CreateBitCast(V, NoVecTy);
+  }
+
+  /// \brief Compute the shadow address that corresponds to a given application
+  /// address.
+  ///
+  /// Shadow = Addr & ~ShadowMask.
+  Value *getShadowPtr(Value *Addr, Type *ShadowTy,
+                      IRBuilder<> &IRB) {
+    Value *ShadowLong =
+      IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
+                    ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
+    return IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
+  }
+
+  /// \brief Compute the origin address that corresponds to a given application
+  /// address.
+  ///
+  /// OriginAddr = (ShadowAddr + OriginOffset) & ~3ULL
+  Value *getOriginPtr(Value *Addr, IRBuilder<> &IRB) {
+    Value *ShadowLong =
+      IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
+                    ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
+    Value *Add =
+      IRB.CreateAdd(ShadowLong,
+                    ConstantInt::get(MS.IntptrTy, MS.OriginOffset));
+    Value *SecondAnd =
+      IRB.CreateAnd(Add, ConstantInt::get(MS.IntptrTy, ~3ULL));
+    return IRB.CreateIntToPtr(SecondAnd, PointerType::get(IRB.getInt32Ty(), 0));
+  }
+
+  /// \brief Compute the shadow address for a given function argument.
+  ///
+  /// Shadow = ParamTLS+ArgOffset.
+  Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
+                                 int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
+                              "_msarg");
+  }
+
+  /// \brief Compute the origin address for a given function argument.
+  Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
+                                 int ArgOffset) {
+    if (!ClTrackOrigins) return 0;
+    Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+                              "_msarg_o");
+  }
+
+  /// \brief Compute the shadow address for a retval.
+  Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
+    Value *Base = IRB.CreatePointerCast(MS.RetvalTLS, MS.IntptrTy);
+    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
+                              "_msret");
+  }
+
+  /// \brief Compute the origin address for a retval.
+  Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
+    // We keep a single origin for the entire retval. Might be too optimistic.
+    return MS.RetvalOriginTLS;
+  }
+
+  /// \brief Set SV to be the shadow value for V.
+  void setShadow(Value *V, Value *SV) {
+    assert(!ShadowMap.count(V) && "Values may only have one shadow");
+    ShadowMap[V] = SV;
+  }
+
+  /// \brief Set Origin to be the origin value for V.
+  void setOrigin(Value *V, Value *Origin) {
+    if (!ClTrackOrigins) return;
+    assert(!OriginMap.count(V) && "Values may only have one origin");
+    DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
+    OriginMap[V] = Origin;
+  }
+
+  /// \brief Create a clean shadow value for a given value.
+  ///
+  /// Clean shadow (all zeroes) means all bits of the value are defined
+  /// (initialized).
+  Value *getCleanShadow(Value *V) {
+    Type *ShadowTy = getShadowTy(V);
+    if (!ShadowTy)
+      return 0;
+    return Constant::getNullValue(ShadowTy);
+  }
+
+  /// \brief Create a dirty shadow of a given shadow type.
+  Constant *getPoisonedShadow(Type *ShadowTy) {
+    assert(ShadowTy);
+    if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
+      return Constant::getAllOnesValue(ShadowTy);
+    StructType *ST = cast<StructType>(ShadowTy);
+    SmallVector<Constant *, 4> Vals;
+    for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+      Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
+    return ConstantStruct::get(ST, Vals);
+  }
+
+  /// \brief Create a clean (zero) origin.
+  Value *getCleanOrigin() {
+    return Constant::getNullValue(MS.OriginTy);
+  }
+
+  /// \brief Get the shadow value for a given Value.
+  ///
+  /// This function either returns the value set earlier with setShadow,
+  /// or extracts it from ParamTLS (for function arguments).
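+  ///
+  /// For an argument, the shadow is read from __msan_param_tls at ArgOffset,
+  /// the running byte offset of the preceding arguments (each rounded up to
+  /// 8 bytes); visitCallSite stores argument shadows at the same offsets on
+  /// the caller side.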
+ Value *getShadow(Value *V) { + if (Instruction *I = dyn_cast<Instruction>(V)) { + // For instructions the shadow is already stored in the map. + Value *Shadow = ShadowMap[V]; + if (!Shadow) { + DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent())); + (void)I; + assert(Shadow && "No shadow for a value"); + } + return Shadow; + } + if (UndefValue *U = dyn_cast<UndefValue>(V)) { + Value *AllOnes = getPoisonedShadow(getShadowTy(V)); + DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n"); + (void)U; + return AllOnes; + } + if (Argument *A = dyn_cast<Argument>(V)) { + // For arguments we compute the shadow on demand and store it in the map. + Value **ShadowPtr = &ShadowMap[V]; + if (*ShadowPtr) + return *ShadowPtr; + Function *F = A->getParent(); + IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI()); + unsigned ArgOffset = 0; + for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + if (!AI->getType()->isSized()) { + DEBUG(dbgs() << "Arg is not sized\n"); + continue; + } + unsigned Size = AI->hasByValAttr() + ? MS.TD->getTypeAllocSize(AI->getType()->getPointerElementType()) + : MS.TD->getTypeAllocSize(AI->getType()); + if (A == AI) { + Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset); + if (AI->hasByValAttr()) { + // ByVal pointer itself has clean shadow. We copy the actual + // argument shadow to the underlying memory. + Value *Cpy = EntryIRB.CreateMemCpy( + getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), + Base, Size, AI->getParamAlignment()); + DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n"); + (void)Cpy; + *ShadowPtr = getCleanShadow(V); + } else { + *ShadowPtr = EntryIRB.CreateLoad(Base); + } + DEBUG(dbgs() << " ARG: " << *AI << " ==> " << + **ShadowPtr << "\n"); + if (ClTrackOrigins) { + Value* OriginPtr = getOriginPtrForArgument(AI, EntryIRB, ArgOffset); + setOrigin(A, EntryIRB.CreateLoad(OriginPtr)); + } + } + ArgOffset += DataLayout::RoundUpAlignment(Size, 8); + } + assert(*ShadowPtr && "Could not find shadow for an argument"); + return *ShadowPtr; + } + // For everything else the shadow is zero. + return getCleanShadow(V); + } + + /// \brief Get the shadow for i-th argument of the instruction I. + Value *getShadow(Instruction *I, int i) { + return getShadow(I->getOperand(i)); + } + + /// \brief Get the origin for a value. + Value *getOrigin(Value *V) { + if (!ClTrackOrigins) return 0; + if (isa<Instruction>(V) || isa<Argument>(V)) { + Value *Origin = OriginMap[V]; + if (!Origin) { + DEBUG(dbgs() << "NO ORIGIN: " << *V << "\n"); + Origin = getCleanOrigin(); + } + return Origin; + } + return getCleanOrigin(); + } + + /// \brief Get the origin for i-th argument of the instruction I. + Value *getOrigin(Instruction *I, int i) { + return getOrigin(I->getOperand(i)); + } + + /// \brief Remember the place where a shadow check should be inserted. + /// + /// This location will be later instrumented with a check that will print a + /// UMR warning in runtime if the value is not fully defined. 
+ void insertCheck(Value *Val, Instruction *OrigIns) { + assert(Val); + if (!InsertChecks) return; + Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val)); + if (!Shadow) return; +#ifndef NDEBUG + Type *ShadowTy = Shadow->getType(); + assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) && + "Can only insert checks for integer and vector shadow types"); +#endif + Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val)); + InstrumentationList.push_back( + ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns)); + } + + //------------------- Visitors. + + /// \brief Instrument LoadInst + /// + /// Loads the corresponding shadow and (optionally) origin. + /// Optionally, checks that the load address is fully defined. + void visitLoadInst(LoadInst &I) { + assert(I.getType()->isSized() && "Load type must have size"); + IRBuilder<> IRB(&I); + Type *ShadowTy = getShadowTy(&I); + Value *Addr = I.getPointerOperand(); + Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); + setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld")); + + if (ClCheckAccessAddress) + insertCheck(I.getPointerOperand(), &I); + + if (ClTrackOrigins) + setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), I.getAlignment())); + } + + /// \brief Instrument StoreInst + /// + /// Stores the corresponding shadow and (optionally) origin. + /// Optionally, checks that the store address is fully defined. + /// Volatile stores check that the value being stored is fully defined. + void visitStoreInst(StoreInst &I) { + StoreList.push_back(&I); + } + + // Vector manipulation. + void visitExtractElementInst(ExtractElementInst &I) { + insertCheck(I.getOperand(1), &I); + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1), + "_msprop")); + setOrigin(&I, getOrigin(&I, 0)); + } + + void visitInsertElementInst(InsertElementInst &I) { + insertCheck(I.getOperand(2), &I); + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1), + I.getOperand(2), "_msprop")); + setOriginForNaryOp(I); + } + + void visitShuffleVectorInst(ShuffleVectorInst &I) { + insertCheck(I.getOperand(2), &I); + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1), + I.getOperand(2), "_msprop")); + setOriginForNaryOp(I); + } + + // Casts. 
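+  // For the value-preserving casts below, the shadow is cast exactly like
+  // the value, so each bit of the result keeps the definedness of the bit
+  // it was derived from.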
+  void visitSExtInst(SExtInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitZExtInst(ZExtInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitTruncInst(TruncInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitBitCastInst(BitCastInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitPtrToIntInst(PtrToIntInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+              "_msprop_ptrtoint"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitIntToPtrInst(IntToPtrInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+              "_msprop_inttoptr"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
+  void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
+  void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
+  void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
+  void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
+  void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
+
+  /// \brief Propagate shadow for bitwise AND.
+  ///
+  /// This code is exact, i.e. if, for example, a bit in the left argument
+  /// is defined and 0, then neither the value nor the definedness of the
+  /// corresponding bit in the right argument affects the resulting shadow.
+  void visitAnd(BinaryOperator &I) {
+    IRBuilder<> IRB(&I);
+    //  "And" of 0 and a poisoned value results in unpoisoned value.
+    //  1&1 => 1;  0&1 => 0;  p&1 => p;
+    //  1&0 => 0;  0&0 => 0;  p&0 => 0;
+    //  1&p => p;  0&p => 0;  p&p => p;
+    //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *V1 = I.getOperand(0);
+    Value *V2 = I.getOperand(1);
+    if (V1->getType() != S1->getType()) {
+      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+    }
+    Value *S1S2 = IRB.CreateAnd(S1, S2);
+    Value *V1S2 = IRB.CreateAnd(V1, S2);
+    Value *S1V2 = IRB.CreateAnd(S1, V2);
+    setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+    setOriginForNaryOp(I);
+  }
+
+  void visitOr(BinaryOperator &I) {
+    IRBuilder<> IRB(&I);
+    //  "Or" of 1 and a poisoned value results in unpoisoned value.
+    //  1|1 => 1;  0|1 => 1;  p|1 => 1;
+    //  1|0 => 1;  0|0 => 0;  p|0 => p;
+    //  1|p => 1;  0|p => p;  p|p => p;
+    //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *V1 = IRB.CreateNot(I.getOperand(0));
+    Value *V2 = IRB.CreateNot(I.getOperand(1));
+    if (V1->getType() != S1->getType()) {
+      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+    }
+    Value *S1S2 = IRB.CreateAnd(S1, S2);
+    Value *V1S2 = IRB.CreateAnd(V1, S2);
+    Value *S1V2 = IRB.CreateAnd(S1, V2);
+    setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+    setOriginForNaryOp(I);
+  }
+
+  /// \brief Propagate origin for an instruction.
+  ///
+  /// This is a general case of origin propagation. For an N-ary operation,
+  /// the origin is set to the origin of an argument that is not entirely
+  /// initialized.
+  /// If there is more than one such argument, the rightmost of them is
+  /// picked. It does not matter which one is picked if all arguments are
+  /// initialized.
+  void setOriginForNaryOp(Instruction &I) {
+    if (!ClTrackOrigins) return;
+    IRBuilder<> IRB(&I);
+    Value *Origin = getOrigin(&I, 0);
+    for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op) {
+      Value *S = convertToShadowTyNoVec(getShadow(&I, Op), IRB);
+      Origin = IRB.CreateSelect(IRB.CreateICmpNE(S, getCleanShadow(S)),
+                                getOrigin(&I, Op), Origin);
+    }
+    setOrigin(&I, Origin);
+  }
+
+  /// \brief Propagate shadow for a binary operation.
+  ///
+  /// Shadow = Shadow0 | Shadow1, all 3 must have the same type.
+  /// Bitwise OR is selected as an operation that will never lose even a bit of
+  /// poison.
+  void handleShadowOrBinary(Instruction &I) {
+    IRBuilder<> IRB(&I);
+    Value *Shadow0 = getShadow(&I, 0);
+    Value *Shadow1 = getShadow(&I, 1);
+    setShadow(&I, IRB.CreateOr(Shadow0, Shadow1, "_msprop"));
+    setOriginForNaryOp(I);
+  }
+
+  /// \brief Propagate shadow for arbitrary operation.
+  ///
+  /// This is a general case of shadow propagation, used in all cases where we
+  /// don't know and/or care about what the operation actually does.
+  /// It converts all input shadow values to a common type (extending or
+  /// truncating as necessary), and bitwise OR's them.
+  ///
+  /// This is much cheaper than inserting checks (i.e. requiring inputs to be
+  /// fully initialized), and less prone to false positives.
+  // FIXME: is the casting actually correct?
+  // FIXME: merge this with handleShadowOrBinary.
+  void handleShadowOr(Instruction &I) {
+    IRBuilder<> IRB(&I);
+    Value *Shadow = getShadow(&I, 0);
+    for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op)
+      Shadow = IRB.CreateOr(
+        Shadow, IRB.CreateIntCast(getShadow(&I, Op), Shadow->getType(), false),
+        "_msprop");
+    Shadow = IRB.CreateIntCast(Shadow, getShadowTy(&I), false);
+    setShadow(&I, Shadow);
+    setOriginForNaryOp(I);
+  }
+
+  void visitFAdd(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitFSub(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitFMul(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitAdd(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitSub(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitXor(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitMul(BinaryOperator &I) { handleShadowOrBinary(I); }
+
+  void handleDiv(Instruction &I) {
+    IRBuilder<> IRB(&I);
+    // Strict on the second argument.
+    insertCheck(I.getOperand(1), &I);
+    setShadow(&I, getShadow(&I, 0));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitUDiv(BinaryOperator &I) { handleDiv(I); }
+  void visitSDiv(BinaryOperator &I) { handleDiv(I); }
+  void visitFDiv(BinaryOperator &I) { handleDiv(I); }
+  void visitURem(BinaryOperator &I) { handleDiv(I); }
+  void visitSRem(BinaryOperator &I) { handleDiv(I); }
+  void visitFRem(BinaryOperator &I) { handleDiv(I); }
+
+  /// \brief Instrument == and != comparisons.
+  ///
+  /// Sometimes the comparison result is known even if some of the bits of the
+  /// arguments are not.
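+  /// A sketch with 4-bit values: if A = 1?10 (Sa = 0100, bit 2 unknown) and
+  /// B = 1110 (Sb = 0000), then Sc = 0100 and the defined bits of C = A^B
+  /// are all 0 (C & ~Sc == 0), so the result is poisoned: the unknown bit
+  /// alone decides whether A == B.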
+  void handleEqualityComparison(ICmpInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *A = I.getOperand(0);
+    Value *B = I.getOperand(1);
+    Value *Sa = getShadow(A);
+    Value *Sb = getShadow(B);
+    if (A->getType()->isPointerTy())
+      A = IRB.CreatePointerCast(A, MS.IntptrTy);
+    if (B->getType()->isPointerTy())
+      B = IRB.CreatePointerCast(B, MS.IntptrTy);
+    // A == B  <==>  (C = A^B) == 0
+    // A != B  <==>  (C = A^B) != 0
+    // Sc = Sa | Sb
+    Value *C = IRB.CreateXor(A, B);
+    Value *Sc = IRB.CreateOr(Sa, Sb);
+    // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
+    // Result is defined if one of the following is true
+    // * there is a defined 1 bit in C
+    // * C is fully defined
+    // Si = !(C & ~Sc) && Sc
+    Value *Zero = Constant::getNullValue(Sc->getType());
+    Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
+    Value *Si =
+      IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
+                    IRB.CreateICmpEQ(
+                      IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
+    Si->setName("_msprop_icmp");
+    setShadow(&I, Si);
+    setOriginForNaryOp(I);
+  }
+
+  /// \brief Instrument signed relational comparisons.
+  ///
+  /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by
+  /// propagating the highest bit of the shadow. Everything else is delegated
+  /// to handleShadowOr().
+  void handleSignedRelationalComparison(ICmpInst &I) {
+    Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
+    Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
+    Value* op = NULL;
+    CmpInst::Predicate pre = I.getPredicate();
+    if (constOp0 && constOp0->isNullValue() &&
+        (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) {
+      op = I.getOperand(1);
+    } else if (constOp1 && constOp1->isNullValue() &&
+               (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) {
+      op = I.getOperand(0);
+    }
+    if (op) {
+      IRBuilder<> IRB(&I);
+      Value* Shadow =
+        IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt");
+      setShadow(&I, Shadow);
+      setOrigin(&I, getOrigin(op));
+    } else {
+      handleShadowOr(I);
+    }
+  }
+
+  void visitICmpInst(ICmpInst &I) {
+    if (ClHandleICmp && I.isEquality())
+      handleEqualityComparison(I);
+    else if (ClHandleICmp && I.isSigned() && I.isRelational())
+      handleSignedRelationalComparison(I);
+    else
+      handleShadowOr(I);
+  }
+
+  void visitFCmpInst(FCmpInst &I) {
+    handleShadowOr(I);
+  }
+
+  void handleShift(BinaryOperator &I) {
+    IRBuilder<> IRB(&I);
+    // If any of the S2 bits are poisoned, the whole thing is poisoned.
+    // Otherwise perform the same shift on S1.
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
+                                   S2->getType());
+    Value *V2 = I.getOperand(1);
+    Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
+    setShadow(&I, IRB.CreateOr(Shift, S2Conv));
+    setOriginForNaryOp(I);
+  }
+
+  void visitShl(BinaryOperator &I) { handleShift(I); }
+  void visitAShr(BinaryOperator &I) { handleShift(I); }
+  void visitLShr(BinaryOperator &I) { handleShift(I); }
+
+  /// \brief Instrument llvm.memmove
+  ///
+  /// At this point we don't know if llvm.memmove will be inlined or not.
+  /// If we don't instrument it and it gets inlined,
+  /// our interceptor will not kick in and we will lose the memmove.
+  /// If we instrument the call here, but it does not get inlined,
+  /// we will memmove the shadow twice: which is bad in case
+  /// of overlapping regions. So, we simply lower the intrinsic to a call.
+  ///
+  /// Similar situation exists for memcpy and memset.
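+  ///
+  /// Sketch of the rewrite (intrinsic signature abbreviated):
+  ///   call void @llvm.memmove(i8* %dst, i8* %src, i64 %n, ...)
+  /// becomes
+  ///   call i8* @__msan_memmove(i8* %dst, i8* %src, i64 %n)
+  /// so the runtime interceptor moves the shadow together with the data.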
+  void visitMemMoveInst(MemMoveInst &I) {
+    IRBuilder<> IRB(&I);
+    IRB.CreateCall3(
+      MS.MemmoveFn,
+      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+      IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+    I.eraseFromParent();
+  }
+
+  // Similar to memmove: avoid copying shadow twice.
+  // This is somewhat unfortunate as it may slow down small constant memcpys.
+  // FIXME: consider doing manual inline for small constant sizes and proper
+  // alignment.
+  void visitMemCpyInst(MemCpyInst &I) {
+    IRBuilder<> IRB(&I);
+    IRB.CreateCall3(
+      MS.MemcpyFn,
+      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+      IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+    I.eraseFromParent();
+  }
+
+  // Same as memcpy.
+  void visitMemSetInst(MemSetInst &I) {
+    IRBuilder<> IRB(&I);
+    IRB.CreateCall3(
+      MS.MemsetFn,
+      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+      IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
+      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+    I.eraseFromParent();
+  }
+
+  void visitVAStartInst(VAStartInst &I) {
+    VAHelper->visitVAStartInst(I);
+  }
+
+  void visitVACopyInst(VACopyInst &I) {
+    VAHelper->visitVACopyInst(I);
+  }
+
+  void handleBswap(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Op = I.getArgOperand(0);
+    Type *OpType = Op->getType();
+    Function *BswapFunc = Intrinsic::getDeclaration(
+      F.getParent(), Intrinsic::bswap, ArrayRef<Type*>(&OpType, 1));
+    setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
+    setOrigin(&I, getOrigin(Op));
+  }
+
+  void visitIntrinsicInst(IntrinsicInst &I) {
+    switch (I.getIntrinsicID()) {
+    case llvm::Intrinsic::bswap:
+      handleBswap(I); break;
+    default:
+      visitInstruction(I); break;
+    }
+  }
+
+  void visitCallSite(CallSite CS) {
+    Instruction &I = *CS.getInstruction();
+    assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
+    if (CS.isCall()) {
+      CallInst *Call = cast<CallInst>(&I);
+
+      // For inline asm, do the usual thing: check argument shadow and mark all
+      // outputs as clean. Note that any side effects of the inline asm that are
+      // not immediately visible in its constraints are not handled.
+      if (Call->isInlineAsm()) {
+        visitInstruction(I);
+        return;
+      }
+
+      // Allow only tail calls with the same types, otherwise
+      // we may have a false positive: shadow for a non-void RetVal
+      // will get propagated to a void RetVal.
+      if (Call->isTailCall() && Call->getType() != Call->getParent()->getType())
+        Call->setTailCall(false);
+
+      assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");
+
+      // We are going to insert code that relies on the fact that the callee
+      // will become a non-readonly function after it is instrumented by us. To
+      // prevent this code from being optimized out, mark that function
+      // non-readonly in advance.
+      if (Function *Func = Call->getCalledFunction()) {
+        // Clear out readonly/readnone attributes.
+        AttrBuilder B;
+        B.addAttribute(Attributes::ReadOnly)
+          .addAttribute(Attributes::ReadNone);
+        Func->removeAttribute(AttributeSet::FunctionIndex,
+                              Attributes::get(Func->getContext(), B));
+      }
+    }
+    IRBuilder<> IRB(&I);
+    unsigned ArgOffset = 0;
+    DEBUG(dbgs() << " CallSite: " << I << "\n");
+    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Value *A = *ArgIt;
+      unsigned i = ArgIt - CS.arg_begin();
+      if (!A->getType()->isSized()) {
+        DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n");
+        continue;
+      }
+      unsigned Size = 0;
+      Value *Store = 0;
+      // Compute the Shadow for arg even if it is ByVal, because
+      // in that case getShadow() will copy the actual arg shadow to
+      // __msan_param_tls.
+      Value *ArgShadow = getShadow(A);
+      Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
+      DEBUG(dbgs() << " Arg#" << i << ": " << *A <<
+            " Shadow: " << *ArgShadow << "\n");
+      if (CS.paramHasAttr(i + 1, Attributes::ByVal)) {
+        assert(A->getType()->isPointerTy() &&
+               "ByVal argument is not a pointer!");
+        Size = MS.TD->getTypeAllocSize(A->getType()->getPointerElementType());
+        unsigned Alignment = CS.getParamAlignment(i + 1);
+        Store = IRB.CreateMemCpy(ArgShadowBase,
+                                 getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
+                                 Size, Alignment);
+      } else {
+        Size = MS.TD->getTypeAllocSize(A->getType());
+        Store = IRB.CreateStore(ArgShadow, ArgShadowBase);
+      }
+      if (ClTrackOrigins)
+        IRB.CreateStore(getOrigin(A),
+                        getOriginPtrForArgument(A, IRB, ArgOffset));
+      assert(Size != 0 && Store != 0);
+      DEBUG(dbgs() << " Param:" << *Store << "\n");
+      ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
+    }
+    DEBUG(dbgs() << " done with call args\n");
+
+    FunctionType *FT =
+      cast<FunctionType>(CS.getCalledValue()->getType()->getContainedType(0));
+    if (FT->isVarArg()) {
+      VAHelper->visitCallSite(CS, IRB);
+    }
+
+    // Now, get the shadow for the RetVal.
+    if (!I.getType()->isSized()) return;
+    IRBuilder<> IRBBefore(&I);
+    // Until we have full dynamic coverage, make sure the retval shadow is 0.
+    Value *Base = getShadowPtrForRetval(&I, IRBBefore);
+    IRBBefore.CreateStore(getCleanShadow(&I), Base);
+    Instruction *NextInsn = 0;
+    if (CS.isCall()) {
+      NextInsn = I.getNextNode();
+    } else {
+      BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
+      if (!NormalDest->getSinglePredecessor()) {
+        // FIXME: this case is tricky, so we are just conservative here.
+        // Perhaps we need to split the edge between this BB and NormalDest,
+        // but a naive attempt to use SplitEdge leads to a crash.
+        setShadow(&I, getCleanShadow(&I));
+        setOrigin(&I, getCleanOrigin());
+        return;
+      }
+      NextInsn = NormalDest->getFirstInsertionPt();
+      assert(NextInsn &&
+             "Could not find insertion point for retval shadow load");
+    }
+    IRBuilder<> IRBAfter(NextInsn);
+    setShadow(&I, IRBAfter.CreateLoad(getShadowPtrForRetval(&I, IRBAfter),
+                                      "_msret"));
+    if (ClTrackOrigins)
+      setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
+  }
+
+  void visitReturnInst(ReturnInst &I) {
+    IRBuilder<> IRB(&I);
+    if (Value *RetVal = I.getReturnValue()) {
+      // Set the shadow for the RetVal.
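// [Editorial aside: an illustrative sketch, not part of this patch. It models
// in plain C++ the return-value shadow handshake between the caller-side code
// in visitCallSite above and the callee-side store here; RetvalShadowTLS
// stands in for MSan's real __msan_retval_tls slot.]
//
//   #include <cstdint>
//
//   thread_local uint64_t RetvalShadowTLS; // stand-in for __msan_retval_tls
//
//   uint64_t Callee(uint64_t V, uint64_t VShadow) {
//     RetvalShadowTLS = VShadow; // visitReturnInst: publish RetVal's shadow
//     return V;
//   }
//
//   uint64_t Caller() {
//     RetvalShadowTLS = 0;       // visitCallSite: clean slate before the call,
//                                // so an uninstrumented callee reads as defined
//     uint64_t R = Callee(41, 0xFF);
//     uint64_t RShadow = RetvalShadowTLS; // the "_msret" load after the call
//     return R ^ RShadow;        // use both so nothing is optimized away
//   }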
+ Value *Shadow = getShadow(RetVal); + Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); + DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n"); + IRB.CreateStore(Shadow, ShadowPtr); + if (ClTrackOrigins) + IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB)); + } + } + + void visitPHINode(PHINode &I) { + IRBuilder<> IRB(&I); + ShadowPHINodes.push_back(&I); + setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(), + "_msphi_s")); + if (ClTrackOrigins) + setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(), + "_msphi_o")); + } + + void visitAllocaInst(AllocaInst &I) { + setShadow(&I, getCleanShadow(&I)); + if (!ClPoisonStack) return; + IRBuilder<> IRB(I.getNextNode()); + uint64_t Size = MS.TD->getTypeAllocSize(I.getAllocatedType()); + if (ClPoisonStackWithCall) { + IRB.CreateCall2(MS.MsanPoisonStackFn, + IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), + ConstantInt::get(MS.IntptrTy, Size)); + } else { + Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB); + IRB.CreateMemSet(ShadowBase, IRB.getInt8(ClPoisonStackPattern), + Size, I.getAlignment()); + } + + if (ClTrackOrigins) { + setOrigin(&I, getCleanOrigin()); + SmallString<2048> StackDescriptionStorage; + raw_svector_ostream StackDescription(StackDescriptionStorage); + // We create a string with a description of the stack allocation and + // pass it into __msan_set_alloca_origin. + // It will be printed by the run-time if stack-originated UMR is found. + // The first 4 bytes of the string are set to '----' and will be replaced + // by __msan_va_arg_overflow_size_tls at the first call. + StackDescription << "----" << I.getName() << "@" << F.getName(); + Value *Descr = + createPrivateNonConstGlobalForString(*F.getParent(), + StackDescription.str()); + IRB.CreateCall3(MS.MsanSetAllocaOriginFn, + IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), + ConstantInt::get(MS.IntptrTy, Size), + IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())); + } + } + + void visitSelectInst(SelectInst& I) { + IRBuilder<> IRB(&I); + setShadow(&I, IRB.CreateSelect(I.getCondition(), + getShadow(I.getTrueValue()), getShadow(I.getFalseValue()), + "_msprop")); + if (ClTrackOrigins) + setOrigin(&I, IRB.CreateSelect(I.getCondition(), + getOrigin(I.getTrueValue()), getOrigin(I.getFalseValue()))); + } + + void visitLandingPadInst(LandingPadInst &I) { + // Do nothing. 
+ // See http://code.google.com/p/memory-sanitizer/issues/detail?id=1 + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } + + void visitGetElementPtrInst(GetElementPtrInst &I) { + handleShadowOr(I); + } + + void visitExtractValueInst(ExtractValueInst &I) { + IRBuilder<> IRB(&I); + Value *Agg = I.getAggregateOperand(); + DEBUG(dbgs() << "ExtractValue: " << I << "\n"); + Value *AggShadow = getShadow(Agg); + DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n"); + Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices()); + DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n"); + setShadow(&I, ResShadow); + setOrigin(&I, getCleanOrigin()); + } + + void visitInsertValueInst(InsertValueInst &I) { + IRBuilder<> IRB(&I); + DEBUG(dbgs() << "InsertValue: " << I << "\n"); + Value *AggShadow = getShadow(I.getAggregateOperand()); + Value *InsShadow = getShadow(I.getInsertedValueOperand()); + DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n"); + DEBUG(dbgs() << " InsShadow: " << *InsShadow << "\n"); + Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices()); + DEBUG(dbgs() << " Res: " << *Res << "\n"); + setShadow(&I, Res); + setOrigin(&I, getCleanOrigin()); + } + + void dumpInst(Instruction &I) { + if (CallInst *CI = dyn_cast<CallInst>(&I)) { + errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n"; + } else { + errs() << "ZZZ " << I.getOpcodeName() << "\n"; + } + errs() << "QQQ " << I << "\n"; + } + + void visitResumeInst(ResumeInst &I) { + DEBUG(dbgs() << "Resume: " << I << "\n"); + // Nothing to do here. + } + + void visitInstruction(Instruction &I) { + // Everything else: stop propagating and check for poisoned shadow. + if (ClDumpStrictInstructions) + dumpInst(I); + DEBUG(dbgs() << "DEFAULT: " << I << "\n"); + for (size_t i = 0, n = I.getNumOperands(); i < n; i++) + insertCheck(I.getOperand(i), &I); + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } +}; + +/// \brief AMD64-specific implementation of VarArgHelper. +struct VarArgAMD64Helper : public VarArgHelper { + // An unfortunate workaround for asymmetric lowering of va_arg stuff. + // See a comment in visitCallSite for more details. + static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7 + static const unsigned AMD64FpEndOffset = 176; + + Function &F; + MemorySanitizer &MS; + MemorySanitizerVisitor &MSV; + Value *VAArgTLSCopy; + Value *VAArgOverflowSize; + + SmallVector<CallInst*, 16> VAStartInstrumentationList; + + VarArgAMD64Helper(Function &F, MemorySanitizer &MS, + MemorySanitizerVisitor &MSV) + : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { } + + enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; + + ArgKind classifyArgument(Value* arg) { + // A very rough approximation of X86_64 argument classification rules. + Type *T = arg->getType(); + if (T->isFPOrFPVectorTy() || T->isX86_MMXTy()) + return AK_FloatingPoint; + if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64) + return AK_GeneralPurpose; + if (T->isPointerTy()) + return AK_GeneralPurpose; + return AK_Memory; + } + + // For VarArg functions, store the argument shadow in an ABI-specific format + // that corresponds to va_list layout. + // We do this because Clang lowers va_arg in the frontend, and this pass + // only sees the low level code that deals with va_list internals. 
+ // A much easier alternative (provided that Clang emits va_arg instructions) + // would have been to associate each live instance of va_list with a copy of + // MSanParamTLS, and extract shadow on va_arg() call in the argument list + // order. + void visitCallSite(CallSite &CS, IRBuilder<> &IRB) { + unsigned GpOffset = 0; + unsigned FpOffset = AMD64GpEndOffset; + unsigned OverflowOffset = AMD64FpEndOffset; + for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end(); + ArgIt != End; ++ArgIt) { + Value *A = *ArgIt; + ArgKind AK = classifyArgument(A); + if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset) + AK = AK_Memory; + if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset) + AK = AK_Memory; + Value *Base; + switch (AK) { + case AK_GeneralPurpose: + Base = getShadowPtrForVAArgument(A, IRB, GpOffset); + GpOffset += 8; + break; + case AK_FloatingPoint: + Base = getShadowPtrForVAArgument(A, IRB, FpOffset); + FpOffset += 16; + break; + case AK_Memory: + uint64_t ArgSize = MS.TD->getTypeAllocSize(A->getType()); + Base = getShadowPtrForVAArgument(A, IRB, OverflowOffset); + OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8); + } + IRB.CreateStore(MSV.getShadow(A), Base); + } + Constant *OverflowSize = + ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset); + IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS); + } + + /// \brief Compute the shadow address for a given va_arg. + Value *getShadowPtrForVAArgument(Value *A, IRBuilder<> &IRB, + int ArgOffset) { + Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); + Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); + return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(A), 0), + "_msarg"); + } + + void visitVAStartInst(VAStartInst &I) { + IRBuilder<> IRB(&I); + VAStartInstrumentationList.push_back(&I); + Value *VAListTag = I.getArgOperand(0); + Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + + // Unpoison the whole __va_list_tag. + // FIXME: magic ABI constants. + IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), + /* size */24, /* alignment */16, false); + } + + void visitVACopyInst(VACopyInst &I) { + IRBuilder<> IRB(&I); + Value *VAListTag = I.getArgOperand(0); + Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + + // Unpoison the whole __va_list_tag. + // FIXME: magic ABI constants. + IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), + /* size */ 24, /* alignment */ 16, false); + } + + void finalizeInstrumentation() { + assert(!VAArgOverflowSize && !VAArgTLSCopy && + "finalizeInstrumentation called twice"); + if (!VAStartInstrumentationList.empty()) { + // If there is a va_start in this function, make a backup copy of + // va_arg_tls somewhere in the function entry block. + IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); + VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS); + Value *CopySize = + IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset), + VAArgOverflowSize); + VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); + IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8); + } + + // Instrument va_start. + // Copy va_list shadow from the backup copy of the TLS contents. 
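// [Editorial aside: an illustrative sketch, not part of this patch. The
// "magic ABI constants" flagged in the FIXMEs above and used below come from
// the System V x86-64 va_list layout; this is why 24 bytes of __va_list_tag
// are unpoisoned and why the two area pointers are read at byte offsets 8
// and 16. The struct name VaListTag64 is ours; sizes assume LP64.]
//
//   struct VaListTag64 {        // amd64 __va_list_tag, 24 bytes total
//     unsigned gp_offset;       // offset 0:  next unused GP register slot
//     unsigned fp_offset;       // offset 4:  next unused FP register slot
//     void *overflow_arg_area;  // offset 8:  stack-passed (overflow) args
//     void *reg_save_area;      // offset 16: the 176-byte register save area
//   };                          //            (cf. AMD64FpEndOffset above)
//   static_assert(sizeof(VaListTag64) == 24, "matches the unpoisoned size");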
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { + CallInst *OrigInst = VAStartInstrumentationList[i]; + IRBuilder<> IRB(OrigInst->getNextNode()); + Value *VAListTag = OrigInst->getArgOperand(0); + + Value *RegSaveAreaPtrPtr = + IRB.CreateIntToPtr( + IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), + ConstantInt::get(MS.IntptrTy, 16)), + Type::getInt64PtrTy(*MS.C)); + Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr); + Value *RegSaveAreaShadowPtr = + MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB); + IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, + AMD64FpEndOffset, 16); + + Value *OverflowArgAreaPtrPtr = + IRB.CreateIntToPtr( + IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), + ConstantInt::get(MS.IntptrTy, 8)), + Type::getInt64PtrTy(*MS.C)); + Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr); + Value *OverflowArgAreaShadowPtr = + MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB); + Value *SrcPtr = + getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset); + IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16); + } + } +}; + +VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + MemorySanitizerVisitor &Visitor) { + return new VarArgAMD64Helper(Func, Msan, Visitor); +} + +} // namespace + +bool MemorySanitizer::runOnFunction(Function &F) { + MemorySanitizerVisitor Visitor(F, *this); + + // Clear out readonly/readnone attributes. + AttrBuilder B; + B.addAttribute(Attributes::ReadOnly) + .addAttribute(Attributes::ReadNone); + F.removeAttribute(AttributeSet::FunctionIndex, + Attributes::get(F.getContext(), B)); + + return Visitor.runOnFunction(); +} diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index 1fe12545d2..8f8d027dca 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -13,20 +13,20 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-optimal-edge-profiling" +#include "llvm/Transforms/Instrumentation.h" +#include "MaximumSpanningTree.h" #include "ProfilingUtils.h" -#include "llvm/Constants.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ProfileInfoLoader.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Instrumentation.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" -#include "MaximumSpanningTree.h" using namespace llvm; STATISTIC(NumEdgesInserted, "The # of edges inserted."); diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index cc27146ebc..8aefe5901c 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -45,24 +45,24 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-path-profiling" -#include "llvm/DerivedTypes.h" +#include "llvm/Transforms/Instrumentation.h" #include "ProfilingUtils.h" #include "llvm/Analysis/PathNumbering.h" #include 
"llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/DerivedTypes.h" #include "llvm/InstrTypes.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/TypeBuilder.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Instrumentation.h" +#include "llvm/TypeBuilder.h" #include <vector> #define HASH_THRESHHOLD 100000 diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index c6244a55c9..f14a5d8a1e 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -21,27 +21,27 @@ #define DEBUG_TYPE "tsan" +#include "llvm/Transforms/Instrumentation.h" #include "BlackList.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/DataLayout.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Type.h" using namespace llvm; @@ -78,6 +78,7 @@ struct ThreadSanitizer : public FunctionPass { static char ID; // Pass identification, replacement for typeid. private: + void initializeCallbacks(Module &M); bool instrumentLoadOrStore(Instruction *I); bool instrumentAtomic(Instruction *I); void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, @@ -97,6 +98,10 @@ struct ThreadSanitizer : public FunctionPass { Function *TsanWrite[kNumberOfAccessSizes]; Function *TsanAtomicLoad[kNumberOfAccessSizes]; Function *TsanAtomicStore[kNumberOfAccessSizes]; + Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes]; + Function *TsanAtomicCAS[kNumberOfAccessSizes]; + Function *TsanAtomicThreadFence; + Function *TsanAtomicSignalFence; Function *TsanVptrUpdate; }; } // namespace @@ -126,18 +131,8 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { report_fatal_error("ThreadSanitizer interface function redefined"); } -bool ThreadSanitizer::doInitialization(Module &M) { - TD = getAnalysisIfAvailable<DataLayout>(); - if (!TD) - return false; - BL.reset(new BlackList(ClBlackListFile)); - - // Always insert a call to __tsan_init into the module's CTORs. +void ThreadSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(M.getContext()); - Value *TsanInit = M.getOrInsertFunction("__tsan_init", - IRB.getVoidTy(), NULL); - appendToGlobalCtors(M, cast<Function>(TsanInit), 0); - // Initialize the callbacks. 
TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); @@ -167,10 +162,58 @@ bool ThreadSanitizer::doInitialization(Module &M) { TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction( AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, NULL)); + + for (int op = AtomicRMWInst::FIRST_BINOP; + op <= AtomicRMWInst::LAST_BINOP; ++op) { + TsanAtomicRMW[op][i] = NULL; + const char *NamePart = NULL; + if (op == AtomicRMWInst::Xchg) + NamePart = "_exchange"; + else if (op == AtomicRMWInst::Add) + NamePart = "_fetch_add"; + else if (op == AtomicRMWInst::Sub) + NamePart = "_fetch_sub"; + else if (op == AtomicRMWInst::And) + NamePart = "_fetch_and"; + else if (op == AtomicRMWInst::Or) + NamePart = "_fetch_or"; + else if (op == AtomicRMWInst::Xor) + NamePart = "_fetch_xor"; + else if (op == AtomicRMWInst::Nand) + NamePart = "_fetch_nand"; + else + continue; + SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); + TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction( + RMWName, Ty, PtrTy, Ty, OrdTy, NULL)); + } + + SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) + + "_compare_exchange_val"); + TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction( + AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, NULL)); } TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), NULL)); + TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL)); + TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL)); +} + +bool ThreadSanitizer::doInitialization(Module &M) { + TD = getAnalysisIfAvailable<DataLayout>(); + if (!TD) + return false; + BL.reset(new BlackList(ClBlackListFile)); + + // Always insert a call to __tsan_init into the module's CTORs. + IRBuilder<> IRB(M.getContext()); + Value *TsanInit = M.getOrInsertFunction("__tsan_init", + IRB.getVoidTy(), NULL); + appendToGlobalCtors(M, cast<Function>(TsanInit), 0); + return true; } @@ -253,14 +296,15 @@ static bool isAtomic(Instruction *I) { return true; if (isa<AtomicCmpXchgInst>(I)) return true; - if (FenceInst *FI = dyn_cast<FenceInst>(I)) - return FI->getSynchScope() == CrossThread; + if (isa<FenceInst>(I)) + return true; return false; } bool ThreadSanitizer::runOnFunction(Function &F) { if (!TD) return false; if (BL->isIn(F)) return false; + initializeCallbacks(*F.getParent()); SmallVector<Instruction*, 8> RetVec; SmallVector<Instruction*, 8> AllLoadsAndStores; SmallVector<Instruction*, 8> LocalLoadsAndStores; @@ -354,17 +398,39 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { switch (ord) { case NotAtomic: assert(false); case Unordered: // Fall-through. - case Monotonic: v = 1 << 0; break; - // case Consume: v = 1 << 1; break; // Not specified yet. - case Acquire: v = 1 << 2; break; - case Release: v = 1 << 3; break; - case AcquireRelease: v = 1 << 4; break; - case SequentiallyConsistent: v = 1 << 5; break; + case Monotonic: v = 0; break; + // case Consume: v = 1; break; // Not specified yet. + case Acquire: v = 2; break; + case Release: v = 3; break; + case AcquireRelease: v = 4; break; + case SequentiallyConsistent: v = 5; break; } - // +100500 is temporal to migrate to new enum values. 
-  return IRB->getInt32(v + 100500);
+  return IRB->getInt32(v);
 }
 
+static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
+  uint32_t v = 0;
+  switch (ord) {
+    case NotAtomic: assert(false);
+    case Unordered:              // Fall-through.
+    case Monotonic: v = 0; break;
+    // case Consume: v = 1; break;  // Not specified yet.
+    case Acquire: v = 2; break;
+    case Release: v = 0; break;
+    case AcquireRelease: v = 2; break;
+    case SequentiallyConsistent: v = 5; break;
+  }
+  return IRB->getInt32(v);
+}
+
+// Both llvm and ThreadSanitizer atomic operations are based on the C++11/C1x
+// standards. For background, see the C++11 standard. A slightly older,
+// publicly available draft of the standard (not entirely up-to-date, but
+// close enough for casual browsing) is available here:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
+// The following page contains more background information:
+// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
+
 bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
   IRBuilder<> IRB(I);
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
@@ -397,12 +463,45 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
     CallInst *C = CallInst::Create(TsanAtomicStore[Idx],
                                    ArrayRef<Value*>(Args));
     ReplaceInstWithInst(I, C);
-  } else if (isa<AtomicRMWInst>(I)) {
-    // FIXME: Not yet supported.
-  } else if (isa<AtomicCmpXchgInst>(I)) {
-    // FIXME: Not yet supported.
-  } else if (isa<FenceInst>(I)) {
-    // FIXME: Not yet supported.
+  } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
+    Value *Addr = RMWI->getPointerOperand();
+    int Idx = getMemoryAccessFuncIndex(Addr);
+    if (Idx < 0)
+      return false;
+    Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
+    if (F == NULL)
+      return false;
+    const size_t ByteSize = 1 << Idx;
+    const size_t BitSize = ByteSize * 8;
+    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+    Type *PtrTy = Ty->getPointerTo();
+    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+                     IRB.CreateIntCast(RMWI->getValOperand(), Ty, false),
+                     createOrdering(&IRB, RMWI->getOrdering())};
+    CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args));
+    ReplaceInstWithInst(I, C);
+  } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
+    Value *Addr = CASI->getPointerOperand();
+    int Idx = getMemoryAccessFuncIndex(Addr);
+    if (Idx < 0)
+      return false;
+    const size_t ByteSize = 1 << Idx;
+    const size_t BitSize = ByteSize * 8;
+    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+    Type *PtrTy = Ty->getPointerTo();
+    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+                     IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false),
+                     IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false),
+                     createOrdering(&IRB, CASI->getOrdering()),
+                     createFailOrdering(&IRB, CASI->getOrdering())};
+    CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args));
+    ReplaceInstWithInst(I, C);
+  } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
+    Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
+    Function *F = FI->getSynchScope() == SingleThread ?
+ TsanAtomicSignalFence : TsanAtomicThreadFence; + CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args)); + ReplaceInstWithInst(I, C); } return true; } diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index b344952cc5..f43baf5a76 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -16,16 +16,16 @@ #define DEBUG_TYPE "adce" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/InstIterator.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumRemoved, "Number of instructions removed"); diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp index cee5502656..6214e3b703 100644 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -27,12 +27,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "block-placement" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Function.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Transforms/Scalar.h" #include <set> using namespace llvm; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 123ed0f4f3..e6abfdf581 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -15,7 +15,16 @@ #define DEBUG_TYPE "codegenprepare" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" @@ -23,14 +32,6 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/DominatorInternals.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -38,7 +39,6 @@ #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" @@ -125,7 +125,7 @@ namespace { bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); bool OptimizeSelectInst(SelectInst *SI); - bool DupRetToEnableTailCallOpts(ReturnInst *RI); + bool DupRetToEnableTailCallOpts(BasicBlock *BB); bool PlaceDbgValues(Function &F); }; } @@ -194,9 +194,20 
@@ bool CodeGenPrepare::runOnFunction(Function &F) { WorkList.insert(*II); } - for (SmallPtrSet<BasicBlock*, 8>::iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) - DeleteDeadBlock(*I); + // Delete the dead blocks and any of their dead successors. + MadeChange |= !WorkList.empty(); + while (!WorkList.empty()) { + BasicBlock *BB = *WorkList.begin(); + WorkList.erase(BB); + SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); + + DeleteDeadBlock(BB); + + for (SmallVectorImpl<BasicBlock*>::iterator + II = Successors.begin(), IE = Successors.end(); II != IE; ++II) + if (pred_begin(*II) == pred_end(*II)) + WorkList.insert(*II); + } // Merge pairs of basic blocks with unconditional branches, connected by // a single edge. @@ -689,10 +700,14 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { /// %tmp2 = tail call i32 @f2() /// ret i32 %tmp2 /// @endcode -bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { +bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { if (!TLI) return false; + ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); + if (!RI) + return false; + PHINode *PN = 0; BitCastInst *BCI = 0; Value *V = RI->getReturnValue(); @@ -706,7 +721,6 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { return false; } - BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) return false; @@ -1319,9 +1333,6 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { if (CallInst *CI = dyn_cast<CallInst>(I)) return OptimizeCallInst(CI); - if (ReturnInst *RI = dyn_cast<ReturnInst>(I)) - return DupRetToEnableTailCallOpts(RI); - if (SelectInst *SI = dyn_cast<SelectInst>(I)) return OptimizeSelectInst(SI); @@ -1339,6 +1350,8 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { while (CurInstIterator != BB.end()) MadeChange |= OptimizeInst(CurInstIterator++); + MadeChange |= DupRetToEnableTailCallOpts(&BB); + return MadeChange; } diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 369720b3dc..27efde53cd 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -20,14 +20,14 @@ #define DEBUG_TYPE "constprop" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Constant.h" +#include "llvm/DataLayout.h" #include "llvm/Instruction.h" #include "llvm/Pass.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/InstIterator.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <set> using namespace llvm; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 3ec6f3dcc3..b5a2a25ba0 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -13,15 +13,15 @@ #define DEBUG_TYPE "correlated-value-propagation" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumPhis, "Number of phis propagated"); diff --git 
a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index a2e074fae8..f260331c6d 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -18,12 +18,12 @@ #define DEBUG_TYPE "dce" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Instruction.h" #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 736cc05e04..124892887c 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -17,25 +17,25 @@ #define DEBUG_TYPE "dse" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Pass.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Constants.h" #include "llvm/DataLayout.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumFastStores, "Number of stores deleted"); diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 101009dd64..6b622c73f0 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -14,18 +14,18 @@ #define DEBUG_TYPE "early-cse" #include "llvm/Transforms/Scalar.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/ScopedHashTable.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include <deque> using namespace llvm; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index f003e06699..1c540b240c 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -17,11 +17,6 @@ #define DEBUG_TYPE "gvn" #include "llvm/Transforms/Scalar.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include 
"llvm/ADT/Hashing.h" @@ -37,11 +32,16 @@ #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" +#include "llvm/DataLayout.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 6301aad610..486a349c55 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -53,8 +53,10 @@ #define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Attributes.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" @@ -62,10 +64,8 @@ #include "llvm/Intrinsics.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumMerged , "Number of globals merged"); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 310fd6147a..29f5a10e09 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -26,28 +26,28 @@ #define DEBUG_TYPE "indvars" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/BasicBlock.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" -#include "llvm/Type.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Type.h" using namespace llvm; STATISTIC(NumWidened , "Number of indvars widened"); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index e7ffa09f17..4a4cd705e2 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -13,28 +13,28 @@ #define DEBUG_TYPE "jump-threading" #include "llvm/Transforms/Scalar.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Pass.h" -#include "llvm/Analysis/ConstantFolding.h" -#include 
"llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LazyValueInfo.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/DataLayout.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; STATISTIC(NumThreads, "Number of jumps threaded"); @@ -216,19 +216,24 @@ bool JumpThreading::runOnFunction(Function &F) { } /// getJumpThreadDuplicationCost - Return the cost of duplicating this block to -/// thread across it. -static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { +/// thread across it. Stop scanning the block when passing the threshold. +static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, + unsigned Threshold) { /// Ignore PHI nodes, these will be flattened when duplication happens. BasicBlock::const_iterator I = BB->getFirstNonPHI(); // FIXME: THREADING will delete values that are just used to compute the // branch, so they shouldn't count against the duplication cost. - // Sum up the cost of each instruction until we get to the terminator. Don't // include the terminator because the copy won't include it. unsigned Size = 0; for (; !isa<TerminatorInst>(I); ++I) { + + // Stop scanning the block if we've reached the threshold. + if (Size > Threshold) + return Size; + // Debugger intrinsics don't incur code size. 
if (isa<DbgInfoIntrinsic>(I)) continue; @@ -1337,7 +1342,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, return false; } - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); + unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, Threshold); if (JumpThreadCost > Threshold) { DEBUG(dbgs() << " Not threading BB '" << BB->getName() << "' - Cost is too high: " << JumpThreadCost << "\n"); @@ -1481,7 +1486,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, return false; } - unsigned DuplicationCost = getJumpThreadDuplicationCost(BB); + unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, Threshold); if (DuplicationCost > Threshold) { DEBUG(dbgs() << " Not duplicating BB '" << BB->getName() << "' - Cost is too high: " << DuplicationCost << "\n"); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 4818437c24..7ef1d34d3f 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -32,27 +32,27 @@ #define DEBUG_TYPE "licm" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Constants.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include <algorithm> using namespace llvm; @@ -90,6 +90,8 @@ namespace { AU.addRequired<TargetLibraryInfo>(); } + using llvm::Pass::doFinalization; + bool doFinalization() { assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); return false; diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 3771f5aa97..9c67e327e2 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -16,11 +16,11 @@ #define DEBUG_TYPE "loop-delete" #include "llvm/Transforms/Scalar.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; STATISTIC(NumDeleted, "Number of loops deleted"); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index a44e798f12..7807e9bb4f 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -43,19 +43,20 @@ #define DEBUG_TYPE "loop-idiom" #include "llvm/Transforms/Scalar.h" -#include "llvm/IRBuilder.h" -#include 
"llvm/IntrinsicInst.h" -#include "llvm/Module.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/DataLayout.h" +#include "llvm/IRBuilder.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -63,16 +64,83 @@ STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); namespace { + + class LoopIdiomRecognize; + + /// This class defines some utility functions for loop idiom recognization. + class LIRUtil { + public: + /// Return true iff the block contains nothing but an uncondition branch + /// (aka goto instruction). + static bool isAlmostEmpty(BasicBlock *); + + static BranchInst *getBranch(BasicBlock *BB) { + return dyn_cast<BranchInst>(BB->getTerminator()); + } + + /// Return the condition of the branch terminating the given basic block. + static Value *getBrCondtion(BasicBlock *); + + /// Derive the precondition block (i.e the block that guards the loop + /// preheader) from the given preheader. + static BasicBlock *getPrecondBb(BasicBlock *PreHead); + }; + + /// This class is to recoginize idioms of population-count conducted in + /// a noncountable loop. Currently it only recognizes this pattern: + /// \code + /// while(x) {cnt++; ...; x &= x - 1; ...} + /// \endcode + class NclPopcountRecognize { + LoopIdiomRecognize &LIR; + Loop *CurLoop; + BasicBlock *PreCondBB; + + typedef IRBuilder<> IRBuilderTy; + + public: + explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR); + bool recognize(); + + private: + /// Take a glimpse of the loop to see if we need to go ahead recoginizing + /// the idiom. + bool preliminaryScreen(); + + /// Check if the given conditional branch is based on the comparison + /// beween a variable and zero, and if the variable is non-zero, the + /// control yeilds to the loop entry. If the branch matches the behavior, + /// the variable involved in the comparion is returned. This function will + /// be called to see if the precondition and postcondition of the loop + /// are in desirable form. + Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const; + + /// Return true iff the idiom is detected in the loop. and 1) \p CntInst + /// is set to the instruction counting the pupulation bit. 2) \p CntPhi + /// is set to the corresponding phi node. 3) \p Var is set to the value + /// whose population bits are being counted. + bool detectIdiom + (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const; + + /// Insert ctpop intrinsic function and some obviously dead instructions. + void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var); + + /// Create llvm.ctpop.* intrinsic function. 
+    /// Create the llvm.ctpop.* intrinsic function.
+    CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
+  };
+
   class LoopIdiomRecognize : public LoopPass {
     Loop *CurLoop;
     const DataLayout *TD;
     DominatorTree *DT;
     ScalarEvolution *SE;
     TargetLibraryInfo *TLI;
+    const ScalarTargetTransformInfo *STTI;
   public:
     static char ID;
     explicit LoopIdiomRecognize() : LoopPass(ID) {
       initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+      TD = 0; DT = 0; SE = 0; TLI = 0; STTI = 0;
     }
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -110,6 +178,36 @@ namespace {
       AU.addRequired<DominatorTree>();
       AU.addRequired<TargetLibraryInfo>();
     }
+
+    const DataLayout *getDataLayout() {
+      return TD ? TD : TD = getAnalysisIfAvailable<DataLayout>();
+    }
+
+    DominatorTree *getDominatorTree() {
+      return DT ? DT : (DT = &getAnalysis<DominatorTree>());
+    }
+
+    ScalarEvolution *getScalarEvolution() {
+      return SE ? SE : (SE = &getAnalysis<ScalarEvolution>());
+    }
+
+    TargetLibraryInfo *getTargetLibraryInfo() {
+      return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>());
+    }
+
+    const ScalarTargetTransformInfo *getScalarTargetTransformInfo() {
+      if (!STTI) {
+        TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+        if (TTI) STTI = TTI->getScalarTargetTransformInfo();
+      }
+      return STTI;
+    }
+
+    Loop *getLoop() const { return CurLoop; }
+
+  private:
+    bool runOnNoncountableLoop();
+    bool runOnCountableLoop();
   };
 }
 
@@ -172,24 +270,393 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
   deleteDeadInstruction(I, SE, TLI);
 }
 
-bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
-  CurLoop = L;
+//===----------------------------------------------------------------------===//
+//
+//          Implementation of LIRUtil
+//
+//===----------------------------------------------------------------------===//
 
-  // If the loop could not be converted to canonical form, it must have an
-  // indirectbr in it, just give up.
-  if (!L->getLoopPreheader())
+// This function will return true iff the given block contains nothing but a
+// goto. A typical usage of this function is to check if the preheader is
+// "almost" empty, such that the generated intrinsic function can be moved
+// across the preheader and placed at the end of the precondition block
+// without concern about breaking data dependences.
+bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
+  if (BranchInst *Br = getBranch(BB)) {
+    return Br->isUnconditional() && BB->size() == 1;
+  }
+  return false;
+}
+
+Value *LIRUtil::getBrCondition(BasicBlock *BB) {
+  BranchInst *Br = getBranch(BB);
+  return Br ? Br->getCondition() : 0;
+}
+
+BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
+  if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
+    BranchInst *Br = getBranch(BB);
+    return Br && Br->isConditional() ? BB : 0;
+  }
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+//          Implementation of NclPopcountRecognize
+//
+//===----------------------------------------------------------------------===//
+
+NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
+  LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
+}
+
+bool NclPopcountRecognize::preliminaryScreen() {
+  const ScalarTargetTransformInfo *STTI = LIR.getScalarTargetTransformInfo();
+  if (STTI->getPopcntHwSupport(32) != ScalarTargetTransformInfo::Fast)
     return false;
 
-  // Disable loop idiom recognition if the function's name is a common idiom.
-  StringRef Name = L->getHeader()->getParent()->getName();
-  if (Name == "memset" || Name == "memcpy")
+  // Counting the population is usually conducted with a few arithmetic
+  // instructions. Such instructions can be easily "absorbed" by vacant slots
+  // in a non-compact loop. Therefore, recognizing the popcount idiom only
+  // makes sense in a compact loop.
+
+  // Give up if the loop has multiple blocks or multiple backedges.
+  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
     return false;
 
-  // The trip count of the loop must be analyzable.
-  SE = &getAnalysis<ScalarEvolution>();
-  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+  BasicBlock *LoopBody = *(CurLoop->block_begin());
+  if (LoopBody->size() >= 20) {
+    // The loop is too big, bail out.
+    return false;
+  }
+
+  // It should have a preheader containing nothing but a goto instruction.
+  BasicBlock *PreHead = CurLoop->getLoopPreheader();
+  if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
+    return false;
+
+  // It should have a precondition block where the generated popcount
+  // intrinsic function will be inserted.
+  PreCondBB = LIRUtil::getPrecondBb(PreHead);
+  if (!PreCondBB)
+    return false;
+
+  return true;
+}
+
+Value *NclPopcountRecognize::matchCondition(BranchInst *Br,
+                                            BasicBlock *LoopEntry) const {
+  if (!Br || !Br->isConditional())
+    return 0;
+
+  ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
+  if (!Cond)
+    return 0;
+
+  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
+  if (!CmpZero || !CmpZero->isZero())
+    return 0;
+
+  ICmpInst::Predicate Pred = Cond->getPredicate();
+  if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
+      (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
+    return Cond->getOperand(0);
+
+  return 0;
+}
+
+bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
+                                       PHINode *&CntPhi,
+                                       Value *&Var) const {
+  // The following code tries to detect this idiom:
+  //
+  //    if (x0 != 0)
+  //      goto loop-exit // the precondition of the loop
+  //    cnt0 = init-val;
+  //    do {
+  //       x1 = phi (x0, x2);
+  //       cnt1 = phi(cnt0, cnt2);
+  //
+  //       cnt2 = cnt1 + 1;
+  //        ...
+  //       x2 = x1 & (x1 - 1);
+  //        ...
+  //    } while(x != 0);
+  //
+  // loop-exit:
+  //
+
+  // step 1: Check to see if the look-back branch matches this pattern:
+  //    "if (a!=0) goto loop-entry".
+  BasicBlock *LoopEntry;
+  Instruction *DefX2, *CountInst;
+  Value *VarX1, *VarX0;
+  PHINode *PhiX, *CountPhi;
+
+  DefX2 = CountInst = 0;
+  VarX1 = VarX0 = 0;
+  PhiX = CountPhi = 0;
+  LoopEntry = *(CurLoop->block_begin());
+
+  // step 1: Check if the loop-back branch is in desirable form.
+  {
+    if (Value *T = matchCondition(LIRUtil::getBranch(LoopEntry), LoopEntry))
+      DefX2 = dyn_cast<Instruction>(T);
+    else
+      return false;
+  }
+
+  // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
+  {
+    if (DefX2->getOpcode() != Instruction::And)
+      return false;
+
+    BinaryOperator *SubOneOp;
+
+    if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
+      VarX1 = DefX2->getOperand(1);
+    else {
+      VarX1 = DefX2->getOperand(0);
+      SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
+    }
+    if (!SubOneOp)
+      return false;
+
+    Instruction *SubInst = cast<Instruction>(SubOneOp);
+    ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
+    if (!Dec ||
+        !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
+          (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
+      return false;
+    }
+  }
+
+  // step 3: Check the recurrence of variable X
+  {
+    PhiX = dyn_cast<PHINode>(VarX1);
+    if (!PhiX ||
+        (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
+      return false;
+    }
+  }
+
+  // step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
+  {
+    CountInst = NULL;
+    for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
+           IterE = LoopEntry->end(); Iter != IterE; Iter++) {
+      Instruction *Inst = Iter;
+      if (Inst->getOpcode() != Instruction::Add)
+        continue;
+
+      ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+      if (!Inc || !Inc->isOne())
+        continue;
+
+      PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
+      if (!Phi || Phi->getParent() != LoopEntry)
+        continue;
+
+      // Check if the result of the instruction is live out of the loop.
+      bool LiveOutLoop = false;
+      for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
+           I != E; I++) {
+        if ((cast<Instruction>(*I))->getParent() != LoopEntry) {
+          LiveOutLoop = true; break;
+        }
+      }
+
+      if (LiveOutLoop) {
+        CountInst = Inst;
+        CountPhi = Phi;
+        break;
+      }
+    }
+
+    if (!CountInst)
+      return false;
+  }
+
+  // step 5: check if the precondition is in this form:
+  //   "if (x != 0) goto loop-head; else goto somewhere-we-don't-care;"
+  {
+    BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
+    Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
+    if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
+      return false;
+
+    CntInst = CountInst;
+    CntPhi = CountPhi;
+    Var = T;
+  }
+
+  return true;
+}
+
+void NclPopcountRecognize::transform(Instruction *CntInst,
+                                     PHINode *CntPhi, Value *Var) {
+
+  ScalarEvolution *SE = LIR.getScalarEvolution();
+  TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo();
+  BasicBlock *PreHead = CurLoop->getLoopPreheader();
+  BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
+  const DebugLoc DL = CntInst->getDebugLoc();
+
+  // Before the transformation, the loop looks like this:
+  //   if (x) // the precondition
+  //     do { cnt++; x &= x - 1; } while(x);
+
+  // Step 1: Insert the ctpop instruction at the end of the precondition block
+  IRBuilderTy Builder(PreCondBr);
+  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
+  {
+    PopCnt = createPopcntIntrinsic(Builder, Var, DL);
+    NewCount = PopCntZext =
+      Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
+
+    if (NewCount != PopCnt)
+      (cast<Instruction>(NewCount))->setDebugLoc(DL);
+
+    // TripCnt is exactly the number of iterations the loop has.
+    TripCnt = NewCount;
+
+    // If the population counter's initial value is not zero, insert an Add Inst.
+ Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
+ ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+ if (!InitConst || !InitConst->isZero()) {
+ NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+ }
+ }
+
+ // Step 2: Replace the precondition "if (x == 0) goto loop-exit" with
+ // "if (NewCount == 0) goto loop-exit". Without this change, the intrinsic
+ // function would be partially dead code, and downstream passes would drag
+ // it back from the precondition block to the preheader.
+ {
+ ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
+
+ Value *Opnd0 = PopCntZext;
+ Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
+ if (PreCond->getOperand(0) != Var)
+ std::swap(Opnd0, Opnd1);
+
+ ICmpInst *NewPreCond =
+ cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
+ PreCond->replaceAllUsesWith(NewPreCond);
+
+ deleteDeadInstruction(PreCond, *SE, TLI);
+ }
+
+ // Step 3: Note that the population count is exactly the trip count of the
+ // loop in question, which enables us to convert the loop from a noncountable
+ // loop into a countable one. The benefit is twofold:
+ //
+ // - If the loop only counts the population, the entire loop becomes dead
+ // after the transformation. It is much easier to prove a countable loop
+ // dead than to prove a noncountable one. (In some C dialects, an infinite
+ // loop isn't dead even if it computes nothing useful. In general, DCE needs
+ // to prove a noncountable loop finite before safely deleting it.)
+ //
+ // - If the loop also performs something else, it remains alive.
+ // Since it is transformed into countable form, it can be aggressively
+ // optimized by some optimizations which are in general not applicable
+ // to a noncountable loop.
+ //
+ // After this step, this loop (conceptually) would look like the following:
+ // newcnt = __builtin_ctpop(x);
+ // t = newcnt;
+ // if (x)
+ // do { cnt++; x &= x-1; t--; } while (t > 0);
+ BasicBlock *Body = *(CurLoop->block_begin());
+ {
+ BranchInst *LbBr = LIRUtil::getBranch(Body);
+ ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
+ Type *Ty = TripCnt->getType();
+
+ PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin());
+
+ Builder.SetInsertPoint(LbCond);
+ Value *Opnd1 = cast<Value>(TcPhi);
+ Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1));
+ Instruction *TcDec =
+ cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true));
+
+ TcPhi->addIncoming(TripCnt, PreHead);
+ TcPhi->addIncoming(TcDec, Body);
+
+ CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ?
+ CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
+ LbCond->setPredicate(Pred);
+ LbCond->setOperand(0, TcDec);
+ LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0)));
+ }
+
+ // Step 4: All the references to the original population counter outside
+ // the loop are replaced with NewCount, the value returned from
+ // __builtin_ctpop().
+ {
+ SmallVector<Value *, 4> CntUses;
+ for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end();
+ I != E; I++) {
+ if (cast<Instruction>(*I)->getParent() != Body)
+ CntUses.push_back(*I);
+ }
+ for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
+ (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
+ }
+ }
+
+ // Step 5: Forget the "non-computable" trip-count SCEV associated with the
+ // loop. The loop would otherwise not be deleted even if it becomes empty.
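+ // (ScalarEvolution caches the backedge-taken count per loop; for this loop
+ // the cached value is SCEVCouldNotCompute, so it must be dropped before
+ // later passes can see the rewritten loop as countable.)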
+ SE->forgetLoop(CurLoop);
+}
+
+CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
+ Value *Val, DebugLoc DL) {
+ Value *Ops[] = { Val };
+ Type *Tys[] = { Val->getType() };
+
+ Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
+ Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
+ CallInst *CI = IRBuilder.CreateCall(Func, Ops);
+ CI->setDebugLoc(DL);
+
+ return CI;
+}
+
+/// recognize - Detect the population count idiom in a non-countable loop.
+/// If detected, transform the relevant code into a call to the popcount
+/// intrinsic and return true; otherwise, return false.
+bool NclPopcountRecognize::recognize() {
+
+ if (!LIR.getScalarTargetTransformInfo())
 return false;
- const SCEV *BECount = SE->getBackedgeTakenCount(L);
+
+ LIR.getScalarEvolution();
+
+ if (!preliminaryScreen())
+ return false;
+
+ Instruction *CntInst;
+ PHINode *CntPhi;
+ Value *Val;
+ if (!detectIdiom(CntInst, CntPhi, Val))
+ return false;
+
+ transform(CntInst, CntPhi, Val);
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of LoopIdiomRecognize
+//
+//===----------------------------------------------------------------------===//
+
+bool LoopIdiomRecognize::runOnCountableLoop() {
+ const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
 if (isa<SCEVCouldNotCompute>(BECount)) return false;
 
 // If this loop executes exactly one time, then it should be peeled, not
@@ -199,24 +666,27 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
 return false;
 
 // We require target data for now.
- TD = getAnalysisIfAvailable<DataLayout>();
- if (TD == 0) return false;
+ if (!getDataLayout())
+ return false;
+
+ getDominatorTree();
 
- DT = &getAnalysis<DominatorTree>();
 LoopInfo &LI = getAnalysis<LoopInfo>();
 TLI = &getAnalysis<TargetLibraryInfo>();
 
+ getTargetLibraryInfo();
+
 SmallVector<BasicBlock*, 8> ExitBlocks;
 CurLoop->getUniqueExitBlocks(ExitBlocks);
 
 DEBUG(dbgs() << "loop-idiom Scanning: F["
- << L->getHeader()->getParent()->getName()
- << "] Loop %" << L->getHeader()->getName() << "\n");
+ << CurLoop->getHeader()->getParent()->getName()
+ << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
 
 bool MadeChange = false;
 // Scan all the blocks in the loop that are not in subloops.
- for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
- ++BI) {
+ for (Loop::block_iterator BI = CurLoop->block_begin(),
+ E = CurLoop->block_end(); BI != E; ++BI) {
 // Ignore blocks in subloops.
 if (LI.getLoopFor(*BI) != CurLoop)
 continue;
@@ -226,6 +696,33 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
 return MadeChange;
 }
 
+bool LoopIdiomRecognize::runOnNoncountableLoop() {
+ NclPopcountRecognize Popcount(*this);
+ if (Popcount.recognize())
+ return true;
+
+ return false;
+}
+
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ CurLoop = L;
+
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it; just give up.
+ if (!L->getLoopPreheader())
+ return false;
+
+ // Disable loop idiom recognition if the function's name is a common idiom.
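+ // (Rationale sketch: a C library's own memset may itself be written as a
+ // plain store loop; recognizing the idiom inside it would rewrite its body
+ // into a call to memset, i.e. into infinite recursion.)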
+ StringRef Name = L->getHeader()->getParent()->getName(); + if (Name == "memset" || Name == "memcpy") + return false; + + SE = &getAnalysis<ScalarEvolution>(); + if (SE->hasLoopInvariantBackedgeTakenCount(L)) + return runOnCountableLoop(); + return runOnNoncountableLoop(); +} + /// runOnLoopBlock - Process the specified block, which lives in a counted loop /// with the specified backedge count. This block is known to be in the current /// loop and not in any subloops. diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 558f62e6b4..10ba22434a 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -12,17 +12,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-instsimplify" -#include "llvm/Instructions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Support/Debug.h" #include "llvm/DataLayout.h" +#include "llvm/Instructions.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumSimplified, "Number of redundant instructions simplified"); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index abe07aa9d3..249baf5164 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -13,20 +13,20 @@ #define DEBUG_TYPE "loop-rotate" #include "llvm/Transforms/Scalar.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; #define MAX_HEADER_SIZE 16 diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 958348d9fa..d571ba3fe0 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -54,27 +54,27 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-reduce" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/AddressingMode.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Assembly/Writer.h" -#include 
"llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/Support/Debug.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 0d781ac977..2b15528411 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -13,16 +13,16 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-unroll" -#include "llvm/IntrinsicInst.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/DataLayout.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/DataLayout.h" #include <climits> using namespace llvm; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 047b43eb84..d41da4a9a9 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -28,25 +28,25 @@ #define DEBUG_TYPE "loop-unswitch" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <map> #include <set> diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 517657cf52..26b6269f42 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -14,20 +14,20 @@ #define DEBUG_TYPE "memcpyopt" #include "llvm/Transforms/Scalar.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" #include "llvm/ADT/SmallVector.h" #include 
"llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/DataLayout.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include <list> diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index dfdf50549d..ce397658bf 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -29,9 +29,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "objc-arc" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/CommandLine.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // A handy option to enable/disable all optimizations in this file. @@ -132,12 +132,12 @@ namespace { // ARC Utilities. //===----------------------------------------------------------------------===// +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Intrinsics.h" #include "llvm/Module.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CallSite.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/Transforms/Utils/Local.h" namespace { /// InstructionClass - A simple classification for instructions. @@ -660,9 +660,9 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { // ARC AliasAnalysis. //===----------------------------------------------------------------------===// -#include "llvm/Pass.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" namespace { /// ObjCARCAliasAnalysis - This is a simple alias analysis @@ -912,8 +912,8 @@ bool ObjCARCExpand::runOnFunction(Function &F) { // ARC autorelease pool elimination. //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Constants.h" namespace { /// ObjCARCAPElim - Autorelease pool elimination. @@ -1093,10 +1093,10 @@ bool ObjCARCAPElim::runOnModule(Module &M) { // TODO: Delete release+retain pairs (rare). 
+#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/LLVMContext.h" #include "llvm/Support/CFG.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallPtrSet.h" STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); @@ -1788,8 +1788,8 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); RetainRVCallee = M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, @@ -1804,8 +1804,8 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); AutoreleaseRVCallee = M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, @@ -1818,8 +1818,8 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) { if (!ReleaseCallee) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); ReleaseCallee = M->getOrInsertFunction( @@ -1834,8 +1834,8 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) { if (!RetainCallee) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); RetainCallee = M->getOrInsertFunction( @@ -1856,7 +1856,7 @@ Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { M->getOrInsertFunction( "objc_retainBlock", FunctionType::get(Params[0], Params, /*isVarArg=*/false), - AttrListPtr()); + AttributeSet()); } return RetainBlockCallee; } @@ -1865,8 +1865,8 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { if (!AutoreleaseCallee) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); AutoreleaseCallee = M->getOrInsertFunction( @@ -3756,9 +3756,9 @@ void ObjCARCOpt::releaseMemory() { // TODO: ObjCARCContract could insert PHI nodes when uses aren't // dominated by single calls. 
-#include "llvm/Operator.h" -#include "llvm/InlineAsm.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/InlineAsm.h" +#include "llvm/Operator.h" STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); @@ -3840,8 +3840,8 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { Type *I8XX = PointerType::getUnqual(I8X); Type *Params[] = { I8XX, I8X }; - AttrListPtr Attributes = AttrListPtr() - .addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = AttributeSet() + .addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)) .addAttr(M->getContext(), 1, Attributes::get(C, Attributes::NoCapture)); @@ -3860,8 +3860,8 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); RetainAutoreleaseCallee = M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes); @@ -3875,8 +3875,8 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes = - AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + AttributeSet Attributes = + AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, Attributes::get(C, Attributes::NoUnwind)); RetainAutoreleaseRVCallee = M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 09687d8909..569439aaf4 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -22,7 +22,12 @@ #define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -30,16 +35,11 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; @@ -339,36 +339,6 @@ static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) { } } -/// EvaluateRepeatedConstant - Compute C op C op ... op C where the constant C -/// is repeated Weight times. -static Constant *EvaluateRepeatedConstant(unsigned Opcode, Constant *C, - APInt Weight) { - // For addition the result can be efficiently computed as the product of the - // constant and the weight. 
- if (Opcode == Instruction::Add) - return ConstantExpr::getMul(C, ConstantInt::get(C->getContext(), Weight)); - - // The weight might be huge, so compute by repeated squaring to ensure that - // compile time is proportional to the logarithm of the weight. - Constant *Result = 0; - Constant *Power = C; // Successively C, C op C, (C op C) op (C op C) etc. - // Visit the bits in Weight. - while (Weight != 0) { - // If the current bit in Weight is non-zero do Result = Result op Power. - if (Weight[0]) - Result = Result ? ConstantExpr::get(Opcode, Result, Power) : Power; - // Move on to the next bit if any more are non-zero. - Weight = Weight.lshr(1); - if (Weight.isMinValue()) - break; - // Square the power. - Power = ConstantExpr::get(Opcode, Power, Power); - } - - assert(Result && "Only positive weights supported!"); - return Result; -} - typedef std::pair<Value*, APInt> RepeatedValue; /// LinearizeExprTree - Given an associative binary expression, return the leaf @@ -382,9 +352,7 @@ typedef std::pair<Value*, APInt> RepeatedValue; /// op /// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times /// -/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct, and -/// they are all non-constant except possibly for the last one, which if it is -/// constant will have weight one (Ops[N].second === 1). +/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct. /// /// This routine may modify the function, in which case it returns 'true'. The /// changes it makes may well be destructive, changing the value computed by 'I' @@ -455,10 +423,6 @@ static bool LinearizeExprTree(BinaryOperator *I, assert(Instruction::isAssociative(Opcode) && Instruction::isCommutative(Opcode) && "Expected an associative and commutative operation!"); - // If we see an absorbing element then the entire expression must be equal to - // it. For example, if this is a multiplication expression and zero occurs as - // an operand somewhere in it then the result of the expression must be zero. - Constant *Absorber = ConstantExpr::getBinOpAbsorber(Opcode, I->getType()); // Visit all operands of the expression, keeping track of their weight (the // number of paths from the expression root to the operand, or if you like @@ -506,13 +470,6 @@ static bool LinearizeExprTree(BinaryOperator *I, DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n"); assert(!Op->use_empty() && "No uses, so how did we get to it?!"); - // If the expression contains an absorbing element then there is no need - // to analyze it further: it must evaluate to the absorbing element. - if (Op == Absorber && !Weight.isMinValue()) { - Ops.push_back(std::make_pair(Absorber, APInt(Bitwidth, 1))); - return MadeChange; - } - // If this is a binary operation of the right kind with only one use then // add its operands to the expression. if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) { @@ -604,7 +561,6 @@ static bool LinearizeExprTree(BinaryOperator *I, // The leaves, repeated according to their weights, represent the linearized // form of the expression. - Constant *Cst = 0; // Accumulate constants here. for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) { Value *V = LeafOrder[i]; LeafMap::iterator It = Leaves.find(V); @@ -618,31 +574,14 @@ static bool LinearizeExprTree(BinaryOperator *I, continue; // Ensure the leaf is only output once. It->second = 0; - // Glob all constants together into Cst. 
- if (Constant *C = dyn_cast<Constant>(V)) {
- C = EvaluateRepeatedConstant(Opcode, C, Weight);
- Cst = Cst ? ConstantExpr::get(Opcode, Cst, C) : C;
- continue;
- }
-
 // Add non-constant
 Ops.push_back(std::make_pair(V, Weight));
 }
 
- // Add any constants back into Ops, all globbed together and reduced to having
- // weight 1 for the convenience of users.
- Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
- if (Cst && Cst != Identity) {
- // If combining multiple constants resulted in the absorber then the entire
- // expression must evaluate to the absorber.
- if (Cst == Absorber)
- Ops.clear();
- Ops.push_back(std::make_pair(Cst, APInt(Bitwidth, 1)));
- }
-
 // For nilpotent operations or addition there may be no operands, for example
 // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
 // in both cases the weight reduces to 0 causing the value to be skipped.
 if (Ops.empty()) {
+ Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
 assert(Identity && "Associative operation without identity!");
 Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1)));
 }
@@ -656,8 +595,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
 SmallVectorImpl<ValueEntry> &Ops) {
 assert(Ops.size() > 1 && "Single values should be used directly!");
 
- // Since our optimizations never increase the number of operations, the new
- // expression can always be written by reusing the existing binary operators
+ // Since our optimizations should never increase the number of operations, the
+ // new expression can usually be written reusing the existing binary operators
 // from the original expression tree, without creating any new instructions,
 // though the rewritten expression may have a completely different topology.
 // We take care to not change anything if the new expression will be the same
@@ -671,6 +610,20 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
 unsigned Opcode = I->getOpcode();
 BinaryOperator *Op = I;
 
+ /// NotRewritable - The operands being written will be the leaves of the new
+ /// expression and must not be used as inner nodes (via NodesToRewrite) by
+ /// mistake. Inner nodes are always reassociable, and usually leaves are not
+ /// (if they were they would have been incorporated into the expression and so
+ /// would not be leaves), so most of the time there is no danger of this. But
+ /// in rare cases a leaf may become reassociable if an optimization kills uses
+ /// of it, or it may momentarily become reassociable during rewriting (below)
+ /// due to it being removed as an operand of one of its uses. Ensure that misuse
+ /// of leaf nodes as inner nodes cannot occur by remembering all of the future
+ /// leaves and refusing to reuse any of them as inner nodes.
+ SmallPtrSet<Value*, 8> NotRewritable;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ NotRewritable.insert(Ops[i].Op);
+
 // ExpressionChanged - Non-null if the rewritten expression differs from the
 // original in some non-trivial way, requiring the clearing of optional flags.
 // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
@@ -703,12 +656,14 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
 // the old operands with the new ones.
DEBUG(dbgs() << "RA: " << *Op << '\n'); if (NewLHS != OldLHS) { - if (BinaryOperator *BO = isReassociableOp(OldLHS, Opcode)) + BinaryOperator *BO = isReassociableOp(OldLHS, Opcode); + if (BO && !NotRewritable.count(BO)) NodesToRewrite.push_back(BO); Op->setOperand(0, NewLHS); } if (NewRHS != OldRHS) { - if (BinaryOperator *BO = isReassociableOp(OldRHS, Opcode)) + BinaryOperator *BO = isReassociableOp(OldRHS, Opcode); + if (BO && !NotRewritable.count(BO)) NodesToRewrite.push_back(BO); Op->setOperand(1, NewRHS); } @@ -732,7 +687,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, Op->swapOperands(); } else { // Overwrite with the new right-hand side. - if (BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode)) + BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode); + if (BO && !NotRewritable.count(BO)) NodesToRewrite.push_back(BO); Op->setOperand(1, NewRHS); ExpressionChanged = Op; @@ -745,7 +701,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // Now deal with the left-hand side. If this is already an operation node // from the original expression then just rewrite the rest of the expression // into it. - if (BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode)) { + BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode); + if (BO && !NotRewritable.count(BO)) { Op = BO; continue; } @@ -1446,9 +1403,26 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops) { // Now that we have the linearized expression tree, try to optimize it. // Start by folding any constants that we found. - if (Ops.size() == 1) return Ops[0].Op; - + Constant *Cst = 0; unsigned Opcode = I->getOpcode(); + while (!Ops.empty() && isa<Constant>(Ops.back().Op)) { + Constant *C = cast<Constant>(Ops.pop_back_val().Op); + Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C; + } + // If there was nothing but constants then we are done. + if (Ops.empty()) + return Cst; + + // Put the combined constant back at the end of the operand list, except if + // there is no point. For example, an add of 0 gets dropped here, while a + // multiplication by zero turns the whole expression into zero. + if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) { + if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType())) + return Cst; + Ops.push_back(ValueEntry(0, Cst)); + } + + if (Ops.size() == 1) return Ops[0].Op; // Handle destructive annihilation due to identities between elements in the // argument list here. 
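A quick illustration of the constant handling above (a sketch with hypothetical
operands, not text from the patch): OptimizeExpression now pops trailing
constants off the linearized operand list, folds them into a single constant,
and re-appends it only when it is not the identity:

    // For an integer add chain:  (x + 4) + (y + 5)  becomes  x + y + 9
    // Identity constant dropped: x + 0              becomes  x
    // Absorbing constant wins:   (x * 5) * 0        becomes  0 (whole expression)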
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index ea1de63de7..5524e01230 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -18,15 +18,15 @@ #define DEBUG_TYPE "reg2mem" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/BasicBlock.h" #include "llvm/Function.h" +#include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/BasicBlock.h" -#include "llvm/Instructions.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" #include <list> using namespace llvm; diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 686520e724..28aaddc50e 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -19,26 +19,26 @@ #define DEBUG_TYPE "sccp" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" +#include "llvm/InstVisitor.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index d95c855ce7..1c220ca0f6 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -25,33 +25,34 @@ #define DEBUG_TYPE "sroa" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/DIBuilder.h" +#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" +#include "llvm/InstVisitor.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include 
"llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -334,7 +335,7 @@ private: class UseBuilder; friend class AllocaPartitioning::UseBuilder; -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// \brief Handle to alloca instruction to simplify method interfaces. AllocaInst &AI; #endif @@ -404,106 +405,17 @@ private: }; } -template <typename DerivedT, typename RetT> -class AllocaPartitioning::BuilderBase - : public InstVisitor<DerivedT, RetT> { -public: - BuilderBase(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P) - : TD(TD), - AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())), - P(P) { - enqueueUsers(AI, 0); +static Value *foldSelectInst(SelectInst &SI) { + // If the condition being selected on is a constant or the same value is + // being selected between, fold the select. Yes this does (rarely) happen + // early on. + if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) + return SI.getOperand(1+CI->isZero()); + if (SI.getOperand(1) == SI.getOperand(2)) { + return SI.getOperand(1); } - -protected: - const DataLayout &TD; - const uint64_t AllocSize; - AllocaPartitioning &P; - - SmallPtrSet<Use *, 8> VisitedUses; - - struct OffsetUse { - Use *U; - int64_t Offset; - }; - SmallVector<OffsetUse, 8> Queue; - - // The active offset and use while visiting. - Use *U; - int64_t Offset; - - void enqueueUsers(Instruction &I, int64_t UserOffset) { - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); - UI != UE; ++UI) { - if (VisitedUses.insert(&UI.getUse())) { - OffsetUse OU = { &UI.getUse(), UserOffset }; - Queue.push_back(OU); - } - } - } - - bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) { - GEPOffset = Offset; - for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI); - GTI != GTE; ++GTI) { - ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); - if (!OpC) - return false; - if (OpC->isZero()) - continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = TD.getStructLayout(STy); - uint64_t ElementOffset = SL->getElementOffset(ElementIdx); - // Check that we can continue to model this GEP in a signed 64-bit offset. - if (ElementOffset > INT64_MAX || - (GEPOffset >= 0 && - ((uint64_t)GEPOffset + ElementOffset) > INT64_MAX)) { - DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding " - << "what can be represented in an int64_t!\n" - << " alloca: " << P.AI << "\n"); - return false; - } - if (GEPOffset < 0) - GEPOffset = ElementOffset + (uint64_t)-GEPOffset; - else - GEPOffset += ElementOffset; - continue; - } - - APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits()); - Index *= APInt(Index.getBitWidth(), - TD.getTypeAllocSize(GTI.getIndexedType())); - Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset, - /*isSigned*/true); - // Check if the result can be stored in our int64_t offset. 
- if (!Index.isSignedIntN(sizeof(GEPOffset) * 8)) { - DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding " - << "what can be represented in an int64_t!\n" - << " alloca: " << P.AI << "\n"); - return false; - } - - GEPOffset = Index.getSExtValue(); - } - return true; - } - - Value *foldSelectInst(SelectInst &SI) { - // If the condition being selected on is a constant or the same value is - // being selected between, fold the select. Yes this does (rarely) happen - // early on. - if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) - return SI.getOperand(1+CI->isZero()); - if (SI.getOperand(1) == SI.getOperand(2)) { - assert(*U == SI.getOperand(1)); - return SI.getOperand(1); - } - return 0; - } -}; + return 0; +} /// \brief Builder for the alloca partitioning. /// @@ -511,63 +423,45 @@ protected: /// of an alloca and splitting the partitions for each load and store at each /// offset. class AllocaPartitioning::PartitionBuilder - : public BuilderBase<PartitionBuilder, bool> { - friend class InstVisitor<PartitionBuilder, bool>; + : public PtrUseVisitor<PartitionBuilder> { + friend class PtrUseVisitor<PartitionBuilder>; + friend class InstVisitor<PartitionBuilder>; + typedef PtrUseVisitor<PartitionBuilder> Base; + + const uint64_t AllocSize; + AllocaPartitioning &P; SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap; public: - PartitionBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P) - : BuilderBase<PartitionBuilder, bool>(TD, AI, P) {} - - /// \brief Run the builder over the allocation. - bool operator()() { - // Note that we have to re-evaluate size on each trip through the loop as - // the queue grows at the tail. - for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) { - U = Queue[Idx].U; - Offset = Queue[Idx].Offset; - if (!visit(cast<Instruction>(U->getUser()))) - return false; - } - return true; - } + PartitionBuilder(const DataLayout &DL, AllocaInst &AI, AllocaPartitioning &P) + : PtrUseVisitor<PartitionBuilder>(DL), + AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), + P(P) {} private: - bool markAsEscaping(Instruction &I) { - P.PointerEscapingInstr = &I; - return false; - } - - void insertUse(Instruction &I, int64_t Offset, uint64_t Size, + void insertUse(Instruction &I, const APInt &Offset, uint64_t Size, bool IsSplittable = false) { - // Completely skip uses which have a zero size or don't overlap the - // allocation. - if (Size == 0 || - (Offset >= 0 && (uint64_t)Offset >= AllocSize) || - (Offset < 0 && (uint64_t)-Offset >= Size)) { + // Completely skip uses which have a zero size or start either before or + // past the end of the allocation. + if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) { DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset - << " which starts past the end of the " << AllocSize - << " byte alloca:\n" + << " which has zero size or starts outside of the " + << AllocSize << " byte alloca:\n" << " alloca: " << P.AI << "\n" << " use: " << I << "\n"); return; } - // Clamp the start to the beginning of the allocation. 
- if (Offset < 0) { - DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset - << " to start at the beginning of the alloca:\n" - << " alloca: " << P.AI << "\n" - << " use: " << I << "\n"); - Size -= (uint64_t)-Offset; - Offset = 0; - } - - uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size; + uint64_t BeginOffset = Offset.getZExtValue(); + uint64_t EndOffset = BeginOffset + Size; // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. + // NOTE! This may appear superficially to be something we could ignore + // entirely, but that is not so! There may be PHI-node uses where some + // instructions are dead but not others. We can't completely ignore the + // PHI node, and so have to record at least the information here. assert(AllocSize >= BeginOffset); // Established above. if (Size > AllocSize - BeginOffset) { DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset @@ -581,9 +475,9 @@ private: P.Partitions.push_back(New); } - bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset, + void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, bool IsVolatile) { - uint64_t Size = TD.getTypeStoreSize(Ty); + uint64_t Size = DL.getTypeStoreSize(Ty); // If this memory access can be shown to *statically* extend outside the // bounds of of the allocation, it's behavior is undefined, so simply @@ -592,15 +486,15 @@ private: // risk of overflow. // FIXME: We should instead consider the pointer to have escaped if this // function is being instrumented for addressing bugs or race conditions. - if (Offset < 0 || (uint64_t)Offset >= AllocSize || - Size > (AllocSize - (uint64_t)Offset)) { + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) { DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte " << (isa<LoadInst>(I) ? 
"load" : "store") << " @" << Offset << " which extends past the end of the " << AllocSize << " byte alloca:\n" << " alloca: " << P.AI << "\n" << " use: " << I << "\n"); - return true; + return; } // We allow splitting of loads and stores where the type is an integer type @@ -611,54 +505,61 @@ private: IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8; insertUse(I, Offset, Size, IsSplittable); - return true; - } - - bool visitBitCastInst(BitCastInst &BC) { - enqueueUsers(BC, Offset); - return true; - } - - bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { - int64_t GEPOffset; - if (!computeConstantGEPOffset(GEPI, GEPOffset)) - return markAsEscaping(GEPI); - - enqueueUsers(GEPI, GEPOffset); - return true; } - bool visitLoadInst(LoadInst &LI) { + void visitLoadInst(LoadInst &LI) { assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && "All simple FCA loads should have been pre-split"); + + if (!IsOffsetKnown) + return PI.setAborted(&LI); + return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile()); } - bool visitStoreInst(StoreInst &SI) { + void visitStoreInst(StoreInst &SI) { Value *ValOp = SI.getValueOperand(); if (ValOp == *U) - return markAsEscaping(SI); + return PI.setEscapedAndAborted(&SI); + if (!IsOffsetKnown) + return PI.setAborted(&SI); assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); - return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); + handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); } - bool visitMemSetInst(MemSetInst &II) { + void visitMemSetInst(MemSetInst &II) { assert(II.getRawDest() == *U && "Pointer use is not the destination?"); ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); - uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; - insertUse(II, Offset, Size, Length); - return true; + if ((Length && Length->getValue() == 0) || + (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) + // Zero-length mem transfer intrinsics can be ignored entirely. + return; + + if (!IsOffsetKnown) + return PI.setAborted(&II); + + insertUse(II, Offset, + Length ? Length->getLimitedValue() + : AllocSize - Offset.getLimitedValue(), + (bool)Length); } - bool visitMemTransferInst(MemTransferInst &II) { + void visitMemTransferInst(MemTransferInst &II) { ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); - uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; - if (!Size) + if ((Length && Length->getValue() == 0) || + (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) // Zero-length mem transfer intrinsics can be ignored entirely. - return true; + return; + + if (!IsOffsetKnown) + return PI.setAborted(&II); + + uint64_t RawOffset = Offset.getLimitedValue(); + uint64_t Size = Length ? Length->getLimitedValue() + : AllocSize - RawOffset; MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; @@ -666,12 +567,12 @@ private: Offsets.IsSplittable = Length; if (*U == II.getRawDest()) { - Offsets.DestBegin = Offset; - Offsets.DestEnd = Offset + Size; + Offsets.DestBegin = RawOffset; + Offsets.DestEnd = RawOffset + Size; } if (*U == II.getRawSource()) { - Offsets.SourceBegin = Offset; - Offsets.SourceEnd = Offset + Size; + Offsets.SourceBegin = RawOffset; + Offsets.SourceEnd = RawOffset + Size; } // If we have set up end offsets for both the source and the destination, @@ -684,7 +585,7 @@ private: // In that case, we can completely elide the transfer. 
if (!II.isVolatile() && Offsets.SourceBegin == Offsets.DestBegin) { P.Partitions[PrevIdx].kill(); - return true; + return; } // Otherwise we have an offset transfer within the same alloca. We can't @@ -697,7 +598,7 @@ private: // For non-volatile transfers this is a no-op. if (!II.isVolatile()) - return true; + return; // Otherwise just suppress splitting. Offsets.IsSplittable = false; @@ -717,23 +618,25 @@ private: "Already have intrinsic in map but haven't seen both ends"); (void)Inserted; } - - return true; } // Disable SRoA for any intrinsics except for lifetime invariants. // FIXME: What about debug instrinsics? This matches old behavior, but // doesn't make sense. - bool visitIntrinsicInst(IntrinsicInst &II) { + void visitIntrinsicInst(IntrinsicInst &II) { + if (!IsOffsetKnown) + return PI.setAborted(&II); + if (II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end) { ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0)); - uint64_t Size = std::min(AllocSize - Offset, Length->getLimitedValue()); + uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(), + Length->getLimitedValue()); insertUse(II, Offset, Size, true); - return true; + return; } - return markAsEscaping(II); + Base::visitIntrinsicInst(II); } Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) { @@ -753,14 +656,14 @@ private: llvm::tie(UsedI, I) = Uses.pop_back_val(); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - Size = std::max(Size, TD.getTypeStoreSize(LI->getType())); + Size = std::max(Size, DL.getTypeStoreSize(LI->getType())); continue; } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { Value *Op = SI->getOperand(0); if (Op == UsedI) return SI; - Size = std::max(Size, TD.getTypeStoreSize(Op->getType())); + Size = std::max(Size, DL.getTypeStoreSize(Op->getType())); continue; } @@ -781,54 +684,62 @@ private: return 0; } - bool visitPHINode(PHINode &PN) { + void visitPHINode(PHINode &PN) { + if (PN.use_empty()) + return; + if (!IsOffsetKnown) + return PI.setAborted(&PN); + // See if we already have computed info on this node. std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN]; if (PHIInfo.first) { PHIInfo.second = true; insertUse(PN, Offset, PHIInfo.first); - return true; + return; } // Check for an unsafe use of the PHI node. - if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first)) - return markAsEscaping(*EscapingI); + if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first)) + return PI.setAborted(UnsafeI); insertUse(PN, Offset, PHIInfo.first); - return true; } - bool visitSelectInst(SelectInst &SI) { + void visitSelectInst(SelectInst &SI) { + if (SI.use_empty()) + return; if (Value *Result = foldSelectInst(SI)) { if (Result == *U) // If the result of the constant fold will be the pointer, recurse // through the select as if we had RAUW'ed it. - enqueueUsers(SI, Offset); + enqueueUsers(SI); - return true; + return; } + if (!IsOffsetKnown) + return PI.setAborted(&SI); // See if we already have computed info on this node. std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI]; if (SelectInfo.first) { SelectInfo.second = true; insertUse(SI, Offset, SelectInfo.first); - return true; + return; } // Check for an unsafe use of the PHI node. 
- if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first)) - return markAsEscaping(*EscapingI); + if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first)) + return PI.setAborted(UnsafeI); insertUse(SI, Offset, SelectInfo.first); - return true; } /// \brief Disable SROA entirely if there are unhandled users of the alloca. - bool visitInstruction(Instruction &I) { return markAsEscaping(I); } + void visitInstruction(Instruction &I) { + PI.setAborted(&I); + } }; - /// \brief Use adder for the alloca partitioning. /// /// This class adds the uses of an alloca to all of the partitions which they @@ -847,26 +758,22 @@ private: /// partition space is pre-sorted, and do a logarithmic search for the /// partition needed, making the total visit a classical ((N + M) * log(N)) /// complexity operation. -class AllocaPartitioning::UseBuilder : public BuilderBase<UseBuilder> { +class AllocaPartitioning::UseBuilder : public PtrUseVisitor<UseBuilder> { + friend class PtrUseVisitor<UseBuilder>; friend class InstVisitor<UseBuilder>; + typedef PtrUseVisitor<UseBuilder> Base; + + const uint64_t AllocSize; + AllocaPartitioning &P; /// \brief Set to de-duplicate dead instructions found in the use walk. SmallPtrSet<Instruction *, 4> VisitedDeadInsts; public: UseBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P) - : BuilderBase<UseBuilder>(TD, AI, P) {} - - /// \brief Run the builder over the allocation. - void operator()() { - // Note that we have to re-evaluate size on each trip through the loop as - // the queue grows at the tail. - for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) { - U = Queue[Idx].U; - Offset = Queue[Idx].Offset; - this->visit(cast<Instruction>(U->getUser())); - } - } + : PtrUseVisitor<UseBuilder>(TD), + AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())), + P(P) {} private: void markAsDead(Instruction &I) { @@ -874,20 +781,14 @@ private: P.DeadUsers.push_back(&I); } - void insertUse(Instruction &User, int64_t Offset, uint64_t Size) { + void insertUse(Instruction &User, const APInt &Offset, uint64_t Size) { // If the use has a zero size or extends outside of the allocation, record // it as a dead use for elimination later. - if (Size == 0 || (uint64_t)Offset >= AllocSize || - (Offset < 0 && (uint64_t)-Offset >= Size)) + if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) return markAsDead(User); - // Clamp the start to the beginning of the allocation. - if (Offset < 0) { - Size -= (uint64_t)-Offset; - Offset = 0; - } - - uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size; + uint64_t BeginOffset = Offset.getZExtValue(); + uint64_t EndOffset = BeginOffset + Size; // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. @@ -910,15 +811,15 @@ private: } } - void handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) { - uint64_t Size = TD.getTypeStoreSize(Ty); + void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) { + uint64_t Size = DL.getTypeStoreSize(Ty); // If this memory access can be shown to *statically* extend outside the // bounds of of the allocation, it's behavior is undefined, so simply // ignore it. Note that this is more strict than the generic clamping // behavior of insertUse. 
- if (Offset < 0 || (uint64_t)Offset >= AllocSize || - Size > (AllocSize - (uint64_t)Offset)) + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) return markAsDead(I); insertUse(I, Offset, Size); @@ -928,40 +829,47 @@ private: if (BC.use_empty()) return markAsDead(BC); - enqueueUsers(BC, Offset); + return Base::visitBitCastInst(BC); } void visitGetElementPtrInst(GetElementPtrInst &GEPI) { if (GEPI.use_empty()) return markAsDead(GEPI); - int64_t GEPOffset; - if (!computeConstantGEPOffset(GEPI, GEPOffset)) - llvm_unreachable("Unable to compute constant offset for use"); - - enqueueUsers(GEPI, GEPOffset); + return Base::visitGetElementPtrInst(GEPI); } void visitLoadInst(LoadInst &LI) { + assert(IsOffsetKnown); handleLoadOrStore(LI.getType(), LI, Offset); } void visitStoreInst(StoreInst &SI) { + assert(IsOffsetKnown); handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset); } void visitMemSetInst(MemSetInst &II) { ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); - uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; - insertUse(II, Offset, Size); + if ((Length && Length->getValue() == 0) || + (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) + return markAsDead(II); + + assert(IsOffsetKnown); + insertUse(II, Offset, Length ? Length->getLimitedValue() + : AllocSize - Offset.getLimitedValue()); } void visitMemTransferInst(MemTransferInst &II) { ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); - uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; - if (!Size) + if ((Length && Length->getValue() == 0) || + (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) return markAsDead(II); + assert(IsOffsetKnown); + uint64_t Size = Length ? Length->getLimitedValue() + : AllocSize - Offset.getLimitedValue(); + MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd && Offsets.DestBegin == Offsets.SourceBegin) @@ -971,34 +879,39 @@ private: } void visitIntrinsicInst(IntrinsicInst &II) { + assert(IsOffsetKnown); assert(II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end); ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0)); - insertUse(II, Offset, - std::min(AllocSize - Offset, Length->getLimitedValue())); + insertUse(II, Offset, std::min(Length->getLimitedValue(), + AllocSize - Offset.getLimitedValue())); } - void insertPHIOrSelect(Instruction &User, uint64_t Offset) { + void insertPHIOrSelect(Instruction &User, const APInt &Offset) { uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first; // For PHI and select operands outside the alloca, we can't nuke the entire // phi or select -- the other side might still be relevant, so we special // case them here and use a separate structure to track the operands // themselves which should be replaced with undef. - if (Offset >= AllocSize) { + if ((Offset.isNegative() && Offset.uge(Size)) || + (!Offset.isNegative() && Offset.uge(AllocSize))) { P.DeadOperands.push_back(U); return; } insertUse(User, Offset, Size); } + void visitPHINode(PHINode &PN) { if (PN.use_empty()) return markAsDead(PN); + assert(IsOffsetKnown); insertPHIOrSelect(PN, Offset); } + void visitSelectInst(SelectInst &SI) { if (SI.use_empty()) return markAsDead(SI); @@ -1007,7 +920,7 @@ private: if (Result == *U) // If the result of the constant fold will be the pointer, recurse // through the select as if we had RAUW'ed it. 
- enqueueUsers(SI, Offset); + enqueueUsers(SI); else // Otherwise the operand to the select is dead, and we can replace it // with undef. @@ -1016,6 +929,7 @@ private: return; } + assert(IsOffsetKnown); insertPHIOrSelect(SI, Offset); } @@ -1122,13 +1036,20 @@ void AllocaPartitioning::splitAndMergePartitions() { AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) : -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) AI(AI), #endif PointerEscapingInstr(0) { PartitionBuilder PB(TD, AI, *this); - if (!PB()) + PartitionBuilder::PtrInfo PtrI = PB.visitPtr(AI); + if (PtrI.isEscaped() || PtrI.isAborted()) { + // FIXME: We should sink the escape vs. abort info into the caller nicely, + // possibly by just storing the PtrInfo in the AllocaPartitioning. + PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst() + : PtrI.getAbortingInst(); + assert(PointerEscapingInstr && "Did not track a bad instruction"); return; + } // Sort the uses. This arranges for the offsets to be in ascending order, // and the sizes to be in descending order. @@ -1162,7 +1083,9 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) // re-walking the recursive users of the alloca. Uses.resize(Partitions.size()); UseBuilder UB(TD, AI, *this); - UB(); + PtrI = UB.visitPtr(AI); + assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!"); + assert(!PtrI.isAborted() && "Early aborted the visit of the pointer."); } Type *AllocaPartitioning::getCommonType(iterator I) const { @@ -1382,11 +1305,7 @@ class SROA : public FunctionPass { /// \brief A collection of instructions to delete. /// We try to batch deletions to simplify code and make things a bit more /// efficient. - SmallVector<Instruction *, 8> DeadInsts; - - /// \brief A set to prevent repeatedly marking an instruction split into many - /// uses as dead. Only used to guard insertion into DeadInsts. - SmallPtrSet<Instruction *, 4> DeadSplitInsts; + SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts; /// \brief Post-promotion worklist. /// @@ -1573,7 +1492,7 @@ private: do { LoadInst *LI = Loads.pop_back_val(); LI->replaceAllUsesWith(NewPN); - Pass.DeadInsts.push_back(LI); + Pass.DeadInsts.insert(LI); } while (!Loads.empty()); // Inject loads into all of the pred blocks. @@ -1717,7 +1636,7 @@ private: DEBUG(dbgs() << " speculated to: " << *V << "\n"); LI->replaceAllUsesWith(V); - Pass.DeadInsts.push_back(LI); + Pass.DeadInsts.insert(LI); } } }; @@ -2116,11 +2035,11 @@ static bool isVectorPromotionViable(const DataLayout &TD, EndIndex > Ty->getNumElements()) return false; - // FIXME: We should build shuffle vector instructions to handle - // non-element-sized accesses. - if ((EndOffset - BeginOffset) != ElementSize && - (EndOffset - BeginOffset) != VecSize) - return false; + assert(EndIndex > BeginIndex && "Empty vector!"); + uint64_t NumElements = EndIndex - BeginIndex; + Type *PartitionTy + = (NumElements == 1) ? Ty->getElementType() + : VectorType::get(Ty->getElementType(), NumElements); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { if (MI->isVolatile()) @@ -2134,8 +2053,17 @@ static bool isVectorPromotionViable(const DataLayout &TD, } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. 
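// [Editorial note, not part of this commit] Why DeadInsts becomes a
// SetVector in this hunk: a single container now provides both the
// de-duplication that the separate DeadSplitInsts guard set used to give
// and the LIFO traversal order of the old SmallVector. Illustrative usage:
//   DeadInsts.insert(I);                 // no-op if I is already queued
//   while (!DeadInsts.empty())
//     visit(DeadInsts.pop_back_val());   // pops in reverse insertion order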
return false; - } else if (!isa<LoadInst>(I->U->getUser()) && - !isa<StoreInst>(I->U->getUser())) { + } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + if (LI->isVolatile()) + return false; + if (!canConvertValue(TD, PartitionTy, LI->getType())) + return false; + } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + if (SI->isVolatile()) + return false; + if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy)) + return false; + } else { return false; } } @@ -2155,6 +2083,9 @@ static bool isIntegerWideningViable(const DataLayout &TD, AllocaPartitioning::const_use_iterator I, AllocaPartitioning::const_use_iterator E) { uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy); + // Don't create integer types larger than the maximum bitwidth. + if (SizeInBits > IntegerType::MAX_INT_BITS) + return false; // Don't try to handle allocas with bit-padding. if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy)) @@ -2193,7 +2124,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin == 0 && RelEnd == Size) WholeAllocaOp = true; if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) { - if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy)) + if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy)) return false; continue; } @@ -2209,7 +2140,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin == 0 && RelEnd == Size) WholeAllocaOp = true; if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) { - if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy)) + if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy)) return false; continue; } @@ -2241,18 +2172,23 @@ static bool isIntegerWideningViable(const DataLayout &TD, static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name) { + DEBUG(dbgs() << " start: " << *V << "\n"); IntegerType *IntTy = cast<IntegerType>(V->getType()); assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && "Element extends past full value"); uint64_t ShAmt = 8*Offset; if (DL.isBigEndian()) ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); - if (ShAmt) + if (ShAmt) { V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); + DEBUG(dbgs() << " shifted: " << *V << "\n"); + } assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot extract to a larger integer!"); - if (Ty != IntTy) + if (Ty != IntTy) { V = IRB.CreateTrunc(V, Ty, Name + ".trunc"); + DEBUG(dbgs() << " trunced: " << *V << "\n"); + } return V; } @@ -2262,20 +2198,27 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, IntegerType *Ty = cast<IntegerType>(V->getType()); assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot insert a larger integer!"); - if (Ty != IntTy) + DEBUG(dbgs() << " start: " << *V << "\n"); + if (Ty != IntTy) { V = IRB.CreateZExt(V, IntTy, Name + ".ext"); + DEBUG(dbgs() << " extended: " << *V << "\n"); + } assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && "Element store outside of alloca store"); uint64_t ShAmt = 8*Offset; if (DL.isBigEndian()) ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); - if (ShAmt) + if (ShAmt) { V = IRB.CreateShl(V, ShAmt, Name + ".shift"); + DEBUG(dbgs() << " shifted: " << *V << "\n"); + } if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); Old = IRB.CreateAnd(Old, Mask, Name + ".mask"); + DEBUG(dbgs() << " masked: " << *Old << 
"\n"); V = IRB.CreateOr(Old, V, Name + ".insert"); + DEBUG(dbgs() << " inserted: " << *V << "\n"); } return V; } @@ -2430,42 +2373,47 @@ private: return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset); } - ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) { + unsigned getIndex(uint64_t Offset) { assert(VecTy && "Can only call getIndex when rewriting a vector"); uint64_t RelOffset = Offset - NewAllocaBeginOffset; assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds"); uint32_t Index = RelOffset / ElementSize; assert(Index * ElementSize == RelOffset); - return IRB.getInt32(Index); + return Index; } void deleteIfTriviallyDead(Value *V) { Instruction *I = cast<Instruction>(V); if (isInstructionTriviallyDead(I)) - Pass.DeadInsts.push_back(I); + Pass.DeadInsts.insert(I); } - bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { - Value *Result; - if (LI.getType() == VecTy->getElementType() || - BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - Result = IRB.CreateExtractElement( - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - getIndex(IRB, BeginOffset), getName(".extract")); - } else { - Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); + unsigned BeginIndex = getIndex(BeginOffset); + unsigned EndIndex = getIndex(EndOffset); + assert(EndIndex > BeginIndex && "Empty vector!"); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + if (NumElements == 1) { + V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), + getName(".extract")); + DEBUG(dbgs() << " extract: " << *V << "\n"); + } else if (NumElements < VecTy->getNumElements()) { + SmallVector<Constant*, 8> Mask; + Mask.reserve(NumElements); + for (unsigned i = BeginIndex; i != EndIndex; ++i) + Mask.push_back(IRB.getInt32(i)); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + getName(".extract")); + DEBUG(dbgs() << " shuffle: " << *V << "\n"); } - if (Result->getType() != LI.getType()) - Result = convertValue(TD, IRB, Result, LI.getType()); - LI.replaceAllUsesWith(Result); - Pass.DeadInsts.push_back(&LI); - - DEBUG(dbgs() << " to: " << *Result << "\n"); - return true; + return V; } - bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { + Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { assert(IntTy && "We cannot insert an integer to the alloca"); assert(!LI.isVolatile()); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), @@ -2473,12 +2421,10 @@ private: V = convertValue(TD, IRB, V, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, - getName(".extract")); - LI.replaceAllUsesWith(V); - Pass.DeadInsts.push_back(&LI); - DEBUG(dbgs() << " to: " << *V << "\n"); - return true; + if (Offset > 0 || EndOffset < NewAllocaEndOffset) + V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, + getName(".extract")); + return V; } bool visitLoadInst(LoadInst &LI) { @@ -2488,7 +2434,46 @@ private: IRBuilder<> IRB(&LI); uint64_t Size = EndOffset - BeginOffset; - if (Size < TD.getTypeStoreSize(LI.getType())) { + bool IsSplitIntLoad = Size < 
TD.getTypeStoreSize(LI.getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of the original allocation it's behavior is undefined. Rather + // than trying to transform it, just replace it with undef. + // FIXME: We should do something more clever for functions being + // instrumented by asan. + // FIXME: Eventually, once ASan and friends can flush out bugs here, this + // should be transformed to a load of null making it unreachable. + uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType()); + if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) { + LI.replaceAllUsesWith(UndefValue::get(LI.getType())); + Pass.DeadInsts.insert(&LI); + deleteIfTriviallyDead(OldOp); + DEBUG(dbgs() << " to: undef!!\n"); + return true; + } + + Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) + : LI.getType(); + bool IsPtrAdjusted = false; + Value *V; + if (VecTy) { + V = rewriteVectorizedLoadInst(IRB, LI, OldOp); + } else if (IntTy && LI.getType()->isIntegerTy()) { + V = rewriteIntegerLoad(IRB, LI); + } else if (BeginOffset == NewAllocaBeginOffset && + canConvertValue(TD, NewAllocaTy, LI.getType())) { + V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + LI.isVolatile(), getName(".load")); + } else { + Type *LTy = TargetTy->getPointerTo(); + V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy), + getPartitionTypeAlign(TargetTy), + LI.isVolatile(), getName(".load")); + IsPtrAdjusted = true; + } + V = convertValue(TD, IRB, V, TargetTy); + + if (IsSplitIntLoad) { assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); @@ -2498,21 +2483,8 @@ private: assert(LI.getType()->getIntegerBitWidth() == TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide loads can be split and recomposed"); - IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8); - bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && - canConvertValue(TD, NewAllocaTy, NarrowTy); - Value *V; // Move the insertion point just past the load so that we can refer to it. IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); - if (IsConvertable) - V = convertValue(TD, IRB, - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")), - NarrowTy); - else - V = IRB.CreateAlignedLoad( - getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()), - getPartitionTypeAlign(NarrowTy), getName(".load")); // Create a placeholder value with the same type as LI to use as the // basis for the new value. 
This allows us to replace the uses of LI with // the computed value, and then replace the placeholder with LI, leaving @@ -2524,67 +2496,77 @@ private: LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); delete Placeholder; - if (Pass.DeadSplitInsts.insert(&LI)) - Pass.DeadInsts.push_back(&LI); - DEBUG(dbgs() << " to: " << *V << "\n"); - return IsConvertable; - } - - if (VecTy) - return rewriteVectorizedLoadInst(IRB, LI, OldOp); - if (IntTy && LI.getType()->isIntegerTy()) - return rewriteIntegerLoad(IRB, LI); - - if (BeginOffset == NewAllocaBeginOffset && - canConvertValue(TD, NewAllocaTy, LI.getType())) { - Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - LI.isVolatile(), getName(".load")); - Value *NewV = convertValue(TD, IRB, NewLI, LI.getType()); - LI.replaceAllUsesWith(NewV); - Pass.DeadInsts.push_back(&LI); - - DEBUG(dbgs() << " to: " << *NewLI << "\n"); - return !LI.isVolatile(); + } else { + LI.replaceAllUsesWith(V); } - assert(!IntTy && "Invalid load found with int-op widening enabled"); - - Value *NewPtr = getAdjustedAllocaPtr(IRB, - LI.getPointerOperand()->getType()); - LI.setOperand(0, NewPtr); - LI.setAlignment(getPartitionTypeAlign(LI.getType())); - DEBUG(dbgs() << " to: " << LI << "\n"); - + Pass.DeadInsts.insert(&LI); deleteIfTriviallyDead(OldOp); - return NewPtr == &NewAI && !LI.isVolatile(); - } - - bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI, - Value *OldOp) { - Value *V = SI.getValueOperand(); - if (V->getType() == ElementTy || - BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - if (V->getType() != ElementTy) - V = convertValue(TD, IRB, V, ElementTy); + DEBUG(dbgs() << " to: " << *V << "\n"); + return !LI.isVolatile() && !IsPtrAdjusted; + } + + bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V, + StoreInst &SI, Value *OldOp) { + unsigned BeginIndex = getIndex(BeginOffset); + unsigned EndIndex = getIndex(EndOffset); + assert(EndIndex > BeginIndex && "Empty vector!"); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + Type *PartitionTy + = (NumElements == 1) ? ElementTy + : VectorType::get(ElementTy, NumElements); + if (V->getType() != PartitionTy) + V = convertValue(TD, IRB, V, PartitionTy); + if (NumElements < VecTy->getNumElements()) { + // We need to mix in the existing elements. LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); - V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset), - getName(".insert")); - } else if (V->getType() != VecTy) { + if (NumElements == 1) { + V = IRB.CreateInsertElement(LI, V, IRB.getInt32(BeginIndex), + getName(".insert")); + DEBUG(dbgs() << " insert: " << *V << "\n"); + } else { + // When inserting a smaller vector into the larger to store, we first + // use a shuffle vector to widen it with undef elements, and then + // a second shuffle vector to select between the loaded vector and the + // incoming vector. 
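// [Editorial worked example, not part of this commit] For VecTy = <4 x i32>,
// BeginIndex = 1, EndIndex = 3, the two masks built below come out as:
//   widen:  V' = shufflevector(V, undef, <undef, 0, 1, undef>)
//   select: R  = shufflevector(V', LI,   <4, 1, 2, 7>)
// so lanes 1-2 of the result come from the incoming value and lanes 0 and 3
// keep the contents of the loaded vector.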
+ SmallVector<Constant*, 8> Mask; + Mask.reserve(VecTy->getNumElements()); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i - BeginIndex)); + else + Mask.push_back(UndefValue::get(IRB.getInt32Ty())); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + getName(".expand")); + DEBUG(dbgs() << " shuffle1: " << *V << "\n"); + + Mask.clear(); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i)); + else + Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); + V = IRB.CreateShuffleVector(V, LI, ConstantVector::get(Mask), + getName("insert")); + DEBUG(dbgs() << " shuffle2: " << *V << "\n"); + } + } else { V = convertValue(TD, IRB, V, VecTy); } StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - Pass.DeadInsts.push_back(&SI); + Pass.DeadInsts.insert(&SI); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; } - bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) { + bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) { assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); - Value *V = SI.getValueOperand(); if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); @@ -2596,7 +2578,7 @@ private: } V = convertValue(TD, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - Pass.DeadInsts.push_back(&SI); + Pass.DeadInsts.insert(&SI); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2608,74 +2590,53 @@ private: assert(OldOp == OldPtr); IRBuilder<> IRB(&SI); - if (VecTy) - return rewriteVectorizedStoreInst(IRB, SI, OldOp); - Type *ValueTy = SI.getValueOperand()->getType(); + Value *V = SI.getValueOperand(); + + // Strip all inbounds GEPs and pointer casts to try to dig out any root + // alloca that should be re-examined after promoting this alloca. 
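// [Editorial note, not part of this commit] Example for the root-alloca
// digging described above: storing
//   %p = getelementptr inbounds [4 x i32]* %other, i64 0, i64 1
// into this alloca lets stripInBoundsOffsets() recover %other, which is
// queued on PostPromotionWorklist for another promotion attempt once this
// store has been rewritten.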
+ if (V->getType()->isPointerTy()) + if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) + Pass.PostPromotionWorklist.insert(AI); uint64_t Size = EndOffset - BeginOffset; - if (Size < TD.getTypeStoreSize(ValueTy)) { + if (Size < TD.getTypeStoreSize(V->getType())) { assert(!SI.isVolatile()); - assert(ValueTy->isIntegerTy() && + assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); - assert(ValueTy->getIntegerBitWidth() == - TD.getTypeStoreSizeInBits(ValueTy) && + assert(V->getType()->getIntegerBitWidth() == + TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); - assert(ValueTy->getIntegerBitWidth() == + assert(V->getType()->getIntegerBitWidth() == TD.getTypeSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); - Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy, - BeginOffset, getName(".extract")); - StoreInst *NewSI; - bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && - canConvertValue(TD, NarrowTy, NewAllocaTy); - if (IsConvertable) - NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy), - &NewAI, NewAI.getAlignment()); - else - NewSI = IRB.CreateAlignedStore( - V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()), - getPartitionTypeAlign(NarrowTy)); - (void)NewSI; - if (Pass.DeadSplitInsts.insert(&SI)) - Pass.DeadInsts.push_back(&SI); - - DEBUG(dbgs() << " to: " << *NewSI << "\n"); - return IsConvertable; + V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, + getName(".extract")); } - if (IntTy && ValueTy->isIntegerTy()) - return rewriteIntegerStore(IRB, SI); - - // Strip all inbounds GEPs and pointer casts to try to dig out any root - // alloca that should be re-examined after promoting this alloca. 
- if (ValueTy->isPointerTy()) - if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand() - ->stripInBoundsOffsets())) - Pass.PostPromotionWorklist.insert(AI); + if (VecTy) + return rewriteVectorizedStoreInst(IRB, V, SI, OldOp); + if (IntTy && V->getType()->isIntegerTy()) + return rewriteIntegerStore(IRB, V, SI); + StoreInst *NewSI; if (BeginOffset == NewAllocaBeginOffset && - canConvertValue(TD, ValueTy, NewAllocaTy)) { - Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy); - StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(), - SI.isVolatile()); - (void)NewSI; - Pass.DeadInsts.push_back(&SI); - - DEBUG(dbgs() << " to: " << *NewSI << "\n"); - return !SI.isVolatile(); + canConvertValue(TD, V->getType(), NewAllocaTy)) { + V = convertValue(TD, IRB, V, NewAllocaTy); + NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + SI.isVolatile()); + } else { + Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo()); + NewSI = IRB.CreateAlignedStore(V, NewPtr, + getPartitionTypeAlign(V->getType()), + SI.isVolatile()); } - - assert(!IntTy && "Invalid store found with int-op widening enabled"); - - Value *NewPtr = getAdjustedAllocaPtr(IRB, - SI.getPointerOperand()->getType()); - SI.setOperand(1, NewPtr); - SI.setAlignment(getPartitionTypeAlign(SI.getValueOperand()->getType())); - DEBUG(dbgs() << " to: " << SI << "\n"); - + (void)NewSI; + Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); - return NewPtr == &NewAI && !SI.isVolatile(); + + DEBUG(dbgs() << " to: " << *NewSI << "\n"); + return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); } bool visitMemSetInst(MemSetInst &II) { @@ -2695,8 +2656,7 @@ private: } // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); Type *AllocaTy = NewAI.getAllocatedType(); Type *ScalarTy = AllocaTy->getScalarType(); @@ -2747,7 +2707,7 @@ private: IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - V, getIndex(IRB, BeginOffset), + V, IRB.getInt32(getIndex(BeginOffset)), getName(".insert")), &NewAI, NewAI.getAlignment()); (void)Store; @@ -2852,8 +2812,7 @@ private: return false; } // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && EndOffset == NewAllocaEndOffset; @@ -2916,7 +2875,7 @@ private: // We have to extract rather than load. Src = IRB.CreateExtractElement( IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), - getIndex(IRB, BeginOffset), + IRB.getInt32(getIndex(BeginOffset)), getName(".copyextract")); } else if (IntTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), @@ -2944,7 +2903,7 @@ private: // We have to insert into a loaded copy before storing. Src = IRB.CreateInsertElement( IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - Src, getIndex(IRB, BeginOffset), + Src, IRB.getInt32(getIndex(BeginOffset)), getName(".insert")); } @@ -2963,8 +2922,7 @@ private: assert(II.getArgOperand(1) == OldPtr); // Record this instruction for deletion. 
- if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); ConstantInt *Size = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), @@ -3533,7 +3491,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { DI != DE; ++DI) { Changed = true; (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType())); - DeadInsts.push_back(*DI); + DeadInsts.insert(*DI); } for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(), DE = P.dead_op_end(); @@ -3544,7 +3502,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { if (Instruction *OldI = dyn_cast<Instruction>(OldV)) if (isInstructionTriviallyDead(OldI)) { Changed = true; - DeadInsts.push_back(OldI); + DeadInsts.insert(OldI); } } @@ -3565,7 +3523,6 @@ bool SROA::runOnAlloca(AllocaInst &AI) { /// We also record the alloca instructions deleted here so that they aren't /// subsequently handed to mem2reg to promote. void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { - DeadSplitInsts.clear(); while (!DeadInsts.empty()) { Instruction *I = DeadInsts.pop_back_val(); DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); @@ -3577,7 +3534,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { // Zero out the operand and see if it becomes trivially dead. *OI = 0; if (isInstructionTriviallyDead(U)) - DeadInsts.push_back(U); + DeadInsts.insert(U); } if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 39630fd027..762bb15c59 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -13,14 +13,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar.h" #include "llvm-c/Initialization.h" -#include "llvm/InitializePasses.h" -#include "llvm/PassManager.h" +#include "llvm-c/Transforms/Scalar.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/DataLayout.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassManager.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index a46d09c320..c8656fbd8e 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -21,8 +21,15 @@ #define DEBUG_TYPE "scalarrepl" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/DIBuilder.h" +#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -34,19 +41,12 @@ #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Transforms/Utils/Local.h" #include 
"llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/SSAUpdater.h" diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 9f24bb635e..9160f04fe2 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -23,19 +23,19 @@ #define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Attributes.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" -#include "llvm/Attributes.h" -#include "llvm/Support/CFG.h" #include "llvm/Pass.h" -#include "llvm/DataLayout.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" #include "llvm/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; STATISTIC(NumSimpl, "Number of blocks simplified"); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index bacada58c1..d4643b9d80 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -17,32 +17,26 @@ #define DEBUG_TYPE "simplify-libcalls" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BuildLibCalls.h" -#include "llvm/IRBuilder.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! +#include "llvm/DataLayout.h" +#include "llvm/IRBuilder.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! +#include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; -STATISTIC(NumSimplified, "Number of library calls simplified"); STATISTIC(NumAnnotated, "Number of attributes added to library functions"); -static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden, - cl::init(false), - cl::desc("Enable unsafe double to float " - "shrinking for math lib calls")); //===----------------------------------------------------------------------===// // Optimizer Base Class //===----------------------------------------------------------------------===// @@ -87,945 +81,6 @@ public: //===----------------------------------------------------------------------===// -// Helper Functions -//===----------------------------------------------------------------------===// - -static bool CallHasFloatingPointArgument(const CallInst *CI) { - for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); - it != e; ++it) { - if ((*it)->getType()->isFloatingPointTy()) - return true; - } - return false; -} - -/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality -/// comparisons with With. 
-static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) - if (IC->isEquality() && IC->getOperand(1) == With) - continue; - // Unknown instruction. - return false; - } - return true; -} - -//===----------------------------------------------------------------------===// -// String and Memory LibCall Optimizations -//===----------------------------------------------------------------------===// - -namespace { -//===---------------------------------------===// -// 'strspn' Optimizations - -struct StrSpnOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - FT->getParamType(0) != B.getInt8PtrTy() || - FT->getParamType(1) != FT->getParamType(0) || - !FT->getReturnType()->isIntegerTy()) - return 0; - - StringRef S1, S2; - bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); - bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); - - // strspn(s, "") -> 0 - // strspn("", s) -> 0 - if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) - return Constant::getNullValue(CI->getType()); - - // Constant folding. - if (HasS1 && HasS2) { - size_t Pos = S1.find_first_not_of(S2); - if (Pos == StringRef::npos) Pos = S1.size(); - return ConstantInt::get(CI->getType(), Pos); - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'strcspn' Optimizations - -struct StrCSpnOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - FT->getParamType(0) != B.getInt8PtrTy() || - FT->getParamType(1) != FT->getParamType(0) || - !FT->getReturnType()->isIntegerTy()) - return 0; - - StringRef S1, S2; - bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); - bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); - - // strcspn("", s) -> 0 - if (HasS1 && S1.empty()) - return Constant::getNullValue(CI->getType()); - - // Constant folding. - if (HasS1 && HasS2) { - size_t Pos = S1.find_first_of(S2); - if (Pos == StringRef::npos) Pos = S1.size(); - return ConstantInt::get(CI->getType(), Pos); - } - - // strcspn(s, "") -> strlen(s) - if (TD && HasS2 && S2.empty()) - return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); - - return 0; - } -}; - -//===---------------------------------------===// -// 'strstr' Optimizations - -struct StrStrOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isPointerTy()) - return 0; - - // fold strstr(x, x) -> x. 
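// [Editorial worked example, not part of this commit] The constant folds in
// StrSpnOpt and StrCSpnOpt above, on concrete strings:
//   strspn("abcba", "ab")  -> 2   (S1.find_first_not_of(S2) == 2)
//   strcspn("abcba", "cd") -> 2   (S1.find_first_of(S2)     == 2)
//   strcspn(s, "")         -> strlen(s)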
- if (CI->getArgOperand(0) == CI->getArgOperand(1)) - return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); - - // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 - if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); - if (!StrLen) - return 0; - Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), - StrLen, B, TD, TLI); - if (!StrNCmp) - return 0; - for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); - UI != UE; ) { - ICmpInst *Old = cast<ICmpInst>(*UI++); - Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, - ConstantInt::getNullValue(StrNCmp->getType()), - "cmp"); - Old->replaceAllUsesWith(Cmp); - Old->eraseFromParent(); - } - return CI; - } - - // See if either input string is a constant string. - StringRef SearchStr, ToFindStr; - bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); - bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); - - // fold strstr(x, "") -> x. - if (HasStr2 && ToFindStr.empty()) - return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); - - // If both strings are known, constant fold it. - if (HasStr1 && HasStr2) { - std::string::size_type Offset = SearchStr.find(ToFindStr); - - if (Offset == StringRef::npos) // strstr("foo", "bar") -> null - return Constant::getNullValue(CI->getType()); - - // strstr("abcd", "bc") -> gep((char*)"abcd", 1) - Value *Result = CastToCStr(CI->getArgOperand(0), B); - Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); - return B.CreateBitCast(Result, CI->getType()); - } - - // fold strstr(x, "y") -> strchr(x, 'y'). - if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); - return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; - } - return 0; - } -}; - - -//===---------------------------------------===// -// 'memcmp' Optimizations - -struct MemCmpOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy(32)) - return 0; - - Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); - - if (LHS == RHS) // memcmp(s,s,x) -> 0 - return Constant::getNullValue(CI->getType()); - - // Make sure we have a constant length. - ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!LenC) return 0; - uint64_t Len = LenC->getZExtValue(); - - if (Len == 0) // memcmp(s1,s2,0) -> 0 - return Constant::getNullValue(CI->getType()); - - // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS - if (Len == 1) { - Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), - CI->getType(), "lhsv"); - Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), - CI->getType(), "rhsv"); - return B.CreateSub(LHSV, RHSV, "chardiff"); - } - - // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) - StringRef LHSStr, RHSStr; - if (getConstantStringInfo(LHS, LHSStr) && - getConstantStringInfo(RHS, RHSStr)) { - // Make sure we're not reading out-of-bounds memory. 
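// [Editorial note, not part of this commit] The MemCmpOpt folds above, in
// source terms:
//   memcmp(s, s, n) -> 0
//   memcmp(p, q, 0) -> 0
//   memcmp(p, q, 1) -> (int)*(unsigned char*)p - (int)*(unsigned char*)q
// and, for two constant strings with an in-bounds constant length, a full
// compile-time fold via the host memcmp.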
- if (Len > LHSStr.size() || Len > RHSStr.size()) - return 0; - uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len); - return ConstantInt::get(CI->getType(), Ret); - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'memcpy' Optimizations - -struct MemCpyOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. - if (!TD) return 0; - - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } -}; - -//===---------------------------------------===// -// 'memmove' Optimizations - -struct MemMoveOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. - if (!TD) return 0; - - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } -}; - -//===---------------------------------------===// -// 'memset' Optimizations - -struct MemSetOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. - if (!TD) return 0; - - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } -}; - -//===----------------------------------------------------------------------===// -// Math Library Optimizations -//===----------------------------------------------------------------------===// - -//===---------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' - -struct UnaryDoubleFPOpt : public LibCallOptimization { - bool CheckRetType; - UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || - !FT->getParamType(0)->isDoubleTy()) - return 0; - - if (CheckRetType) { - // Check if all the uses for function like 'sin' are converted to float. 
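// [Editorial note, not part of this commit] The shrink implemented by
// UnaryDoubleFPOpt here, in source terms:
//   floor((double)f) -> (double)floorf(f)    // f has type float
// and, in the CheckRetType mode used by the unsafe cos/pow/exp2 paths, only
// when every use of the call is itself truncated straight back to float.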
- for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); - ++UseI) { - FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); - if (Cast == 0 || !Cast->getType()->isFloatTy()) - return 0; - } - } - - // If this is something like 'floor((double)floatval)', convert to floorf. - FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) - return 0; - - // floor((double)floatval) -> (double)floorf(floatval) - Value *V = Cast->getOperand(0); - V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); - return B.CreateFPExt(V, B.getDoubleTy()); - } -}; - -//===---------------------------------------===// -// 'cos*' Optimizations -struct CosOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - Value *Ret = NULL; - if (UnsafeFPShrink && Callee->getName() == "cos" && - TLI->has(LibFunc::cosf)) { - UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); - Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); - } - - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 1 argument of FP type, which matches the - // result type. - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPointTy()) - return Ret; - - // cos(-x) -> cos(x) - Value *Op1 = CI->getArgOperand(0); - if (BinaryOperator::isFNeg(Op1)) { - BinaryOperator *BinExpr = cast<BinaryOperator>(Op1); - return B.CreateCall(Callee, BinExpr->getOperand(1), "cos"); - } - return Ret; - } -}; - -//===---------------------------------------===// -// 'pow*' Optimizations - -struct PowOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - Value *Ret = NULL; - if (UnsafeFPShrink && Callee->getName() == "pow" && - TLI->has(LibFunc::powf)) { - UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); - Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); - } - - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 2 arguments of the same FP type, which match the - // result type. - if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - !FT->getParamType(0)->isFloatingPointTy()) - return Ret; - - Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); - if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { - if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 - return Op1C; - if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x) - return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); - } - - ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (Op2C == 0) return Ret; - - if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 - return ConstantFP::get(CI->getType(), 1.0); - - if (Op2C->isExactlyValue(0.5)) { - // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). - // This is faster than calling pow, and still handles negative zero - // and negative infinity correctly. - // TODO: In fast-math mode, this could be just sqrt(x). - // TODO: In finite-only mode, this could be just fabs(sqrt(x)). 
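// [Editorial note, not part of this commit] The expansion emitted below, in
// source terms:
//   pow(x, 0.5) -> x == -infinity ? +infinity : fabs(sqrt(x))
// fabs() repairs sqrt(-0.0) == -0.0 (pow(-0.0, 0.5) must be +0.0), and the
// select repairs sqrt(-inf) == NaN (pow(-inf, 0.5) must be +inf).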
- Value *Inf = ConstantFP::getInfinity(CI->getType()); - Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); - Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B, - Callee->getAttributes()); - Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B, - Callee->getAttributes()); - Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); - Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); - return Sel; - } - - if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x - return Op1; - if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x - return B.CreateFMul(Op1, Op1, "pow2"); - if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x - return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), - Op1, "powrecip"); - return 0; - } -}; - -//===---------------------------------------===// -// 'exp2' Optimizations - -struct Exp2Opt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - Value *Ret = NULL; - if (UnsafeFPShrink && Callee->getName() == "exp2" && - TLI->has(LibFunc::exp2)) { - UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); - Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); - } - - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 1 argument of FP type, which matches the - // result type. - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPointTy()) - return Ret; - - Value *Op = CI->getArgOperand(0); - // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 - // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 - Value *LdExpArg = 0; - if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { - if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); - } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { - if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); - } - - if (LdExpArg) { - const char *Name; - if (Op->getType()->isFloatTy()) - Name = "ldexpf"; - else if (Op->getType()->isDoubleTy()) - Name = "ldexp"; - else - Name = "ldexpl"; - - Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); - if (!Op->getType()->isFloatTy()) - One = ConstantExpr::getFPExtend(One, Op->getType()); - - Module *M = Caller->getParent(); - Value *Callee = M->getOrInsertFunction(Name, Op->getType(), - Op->getType(), - B.getInt32Ty(), NULL); - CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); - if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; - } - return Ret; - } -}; - -//===----------------------------------------------------------------------===// -// Integer Optimizations -//===----------------------------------------------------------------------===// - -//===---------------------------------------===// -// 'ffs*' Optimizations - -struct FFSOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 2 arguments of the same FP type, which match the - // result type. - if (FT->getNumParams() != 1 || - !FT->getReturnType()->isIntegerTy(32) || - !FT->getParamType(0)->isIntegerTy()) - return 0; - - Value *Op = CI->getArgOperand(0); - - // Constant fold. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { - if (CI->isZero()) // ffs(0) -> 0. 
- return B.getInt32(0); - // ffs(c) -> cttz(c)+1 - return B.getInt32(CI->getValue().countTrailingZeros() + 1); - } - - // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 - Type *ArgType = Op->getType(); - Value *F = Intrinsic::getDeclaration(Callee->getParent(), - Intrinsic::cttz, ArgType); - Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz"); - V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); - V = B.CreateIntCast(V, B.getInt32Ty(), false); - - Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); - return B.CreateSelect(Cond, V, B.getInt32(0)); - } -}; - -//===---------------------------------------===// -// 'isdigit' Optimizations - -struct IsDigitOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - // We require integer(i32) - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || - !FT->getParamType(0)->isIntegerTy(32)) - return 0; - - // isdigit(c) -> (c-'0') <u 10 - Value *Op = CI->getArgOperand(0); - Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp"); - Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit"); - return B.CreateZExt(Op, CI->getType()); - } -}; - -//===---------------------------------------===// -// 'isascii' Optimizations - -struct IsAsciiOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - // We require integer(i32) - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || - !FT->getParamType(0)->isIntegerTy(32)) - return 0; - - // isascii(c) -> c <u 128 - Value *Op = CI->getArgOperand(0); - Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii"); - return B.CreateZExt(Op, CI->getType()); - } -}; - -//===---------------------------------------===// -// 'abs', 'labs', 'llabs' Optimizations - -struct AbsOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - // We require integer(integer) where the types agree. - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || - FT->getParamType(0) != FT->getReturnType()) - return 0; - - // abs(x) -> x >s -1 ? x : -x - Value *Op = CI->getArgOperand(0); - Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), - "ispos"); - Value *Neg = B.CreateNeg(Op, "neg"); - return B.CreateSelect(Pos, Op, Neg); - } -}; - - -//===---------------------------------------===// -// 'toascii' Optimizations - -struct ToAsciiOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - // We require i32(i32) - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isIntegerTy(32)) - return 0; - - // isascii(c) -> c & 0x7f - return B.CreateAnd(CI->getArgOperand(0), - ConstantInt::get(CI->getType(),0x7F)); - } -}; - -//===----------------------------------------------------------------------===// -// Formatting and IO Optimizations -//===----------------------------------------------------------------------===// - -//===---------------------------------------===// -// 'printf' Optimizations - -struct PrintFOpt : public LibCallOptimization { - Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, - IRBuilder<> &B) { - // Check for a fixed format string. 
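// [Editorial note, not part of this commit] The integer libcall folds above,
// in source terms:
//   ffs(x)     -> x != 0 ? (int)cttz(x) + 1 : 0
//   isdigit(c) -> (unsigned)(c - '0') < 10
//   isascii(c) -> (unsigned)c < 128
//   abs(x)     -> x > -1 ? x : -x
//   toascii(c) -> c & 0x7f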
- StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) - return 0; - - // Empty format string -> noop. - if (FormatStr.empty()) // Tolerate printf's declared void. - return CI->use_empty() ? (Value*)CI : - ConstantInt::get(CI->getType(), 0); - - // Do not do any of the following transformations if the printf return value - // is used, in general the printf return value is not compatible with either - // putchar() or puts(). - if (!CI->use_empty()) - return 0; - - // printf("x") -> putchar('x'), even for '%'. - if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI); - if (CI->use_empty() || !Res) return Res; - return B.CreateIntCast(Res, CI->getType(), true); - } - - // printf("foo\n") --> puts("foo") - if (FormatStr[FormatStr.size()-1] == '\n' && - FormatStr.find('%') == std::string::npos) { // no format characters. - // Create a string literal with no \n on it. We expect the constant merge - // pass to be run after this pass, to merge duplicate strings. - FormatStr = FormatStr.drop_back(); - Value *GV = B.CreateGlobalString(FormatStr, "str"); - Value *NewCI = EmitPutS(GV, B, TD, TLI); - return (CI->use_empty() || !NewCI) ? - NewCI : - ConstantInt::get(CI->getType(), FormatStr.size()+1); - } - - // Optimize specific format strings. - // printf("%c", chr) --> putchar(chr) - if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && - CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); - - if (CI->use_empty() || !Res) return Res; - return B.CreateIntCast(Res, CI->getType(), true); - } - - // printf("%s\n", str) --> puts(str) - if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && - CI->getArgOperand(1)->getType()->isPointerTy()) { - return EmitPutS(CI->getArgOperand(1), B, TD, TLI); - } - return 0; - } - - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require one fixed pointer argument and an integer/void result. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || - !(FT->getReturnType()->isIntegerTy() || - FT->getReturnType()->isVoidTy())) - return 0; - - if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { - return V; - } - - // printf(format, ...) -> iprintf(format, ...) if no floating point - // arguments. - if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *IPrintFFn = - M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(IPrintFFn); - B.Insert(New); - return New; - } - return 0; - } -}; - -//===---------------------------------------===// -// 'sprintf' Optimizations - -struct SPrintFOpt : public LibCallOptimization { - Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, - IRBuilder<> &B) { - // Check for a fixed format string. - StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; - - // If we just have a format string (nothing else crazy) transform it. - if (CI->getNumArgOperands() == 2) { - // Make sure there's no % in the constant array. We could try to handle - // %% -> % in the future if we cared. - for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) - if (FormatStr[i] == '%') - return 0; // we found a format specifier, bail out. - - // These optimizations require DataLayout. 
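// [Editorial note, not part of this commit] The printf rewrites above, in
// source terms (all but the first require the call result to be unused):
//   printf("")        -> removed (a used result becomes 0)
//   printf("x")       -> putchar('x')   (any single char, including '%')
//   printf("foo\n")   -> puts("foo")    (no '%' anywhere in the string)
//   printf("%c", c)   -> putchar(c)
//   printf("%s\n", s) -> puts(s)
// plus printf -> iprintf when no argument is floating point.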
- if (!TD) return 0; - - // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), // Copy the - FormatStr.size() + 1), 1); // nul byte. - return ConstantInt::get(CI->getType(), FormatStr.size()); - } - - // The remaining optimizations require the format string to be "%s" or "%c" - // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || - CI->getNumArgOperands() < 3) - return 0; - - // Decode the second character of the format string. - if (FormatStr[1] == 'c') { - // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); - Value *Ptr = CastToCStr(CI->getArgOperand(0), B); - B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul"); - B.CreateStore(B.getInt8(0), Ptr); - - return ConstantInt::get(CI->getType(), 1); - } - - if (FormatStr[1] == 's') { - // These optimizations require DataLayout. - if (!TD) return 0; - - // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - - Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); - if (!Len) - return 0; - Value *IncLen = B.CreateAdd(Len, - ConstantInt::get(Len->getType(), 1), - "leninc"); - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); - - // The sprintf result is the unincremented number of bytes in the string. - return B.CreateIntCast(Len, CI->getType(), false); - } - return 0; - } - - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed pointer arguments and an integer result. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - - if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { - return V; - } - - // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating - // point arguments. - if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *SIPrintFFn = - M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(SIPrintFFn); - B.Insert(New); - return New; - } - return 0; - } -}; - -//===---------------------------------------===// -// 'fwrite' Optimizations - -struct FWriteOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require a pointer, an integer, an integer, a pointer, returning integer. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isIntegerTy() || - !FT->getParamType(2)->isIntegerTy() || - !FT->getParamType(3)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - - // Get the element size and count. - ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!SizeC || !CountC) return 0; - uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); - - // If this is writing zero records, remove the call (it's a noop). 
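// [Editorial note, not part of this commit] The sprintf rewrites above, in
// source terms:
//   sprintf(s, "hi")    -> memcpy(s, "hi", 3); result 2
//   sprintf(s, "%c", c) -> s[0] = c; s[1] = 0;  result 1
//   sprintf(s, "%s", p) -> memcpy(s, p, strlen(p)+1); result strlen(p)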
- if (Bytes == 0) - return ConstantInt::get(CI->getType(), 0); - - // If this is writing one byte, turn it into fputc. - // This optimization is only valid if the return value is unused. - if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'fputs' Optimizations - -struct FPutsOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. - if (!TD) return 0; - - // Require two pointers. Also, we can't optimize if the return value is used. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !CI->use_empty()) - return 0; - - // fputs(s,F) --> fwrite(s,1,strlen(s),F) - uint64_t Len = GetStringLength(CI->getArgOperand(0)); - if (!Len) return 0; - // Known to have no uses (see above). - return EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getArgOperand(1), B, TD, TLI); - } -}; - -//===---------------------------------------===// -// 'fprintf' Optimizations - -struct FPrintFOpt : public LibCallOptimization { - Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, - IRBuilder<> &B) { - // All the optimizations depend on the format string. - StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; - - // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) - if (CI->getNumArgOperands() == 2) { - for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) - if (FormatStr[i] == '%') // Could handle %% -> % if we cared. - return 0; // We found a format specifier. - - // These optimizations require DataLayout. - if (!TD) return 0; - - Value *NewCI = EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; - } - - // The remaining optimizations require the format string to be "%s" or "%c" - // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || - CI->getNumArgOperands() < 3) - return 0; - - // Decode the second character of the format string. - if (FormatStr[1] == 'c') { - // fprintf(F, "%c", chr) --> fputc(chr, F) - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, - TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; - } - - if (FormatStr[1] == 's') { - // fprintf(F, "%s", str) --> fputs(str, F) - if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) - return 0; - return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); - } - return 0; - } - - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed parameters as pointers and an integer result.
- FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - - if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { - return V; - } - - // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no - // floating point arguments. - if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *FIPrintFFn = - M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(FIPrintFFn); - B.Insert(New); - return New; - } - return 0; - } -}; - -//===---------------------------------------===// -// 'puts' Optimizations - -struct PutsOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require one fixed pointer argument and an integer/void result. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || - !(FT->getReturnType()->isIntegerTy() || - FT->getReturnType()->isVoidTy())) - return 0; - - // Check for a constant string. - StringRef Str; - if (!getConstantStringInfo(CI->getArgOperand(0), Str)) - return 0; - - if (Str.empty() && CI->use_empty()) { - // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); - if (CI->use_empty() || !Res) return Res; - return B.CreateIntCast(Res, CI->getType(), true); - } - - return 0; - } -}; - -} // end anonymous namespace. - -//===----------------------------------------------------------------------===// // SimplifyLibCalls Pass Implementation //===----------------------------------------------------------------------===// @@ -1036,25 +91,11 @@ namespace { TargetLibraryInfo *TLI; StringMap<LibCallOptimization*> Optimizations; - // String and Memory LibCall Optimizations - StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr; - MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; - // Math Library Optimizations - CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; - UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; - // Integer Optimizations - FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii; - ToAsciiOpt ToAscii; - // Formatting and IO Optimizations - SPrintFOpt SPrintF; PrintFOpt PrintF; - FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF; - PutsOpt Puts; bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification - SimplifyLibCalls() : FunctionPass(ID), UnaryDoubleFP(false), - UnsafeUnaryDoubleFP(true) { + SimplifyLibCalls() : FunctionPass(ID) { initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } void AddOpt(LibFunc::Func F, LibCallOptimization* Opt); @@ -1104,86 +145,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. 
void SimplifyLibCalls::InitOptimizations() { - // String and Memory LibCall Optimizations - Optimizations["strspn"] = &StrSpn; - Optimizations["strcspn"] = &StrCSpn; - Optimizations["strstr"] = &StrStr; - Optimizations["memcmp"] = &MemCmp; - AddOpt(LibFunc::memcpy, &MemCpy); - Optimizations["memmove"] = &MemMove; - AddOpt(LibFunc::memset, &MemSet); - - // Math Library Optimizations - Optimizations["cosf"] = &Cos; - Optimizations["cos"] = &Cos; - Optimizations["cosl"] = &Cos; - Optimizations["powf"] = &Pow; - Optimizations["pow"] = &Pow; - Optimizations["powl"] = &Pow; - Optimizations["llvm.pow.f32"] = &Pow; - Optimizations["llvm.pow.f64"] = &Pow; - Optimizations["llvm.pow.f80"] = &Pow; - Optimizations["llvm.pow.f128"] = &Pow; - Optimizations["llvm.pow.ppcf128"] = &Pow; - Optimizations["exp2l"] = &Exp2; - Optimizations["exp2"] = &Exp2; - Optimizations["exp2f"] = &Exp2; - Optimizations["llvm.exp2.ppcf128"] = &Exp2; - Optimizations["llvm.exp2.f128"] = &Exp2; - Optimizations["llvm.exp2.f80"] = &Exp2; - Optimizations["llvm.exp2.f64"] = &Exp2; - Optimizations["llvm.exp2.f32"] = &Exp2; - - AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); - AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); - AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); - AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); - AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); - AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); - AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); - - if(UnsafeFPShrink) { - AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); - AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); - } - - // Integer Optimizations - Optimizations["ffs"] = &FFS; - Optimizations["ffsl"] = &FFS; - Optimizations["ffsll"] = &FFS; - Optimizations["abs"] = &Abs; - Optimizations["labs"] = &Abs; - Optimizations["llabs"] = &Abs; - Optimizations["isdigit"] = &IsDigit; - Optimizations["isascii"] = &IsAscii; - Optimizations["toascii"] = &ToAscii; - - // Formatting and IO Optimizations - Optimizations["sprintf"] = &SPrintF; - Optimizations["printf"] = &PrintF; - AddOpt(LibFunc::fwrite, &FWrite); - AddOpt(LibFunc::fputs, &FPuts); - Optimizations["fprintf"] = &FPrintF; - Optimizations["puts"] = &Puts; } @@ -1231,7 +192,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { // Something 
changed! Changed = true; - ++NumSimplified; // Inspect the instruction after the call (which was potentially just // added) next. diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index 34f1d6c622..cde9c178ad 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -14,13 +14,13 @@ #define DEBUG_TYPE "sink" #include "llvm/Transforms/Scalar.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 6557d630a9..e357378524 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -52,8 +52,12 @@ #define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -61,16 +65,12 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; STATISTIC(NumEliminated, "Number of tail calls removed"); diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 6815e411b4..3a19b706ea 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/AddrModeMatcher.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalValue.h" #include "llvm/Instruction.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/DataLayout.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/CallSite.h" using namespace llvm; using namespace llvm::PatternMatch; diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 9fea11391a..e8833f2092 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -13,20 +13,20 @@ //===----------------------------------------------------------------------===// #include 
"llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Constant.h" -#include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Constant.h" #include "llvm/DataLayout.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Type.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 6b04e3d17b..385ceb13b2 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -17,17 +17,17 @@ #define DEBUG_TYPE "break-crit-edges" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Type.h" using namespace llvm; STATISTIC(NumBroken, "Number of blocks inserted"); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index fa2faa2dad..62b79bf2b3 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -12,7 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" @@ -20,10 +22,8 @@ #include "llvm/LLVMContext.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Type.h" using namespace llvm; @@ -43,11 +43,13 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI), + Constant *StrLen = M->getOrInsertFunction("strlen", + AttributeSet::get(M->getContext(), + AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), NULL); @@ -70,11 +72,13 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { 
Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), + Constant *StrNLen = M->getOrInsertFunction("strnlen", + AttributeSet::get(M->getContext(), + AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), @@ -97,12 +101,14 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; AttributeWithIndex AWI = - AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); Type *I8Ptr = B.getInt8PtrTy(); Type *I32Ty = B.getInt32Ty(); - Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(AWI), + Constant *StrChr = M->getOrInsertFunction("strchr", + AttributeSet::get(M->getContext(), + AWI), I8Ptr, I8Ptr, I32Ty, NULL); CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C), "strchr"); @@ -123,11 +129,13 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI), + Value *StrNCmp = M->getOrInsertFunction("strncmp", + AttributeSet::get(M->getContext(), + AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -152,10 +160,11 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), + Value *StrCpy = M->getOrInsertFunction(Name, + AttributeSet::get(M->getContext(), AWI), I8Ptr, I8Ptr, I8Ptr, NULL); CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Name); @@ -175,10 +184,12 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), + Value *StrNCpy = M->getOrInsertFunction(Name, + AttributeSet::get(M->getContext(), + AWI), I8Ptr, I8Ptr, I8Ptr, Len->getType(), NULL); 
CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), @@ -199,11 +210,11 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttrListPtr::get(AWI), + AttributeSet::get(M->getContext(), AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -228,10 +239,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI), + Value *MemChr = M->getOrInsertFunction("memchr", + AttributeSet::get(M->getContext(), AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), @@ -257,11 +269,12 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI), + Value *MemCmp = M->getOrInsertFunction("memcmp", + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -280,7 +293,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, /// returns one value with the same type. If 'Op' is a long double, 'l' is /// added as the suffix of the name; if 'Op' is a float, we add an 'f' suffix. Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, - const AttrListPtr &Attrs) { + const AttributeSet &Attrs) { SmallString<20> NameBuffer; if (!Op->getType()->isDoubleTy()) { // If we need to add a suffix, copy into NameBuffer.
@@ -335,10 +348,11 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); - Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI), + Value *PutS = M->getOrInsertFunction("puts", + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), NULL); @@ -358,11 +372,12 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI), + F = M->getOrInsertFunction("fputc", + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt32Ty(), File->getType(), NULL); @@ -391,12 +406,13 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); - AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI), + F = M->getOrInsertFunction(FPutsName, + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); @@ -423,13 +439,14 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture); - AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, Attributes::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI), + F = M->getOrInsertFunction(FWriteName, + AttributeSet::get(M->getContext(), AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index bee2f7bcb6..1699a3b648 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -16,11 +16,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "bypass-slow-division" -#include "llvm/Instructions.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Transforms/Utils/BypassSlowDivision.h" 
+#include "llvm/Instructions.h" using namespace llvm; diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 7ba9f6d9d2..12f2e4b83e 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -14,22 +14,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Constants.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Function.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/ADT/SmallVector.h" #include <map> using namespace llvm; @@ -99,12 +99,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() .addAttr(NewFunc->getContext(), - AttrListPtr::ReturnIndex, + AttributeSet::ReturnIndex, OldFunc->getAttributes() .getRetAttributes())); NewFunc->setAttributes(NewFunc->getAttributes() .addAttr(NewFunc->getContext(), - AttrListPtr::FunctionIndex, + AttributeSet::FunctionIndex, OldFunc->getAttributes() .getFnAttributes())); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 1dac6b5b8b..114babd101 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Module.h" -#include "llvm/DerivedTypes.h" #include "llvm/Constant.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -38,10 +38,6 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { New->setTargetTriple(M->getTargetTriple()); New->setModuleInlineAsm(M->getModuleInlineAsm()); - // Copy all of the dependent libraries over. - for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I) - New->addLibrary(*I); - // Loop over all of the global variables, making corresponding globals in the // new module. Here we add them to the VMap and to the new Module. We // don't worry about attributes or initializers, they will come later. 
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 281714f4c1..a596df64fd 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -14,6 +14,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CodeExtractor.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" @@ -21,18 +28,11 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/RegionIterator.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <set> using namespace llvm; diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 99b5830163..f8a0cafadc 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Type.h" -#include "llvm/ADT/DenseMap.h" using namespace llvm; /// DemoteRegToStack - This function takes a virtual register computed by an diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 009847f87b..c176cf1075 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -13,8 +13,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Attributes.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/IRBuilder.h" @@ -22,12 +27,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Intrinsics.h" #include "llvm/Module.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/CallSite.h" -#include "llvm/DataLayout.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -668,10 +668,29 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (hasLifetimeMarkers(AI)) continue; - builder.CreateLifetimeStart(AI); + // Try to determine the size of the allocation. 
+ ConstantInt *AllocaSize = 0; + if (ConstantInt *AIArraySize = + dyn_cast<ConstantInt>(AI->getArraySize())) { + if (IFI.TD) { + Type *AllocaType = AI->getAllocatedType(); + uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType); + uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); + assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); + // Check that array size doesn't saturate uint64_t and doesn't + // overflow when it's multiplied by type size. + if (AllocaArraySize != ~0ULL && + UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), + AllocaArraySize * AllocaTypeSize); + } + } + } + + builder.CreateLifetimeStart(AI, AllocaSize); for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { IRBuilder<> builder(Returns[ri]); - builder.CreateLifetimeEnd(AI); + builder.CreateLifetimeEnd(AI, AllocaSize); } } } diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 55227e2714..67dcbe446b 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -15,11 +15,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "integer-division" +#include "llvm/Transforms/Utils/IntegerDivision.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" -#include "llvm/IRBuilder.h" -#include "llvm/Transforms/Utils/IntegerDivision.h" using namespace llvm; diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 5e05c83c35..5dddb6e28a 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -29,17 +29,17 @@ #define DEBUG_TYPE "lcssa" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Pass.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" #include "llvm/Support/PredIteratorCache.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; STATISTIC(NumLCSSA, "Number of live out of a loop variables"); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index a954d82c05..0e56817a1b 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -13,8 +13,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/DIBuilder.h" +#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalAlias.h" @@ -26,20 +34,12 @@ #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" #include "llvm/Operator.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/Dominators.h" -#include 
"llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 9d9e201665..6a68416a3d 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -39,26 +39,26 @@ #define DEBUG_TYPE "loop-simplify" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" -#include "llvm/Type.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Type.h" using namespace llvm; STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 20237500c3..d24b334681 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -18,12 +18,12 @@ #define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/BasicBlock.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 67e17f4ca8..242e7fa021 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -23,12 +23,12 @@ #define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/BasicBlock.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp 
b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index 02bdcda391..8756d26ca4 100644 --- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lower-expect-intrinsic" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" #include "llvm/BasicBlock.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -21,8 +23,6 @@ #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" #include "llvm/Pass.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include <vector> diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 930555424d..7b89ffd401 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -36,6 +36,8 @@ #define DEBUG_TYPE "lowerinvoke" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" @@ -43,12 +45,10 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include <csetjmp> #include <set> using namespace llvm; diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 1547439b5c..74a457ce81 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -14,16 +14,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index f4ca81af6d..70fbf13b97 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -14,12 +14,12 @@ #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Instructions.h" #include "llvm/Function.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Instructions.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" using namespace llvm; STATISTIC(NumPromoted, "Number of alloca's promoted"); diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index 233bc12d3c..363e9367f3 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -13,9 +13,9 @@ // 
//===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Transforms/IPO.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Module.h" @@ -37,7 +37,7 @@ namespace { next = seed; } - int rand(void) { + int rand() { next = next * 1103515245 + 12345; return (unsigned int)(next / 65536) % 32768; } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 558de9d12e..b41f433659 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -27,26 +27,26 @@ #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" +#include "llvm/DIBuilder.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" -#include "llvm/DIBuilder.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" -#include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <queue> using namespace llvm; diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 72d4199a2a..e1e7f4d668 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -12,12 +12,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ssaupdater" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CFG.h" @@ -25,7 +26,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 9823433e86..3cae77227c 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -13,6 +13,14 @@ #define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include 
"llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" @@ -25,15 +33,6 @@ #include "llvm/Metadata.h" #include "llvm/Module.h" #include "llvm/Operator.h" -#include "llvm/Type.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" @@ -42,9 +41,10 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Type.h" #include <algorithm> -#include <set> #include <map> +#include <set> using namespace llvm; static cl::opt<unsigned> @@ -858,7 +858,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, if (PredHasWeights) { GetBranchWeights(PTI, Weights); - // branch-weight metadata is inconsistant here. + // branch-weight metadata is inconsistent here. if (Weights.size() != 1 + PredCases.size()) PredHasWeights = SuccHasWeights = false; } else if (SuccHasWeights) @@ -870,7 +870,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, SmallVector<uint64_t, 8> SuccWeights; if (SuccHasWeights) { GetBranchWeights(TI, SuccWeights); - // branch-weight metadata is inconsistant here. + // branch-weight metadata is inconsistent here. if (SuccWeights.size() != 1 + BBCases.size()) PredHasWeights = SuccHasWeights = false; } else if (PredHasWeights) @@ -967,8 +967,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { - if (PredHasWeights || SuccHasWeights) - Weights.push_back(WeightsForHandled[*I]); + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[*I]); PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); NewSuccessors.push_back(BBDefault); } @@ -1193,7 +1193,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { I != E; ++I) { if (PHINode *PN = dyn_cast<PHINode>(I)) { Value *BB1V = PN->getIncomingValueForBlock(BB1); - Value *BB2V = PN->getIncomingValueForBlock(BB2); + Value *BB2V = PN->getIncomingValueForBlock(BB2); MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN); } else { FirstNonPhiInBBEnd = &*I; @@ -1202,7 +1202,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { } if (!FirstNonPhiInBBEnd) return false; - + // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. We scan backward for obviously identical @@ -1415,7 +1415,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { if (BB1V == BIParentV) continue; - // Check for saftey. + // Check for safety. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BB1V)) { // An unfolded ConstantExpr could end up getting expanded into // Instructions. 
Don't speculate this and another instruction at @@ -3511,22 +3511,44 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const DataLayout *TD, + const TargetTransformInfo *TTI, const SmallDenseMap<PHINode*, Type*>& ResultTypes) { - // The table density should be at least 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) return false; // TableSize overflowed, or mul below might overflow. - if (SI->getNumCases() * 10 >= TableSize * 4) - return true; - // If each table would fit in a register, we should build it anyway. + bool AllTablesFitInRegister = true; + bool HasIllegalType = false; for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(), E = ResultTypes.end(); I != E; ++I) { - if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second)) - return false; + Type *Ty = I->second; + + // Saturate this flag to true. + HasIllegalType = HasIllegalType || + !TTI->getScalarTargetTransformInfo()->isTypeLegal(Ty); + + // Saturate this flag to false. + AllTablesFitInRegister = AllTablesFitInRegister && + SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty); + + // If both flags saturate, we're done. NOTE: This *only* works with + // saturating flags, and all flags have to saturate first due to the + // non-deterministic behavior of iterating over a dense map. + if (HasIllegalType && !AllTablesFitInRegister) + break; } - return true; + + // If each table would fit in a register, we should build it anyway. + if (AllTablesFitInRegister) + return true; + + // Don't build a table that doesn't fit in-register if it has illegal types. + if (HasIllegalType) + return false; + + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + return SI->getNumCases() * 10 >= TableSize * 4; } /// SwitchToLookupTable - If the switch is only used to initialize one or more @@ -3538,7 +3560,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, const TargetTransformInfo *TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); - if (TTI && !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables()) + // Only build lookup table when we have a target that supports it. + if (!TTI || !TTI->getScalarTargetTransformInfo() || + !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables()) return false; // FIXME: If the switch is too sparse for a lookup table, perhaps we could @@ -3605,7 +3629,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes)) + if (!ShouldBuildLookupTable(SI, TableSize, TD, TTI, ResultTypes)) return false; // Create the BB that does the lookups. 
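The ShouldBuildLookupTable rewrite in the SimplifyCFG.cpp hunk above reorders the heuristics: a switch whose result values all fit in a register always gets a table, a switch producing an illegal result type never does, and only then does the original 40% density criterion decide. A self-contained sketch of that decision order, with the saturating-flag loop over ResultTypes collapsed into two precomputed boolean parameters (illustrative only, not the patch itself):

  #include <cstdint>

  // Decision order from the hunk. The two flags stand in for the
  // per-PHI loop over ResultTypes in the actual patch.
  static bool shouldBuildLookupTable(uint64_t NumCases, uint64_t TableSize,
                                     bool AllFitInRegister,
                                     bool HasIllegalType) {
    if (NumCases > TableSize || TableSize >= UINT64_MAX / 10)
      return false; // TableSize overflowed, or the multiply below might.
    if (AllFitInRegister)
      return true;  // Register-sized tables are always worth building.
    if (HasIllegalType)
      return false; // Never build a table holding illegal types.
    // Density must be at least 40%: e.g. 5 cases over a 12-entry table
    // gives 5*10 = 50 >= 12*4 = 48, so such a table is still built.
    return NumCases * 10 >= TableSize * 4;
  }

The loop in the real patch saturates both flags before breaking out early, so the outcome cannot depend on the non-deterministic iteration order of the SmallDenseMap.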
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 110f380857..5883293a81 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -15,18 +15,18 @@ #define DEBUG_TYPE "indvars" -#include "llvm/Instructions.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/DataLayout.h" +#include "llvm/Instructions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/SimplifyIndVar.h" -#include "llvm/DataLayout.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 65353dc460..8b2eeb9928 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -15,18 +15,18 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" -#include "llvm/Function.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/DataLayout.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Type.h" using namespace llvm; STATISTIC(NumSimplified, "Number of redundant instructions removed"); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 581b8d3ea2..82bfe0ccea 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -15,12 +15,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SimplifyLibCalls.h" -#include "llvm/DataLayout.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/DataLayout.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" +#include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" @@ -34,6 +36,7 @@ protected: Function *Caller; const DataLayout *TD; const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; LLVMContext* Context; public: LibCallOptimization() { } @@ -48,10 +51,12 @@ public: =0; Value *optimizeCall(CallInst *CI, const DataLayout *TD, - const TargetLibraryInfo *TLI, IRBuilder<> &B) { + const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); this->TD = TD; this->TLI = TLI; + this->LCS = LCS; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); @@ -83,6 +88,29 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) { return true; } +/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality +/// comparisons with With. 
+static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + +static bool callHasFloatingPointArgument(const CallInst *CI) { + for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); + it != e; ++it) { + if ((*it)->getType()->isFloatingPointTy()) + return true; + } + return false; +} + //===----------------------------------------------------------------------===// // Fortified Library Call Optimizations //===----------------------------------------------------------------------===// @@ -772,6 +800,863 @@ struct StrToOpt : public LibCallOptimization { } }; +struct StrSpnOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strspn(s, "") -> 0 + // strspn("", s) -> 0 + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_not_of(S2); + if (Pos == StringRef::npos) Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + return 0; + } +}; + +struct StrCSpnOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strcspn("", s) -> 0 + if (HasS1 && S1.empty()) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_of(S2); + if (Pos == StringRef::npos) Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + // strcspn(s, "") -> strlen(s) + if (TD && HasS2 && S2.empty()) + return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); + + return 0; + } +}; + +struct StrStrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isPointerTy()) + return 0; + + // fold strstr(x, x) -> x. 
+ if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (!StrLen) + return 0; + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD, TLI); + if (!StrNCmp) + return 0; + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast<ICmpInst>(*UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + LCS->replaceAllUsesWith(Old, Cmp); + } + return CI; + } + + // See if either input string is a constant string. + StringRef SearchStr, ToFindStr; + bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + std::string::size_type Offset = SearchStr.find(ToFindStr); + + if (Offset == StringRef::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = CastToCStr(CI->getArgOperand(0), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + } + return 0; + } +}; + +struct MemCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy(32)) + return 0; + + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + // Make sure we have a constant length. + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!LenC) return 0; + uint64_t Len = LenC->getZExtValue(); + + if (Len == 0) // memcmp(s1,s2,0) -> 0 + return Constant::getNullValue(CI->getType()); + + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); + } + + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + StringRef LHSStr, RHSStr; + if (getConstantStringInfo(LHS, LHSStr) && + getConstantStringInfo(RHS, RHSStr)) { + // Make sure we're not reading out-of-bounds memory. + if (Len > LHSStr.size() || Len > RHSStr.size()) + return 0; + // Fold the memcmp and normalize the result. This way we get consistent + // results across multiple platforms. 
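A note on the normalization mentioned above: C only guarantees the sign of memcmp's result, and different host C libraries return different magnitudes, so the fold must canonicalize to -1/0/1 or the constant baked into the IR would depend on the build machine. A sketch of the same canonicalization (the helper name is invented for illustration):

#include <cassert>
#include <cstring>

// Mirror of the fold's normalization of a host memcmp result.
static int normalizedMemCmp(const void *A, const void *B, size_t N) {
  int C = memcmp(A, B, N);
  return C < 0 ? -1 : (C > 0 ? 1 : 0);
}

int main() {
  assert(normalizedMemCmp("abc", "abd", 3) == -1);
  assert(normalizedMemCmp("abc", "abc", 3) == 0);
  return 0;
}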
+ uint64_t Ret = 0; + int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len); + if (Cmp < 0) + Ret = -1; + else if (Cmp > 0) + Ret = 1; + return ConstantInt::get(CI->getType(), Ret); + } + + return 0; + } +}; + +struct MemCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemMoveOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemSetOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memset(p, v, n) -> llvm.memset(p, v, n, 1) + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +//===----------------------------------------------------------------------===// +// Math Library Optimizations +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' + +struct UnaryDoubleFPOpt : public LibCallOptimization { + bool CheckRetType; + UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || + !FT->getParamType(0)->isDoubleTy()) + return 0; + + if (CheckRetType) { + // Check if all the uses of functions like 'sin' are converted to float. + for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); + ++UseI) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); + if (Cast == 0 || !Cast->getType()->isFloatTy()) + return 0; + } + } + + // If this is something like 'floor((double)floatval)', convert to floorf.
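At the source level the shrink just described looks like the following; it is safe because the floor of a float-representable value is itself float-representable (an editorial sketch, not the pass's code):

#include <cmath>

double shrinkExample(float F) {
  // Before: floor((double)F), the double-precision libm call.
  // After:  (double)floorf(F), the same value computed on float.
  float R = std::floor(F); // the float overload, i.e. floorf(F) in C
  return static_cast<double>(R);
}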
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) + return 0; + + // floor((double)floatval) -> (double)floorf(floatval) + Value *V = Cast->getOperand(0); + V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; + +struct UnsafeFPLibCallOptimization : public LibCallOptimization { + bool UnsafeFPShrink; + UnsafeFPLibCallOptimization(bool UnsafeFPShrink) { + this->UnsafeFPShrink = UnsafeFPShrink; + } +}; + +struct CosOpt : public UnsafeFPLibCallOptimization { + CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "cos" && + TLI->has(LibFunc::cosf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); + } + + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 1 argument of FP type, which matches the + // result type. + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + // cos(-x) -> cos(x) + Value *Op1 = CI->getArgOperand(0); + if (BinaryOperator::isFNeg(Op1)) { + BinaryOperator *BinExpr = cast<BinaryOperator>(Op1); + return B.CreateCall(Callee, BinExpr->getOperand(1), "cos"); + } + return Ret; + } +}; + +struct PowOpt : public UnsafeFPLibCallOptimization { + PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "pow" && + TLI->has(LibFunc::powf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); + } + + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 2 arguments of the same FP type, which match the + // result type. + if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { + if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 + return Op1C; + if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x) + return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); + } + + ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); + if (Op2C == 0) return Ret; + + if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 + return ConstantFP::get(CI->getType(), 1.0); + + if (Op2C->isExactlyValue(0.5)) { + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). + // This is faster than calling pow, and still handles negative zero + // and negative infinity correctly. + // TODO: In fast-math mode, this could be just sqrt(x). + // TODO: In finite-only mode, this could be just fabs(sqrt(x)). 
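The TODO comments above hint at why the expansion is shaped this way: pow(x, 0.5) has edge-case semantics that a bare sqrt does not. A scalar model of the exact sequence the fold emits (an illustrative sketch):

#include <cmath>

double powHalf(double X) {
  // pow(-inf, 0.5) is +inf, but sqrt(-inf) is NaN: hence the select.
  if (X == -INFINITY)
    return INFINITY;
  // pow(-0.0, 0.5) is +0.0, but sqrt(-0.0) is -0.0: hence the fabs.
  return std::fabs(std::sqrt(X));
}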
+ Value *Inf = ConstantFP::getInfinity(CI->getType()); + Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); + Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B, + Callee->getAttributes()); + Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B, + Callee->getAttributes()); + Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); + Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); + return Sel; + } + + if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x + return Op1; + if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x + return B.CreateFMul(Op1, Op1, "pow2"); + if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x + return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), + Op1, "powrecip"); + return 0; + } +}; + +struct Exp2Opt : public UnsafeFPLibCallOptimization { + Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "exp2" && + TLI->has(LibFunc::exp2)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); + } + + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 1 argument of FP type, which matches the + // result type. + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + Value *Op = CI->getArgOperand(0); + // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 + // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 + Value *LdExpArg = 0; + if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) + LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); + } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) + LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); + } + + if (LdExpArg) { + const char *Name; + if (Op->getType()->isFloatTy()) + Name = "ldexpf"; + else if (Op->getType()->isDoubleTy()) + Name = "ldexp"; + else + Name = "ldexpl"; + + Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); + if (!Op->getType()->isFloatTy()) + One = ConstantExpr::getFPExtend(One, Op->getType()); + + Module *M = Caller->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Op->getType(), + B.getInt32Ty(), NULL); + CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; + } + return Ret; + } +}; + +//===----------------------------------------------------------------------===// +// Integer Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct FFSOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 1 integer argument and an i32 + // result type. + if (FT->getNumParams() != 1 || + !FT->getReturnType()->isIntegerTy(32) || + !FT->getParamType(0)->isIntegerTy()) + return 0; + + Value *Op = CI->getArgOperand(0); + + // Constant fold. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + if (CI->isZero()) // ffs(0) -> 0.
+ return B.getInt32(0); + // ffs(c) -> cttz(c)+1 + return B.getInt32(CI->getValue().countTrailingZeros() + 1); + } + + // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 + Type *ArgType = Op->getType(); + Value *F = Intrinsic::getDeclaration(Callee->getParent(), + Intrinsic::cttz, ArgType); + Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz"); + V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); + V = B.CreateIntCast(V, B.getInt32Ty(), false); + + Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); + return B.CreateSelect(Cond, V, B.getInt32(0)); + } +}; + +struct AbsOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(integer) where the types agree. + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + FT->getParamType(0) != FT->getReturnType()) + return 0; + + // abs(x) -> x >s -1 ? x : -x + Value *Op = CI->getArgOperand(0); + Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), + "ispos"); + Value *Neg = B.CreateNeg(Op, "neg"); + return B.CreateSelect(Pos, Op, Neg); + } +}; + +struct IsDigitOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(i32) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // isdigit(c) -> (c-'0') <u 10 + Value *Op = CI->getArgOperand(0); + Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp"); + Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit"); + return B.CreateZExt(Op, CI->getType()); + } +}; + +struct IsAsciiOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(i32) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // isascii(c) -> c <u 128 + Value *Op = CI->getArgOperand(0); + Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii"); + return B.CreateZExt(Op, CI->getType()); + } +}; + +struct ToAsciiOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require i32(i32) + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // toascii(c) -> c & 0x7f + return B.CreateAnd(CI->getArgOperand(0), + ConstantInt::get(CI->getType(),0x7F)); + } +}; + +//===----------------------------------------------------------------------===// +// Formatting and IO Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct PrintFOpt : public LibCallOptimization { + Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) + return 0; + + // Empty format string -> noop. + if (FormatStr.empty()) // Tolerate printf's declared void. + return CI->use_empty() ? 
(Value*)CI : + ConstantInt::get(CI->getType(), 0); + + // Do not do any of the following transformations if the printf return value + // is used; in general the printf return value is not compatible with either + // putchar() or puts(). + if (!CI->use_empty()) + return 0; + + // printf("x") -> putchar('x'), even for '%'. + if (FormatStr.size() == 1) { + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + // printf("foo\n") --> puts("foo") + if (FormatStr[FormatStr.size()-1] == '\n' && + FormatStr.find('%') == std::string::npos) { // no format characters. + // Create a string literal with no \n on it. We expect the constant merge + // pass to be run after this pass, to merge duplicate strings. + FormatStr = FormatStr.drop_back(); + Value *GV = B.CreateGlobalString(FormatStr, "str"); + Value *NewCI = EmitPutS(GV, B, TD, TLI); + return (CI->use_empty() || !NewCI) ? + NewCI : + ConstantInt::get(CI->getType(), FormatStr.size()+1); + } + + // Optimize specific format strings. + // printf("%c", chr) --> putchar(chr) + if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isIntegerTy()) { + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); + + if (CI->use_empty() || !Res) return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + // printf("%s\n", str) --> puts(str) + if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isPointerTy()) { + return EmitPutS(CI->getArgOperand(1), B, TD, TLI); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. + if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *IPrintFFn = + M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(IPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct SPrintFOpt : public LibCallOptimization { + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return 0; + + // If we just have a format string (nothing else crazy), transform it. + if (CI->getNumArgOperands() == 2) { + // Make sure there's no % in the constant array. We could try to handle + // %% -> % in the future if we cared. + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') + return 0; // we found a format specifier, bail out. + + // These optimizations require DataLayout. + if (!TD) return 0; + + // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), // Copy the + FormatStr.size() + 1), 1); // nul byte.
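At the C level the memcpy rewrite just emitted behaves like the sketch below; FmtLen stands in for the compile-time FormatStr.size() and is an illustrative parameter, not part of the pass:

#include <cstddef>
#include <cstring>

int sprintfNoFormat(char *Dst, const char *Fmt, std::size_t FmtLen) {
  std::memcpy(Dst, Fmt, FmtLen + 1); // the +1 copies the terminating nul
  return static_cast<int>(FmtLen);   // sprintf's count excludes the nul
}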
+ return ConstantInt::get(CI->getType(), FormatStr.size()); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return 0; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); + Value *Ptr = CastToCStr(CI->getArgOperand(0), B); + B.CreateStore(V, Ptr); + Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul"); + B.CreateStore(B.getInt8(0), Ptr); + + return ConstantInt::get(CI->getType(), 1); + } + + if (FormatStr[1] == 's') { + // These optimizations require DataLayout. + if (!TD) return 0; + + // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; + + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + if (!Len) + return 0; + Value *IncLen = B.CreateAdd(Len, + ConstantInt::get(Len->getType(), 1), + "leninc"); + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); + + // The sprintf result is the unincremented number of bytes in the string. + return B.CreateIntCast(Len, CI->getType(), false); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed pointer arguments and an integer result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. + if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *SIPrintFFn = + M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct FPrintFOpt : public LibCallOptimization { + Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // All the optimizations depend on the format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return 0; + + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) + if (CI->getNumArgOperands() == 2) { + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') // Could handle %% -> % if we cared. + return 0; // We found a format specifier. + + // These optimizations require DataLayout. + if (!TD) return 0; + + Value *NewCI = EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return 0; + + // Decode the second character of the format string. 
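For orientation before the two branches decoded below: the folds map single-specifier fprintf calls onto cheaper libc primitives. A hand-applied version, with the same unused-result restriction the %s case enforces (a sketch; the helper names are invented):

#include <cstdio>

int fprintfCharFold(std::FILE *F, int Chr) {
  return std::fputc(Chr, F) == EOF ? -1 : 1; // fprintf(F, "%c", chr)
}

void fprintfStrFold(std::FILE *F, const char *S) {
  std::fputs(S, F); // fprintf(F, "%s", str); result must be unused
}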
+ if (FormatStr[1] == 'c') { + // fprintf(F, "%c", chr) --> fputc(chr, F) + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, + TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + } + + if (FormatStr[1] == 's') { + // fprintf(F, "%s", str) --> fputs(str, F) + if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) + return 0; + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed parameters as pointers and an integer result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no + // floating point arguments. + if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *FIPrintFFn = + M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct FWriteOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require a pointer, an integer, an integer, a pointer, returning integer. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + !FT->getParamType(2)->isIntegerTy() || + !FT->getParamType(3)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + // Get the element size and count. + ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeC || !CountC) return 0; + uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); + + // If this is writing zero records, remove the call (it's a noop). + if (Bytes == 0) + return ConstantInt::get(CI->getType(), 0); + + // If this is writing one byte, turn it into fputc. + // This optimization is only valid if the return value is unused. + if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) + Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + } + + return 0; + } +}; + +struct FPutsOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + // Require two pointers. Also, we can't optimize if return value is used. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !CI->use_empty()) + return 0; + + // fputs(s,F) --> fwrite(s,1,strlen(s),F) + uint64_t Len = GetStringLength(CI->getArgOperand(0)); + if (!Len) return 0; + // Known to have no uses (see above).
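One subtlety worth spelling out for the return statement below: GetStringLength reports the length including the terminating nul, which is why the emitted fwrite writes Len-1 bytes. A minimal source-level equivalent, assuming the length is known up front:

#include <cstddef>
#include <cstdio>

std::size_t fputsFold(const char *S, std::size_t LenWithNul, std::FILE *F) {
  // fputs(s, F) -> fwrite(s, 1, strlen(s), F); LenWithNul - 1 == strlen(s).
  return std::fwrite(S, 1, LenWithNul - 1, F);
}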
+ return EmitFWrite(CI->getArgOperand(0), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, TD, TLI); + } +}; + +struct PutsOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + // Check for a constant string. + StringRef Str; + if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + return 0; + + if (Str.empty() && CI->use_empty()) { + // puts("") -> putchar('\n') + Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + return 0; + } +}; + } // End anonymous namespace. namespace llvm { @@ -779,6 +1664,8 @@ namespace llvm { class LibCallSimplifierImpl { const DataLayout *TD; const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; + bool UnsafeFPShrink; StringMap<LibCallOptimization*> Optimizations; // Fortified library call optimizations. @@ -789,7 +1676,7 @@ class LibCallSimplifierImpl { StpCpyChkOpt StpCpyChk; StrNCpyChkOpt StrNCpyChk; - // String and memory library call optimizations. + // String library call optimizations. StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; @@ -802,12 +1689,48 @@ class LibCallSimplifierImpl { StrLenOpt StrLen; StrPBrkOpt StrPBrk; StrToOpt StrTo; + StrSpnOpt StrSpn; + StrCSpnOpt StrCSpn; + StrStrOpt StrStr; + + // Memory library call optimizations. + MemCmpOpt MemCmp; + MemCpyOpt MemCpy; + MemMoveOpt MemMove; + MemSetOpt MemSet; + + // Math library call optimizations. + UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; + CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; + + // Integer library call optimizations. + FFSOpt FFS; + AbsOpt Abs; + IsDigitOpt IsDigit; + IsAsciiOpt IsAscii; + ToAsciiOpt ToAscii; + + // Formatting and IO library call optimizations. + PrintFOpt PrintF; + SPrintFOpt SPrintF; + FPrintFOpt FPrintF; + FWriteOpt FWrite; + FPutsOpt FPuts; + PutsOpt Puts; void initOptimizations(); + void addOpt(LibFunc::Func F, LibCallOptimization* Opt); + void addOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt); public: - LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI) { + LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS, + bool UnsafeFPShrink = false) + : UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true), + Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { this->TD = TD; this->TLI = TLI; + this->LCS = LCS; + this->UnsafeFPShrink = UnsafeFPShrink; } Value *optimizeCall(CallInst *CI); @@ -823,25 +1746,106 @@ void LibCallSimplifierImpl::initOptimizations() { Optimizations["__strncpy_chk"] = &StrNCpyChk; Optimizations["__stpncpy_chk"] = &StrNCpyChk; - // String and memory library call optimizations. 
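The hunk below replaces the raw string-keyed registrations with addOpt calls keyed on LibFunc enums; addOpt, defined later in this patch, consults TargetLibraryInfo so an optimization is registered only when the target actually provides the function. Schematically (illustrative; the real definition appears further down):

  addOpt(LibFunc::strcat, &StrCat);   // new: registered only if TLI->has(strcat)
  Optimizations["strcat"] = &StrCat;  // old: registered unconditionally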
- Optimizations["strcat"] = &StrCat; - Optimizations["strncat"] = &StrNCat; - Optimizations["strchr"] = &StrChr; - Optimizations["strrchr"] = &StrRChr; - Optimizations["strcmp"] = &StrCmp; - Optimizations["strncmp"] = &StrNCmp; - Optimizations["strcpy"] = &StrCpy; - Optimizations["stpcpy"] = &StpCpy; - Optimizations["strncpy"] = &StrNCpy; - Optimizations["strlen"] = &StrLen; - Optimizations["strpbrk"] = &StrPBrk; - Optimizations["strtol"] = &StrTo; - Optimizations["strtod"] = &StrTo; - Optimizations["strtof"] = &StrTo; - Optimizations["strtoul"] = &StrTo; - Optimizations["strtoll"] = &StrTo; - Optimizations["strtold"] = &StrTo; - Optimizations["strtoull"] = &StrTo; + // String library call optimizations. + addOpt(LibFunc::strcat, &StrCat); + addOpt(LibFunc::strncat, &StrNCat); + addOpt(LibFunc::strchr, &StrChr); + addOpt(LibFunc::strrchr, &StrRChr); + addOpt(LibFunc::strcmp, &StrCmp); + addOpt(LibFunc::strncmp, &StrNCmp); + addOpt(LibFunc::strcpy, &StrCpy); + addOpt(LibFunc::stpcpy, &StpCpy); + addOpt(LibFunc::strncpy, &StrNCpy); + addOpt(LibFunc::strlen, &StrLen); + addOpt(LibFunc::strpbrk, &StrPBrk); + addOpt(LibFunc::strtol, &StrTo); + addOpt(LibFunc::strtod, &StrTo); + addOpt(LibFunc::strtof, &StrTo); + addOpt(LibFunc::strtoul, &StrTo); + addOpt(LibFunc::strtoll, &StrTo); + addOpt(LibFunc::strtold, &StrTo); + addOpt(LibFunc::strtoull, &StrTo); + addOpt(LibFunc::strspn, &StrSpn); + addOpt(LibFunc::strcspn, &StrCSpn); + addOpt(LibFunc::strstr, &StrStr); + + // Memory library call optimizations. + addOpt(LibFunc::memcmp, &MemCmp); + addOpt(LibFunc::memcpy, &MemCpy); + addOpt(LibFunc::memmove, &MemMove); + addOpt(LibFunc::memset, &MemSet); + + // Math library call optimizations. + addOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); + addOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); + addOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); + addOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); + addOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); + addOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); + addOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); + + if(UnsafeFPShrink) { + addOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); + } + + addOpt(LibFunc::cosf, &Cos); + addOpt(LibFunc::cos, &Cos); + 
addOpt(LibFunc::cosl, &Cos); + addOpt(LibFunc::powf, &Pow); + addOpt(LibFunc::pow, &Pow); + addOpt(LibFunc::powl, &Pow); + Optimizations["llvm.pow.f32"] = &Pow; + Optimizations["llvm.pow.f64"] = &Pow; + Optimizations["llvm.pow.f80"] = &Pow; + Optimizations["llvm.pow.f128"] = &Pow; + Optimizations["llvm.pow.ppcf128"] = &Pow; + addOpt(LibFunc::exp2l, &Exp2); + addOpt(LibFunc::exp2, &Exp2); + addOpt(LibFunc::exp2f, &Exp2); + Optimizations["llvm.exp2.ppcf128"] = &Exp2; + Optimizations["llvm.exp2.f128"] = &Exp2; + Optimizations["llvm.exp2.f80"] = &Exp2; + Optimizations["llvm.exp2.f64"] = &Exp2; + Optimizations["llvm.exp2.f32"] = &Exp2; + + // Integer library call optimizations. + addOpt(LibFunc::ffs, &FFS); + addOpt(LibFunc::ffsl, &FFS); + addOpt(LibFunc::ffsll, &FFS); + addOpt(LibFunc::abs, &Abs); + addOpt(LibFunc::labs, &Abs); + addOpt(LibFunc::llabs, &Abs); + addOpt(LibFunc::isdigit, &IsDigit); + addOpt(LibFunc::isascii, &IsAscii); + addOpt(LibFunc::toascii, &ToAscii); + + // Formatting and IO library call optimizations. + addOpt(LibFunc::printf, &PrintF); + addOpt(LibFunc::sprintf, &SPrintF); + addOpt(LibFunc::fprintf, &FPrintF); + addOpt(LibFunc::fwrite, &FWrite); + addOpt(LibFunc::fputs, &FPuts); + addOpt(LibFunc::puts, &Puts); } Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { @@ -852,14 +1856,26 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); if (LCO) { IRBuilder<> Builder(CI); - return LCO->optimizeCall(CI, TD, TLI, Builder); + return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); } return 0; } +void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) { + if (TLI->has(F)) + Optimizations[TLI->getName(F)] = Opt; +} + +void LibCallSimplifierImpl::addOpt(LibFunc::Func F1, LibFunc::Func F2, + LibCallOptimization* Opt) { + if (TLI->has(F1) && TLI->has(F2)) + Optimizations[TLI->getName(F1)] = Opt; +} + LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, - const TargetLibraryInfo *TLI) { - Impl = new LibCallSimplifierImpl(TD, TLI); + const TargetLibraryInfo *TLI, + bool UnsafeFPShrink) { + Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink); } LibCallSimplifier::~LibCallSimplifier() { @@ -870,4 +1886,9 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return Impl->optimizeCall(CI); } +void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { + I->replaceAllUsesWith(With); + I->eraseFromParent(); +} + } diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index b1cad06dff..8cf62196cc 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -15,12 +15,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/BasicBlock.h" #include "llvm/Function.h" #include "llvm/Instructions.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Type.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; char UnifyFunctionExitNodes::ID = 0; diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 4653a7d7c8..a48229132b 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -16,21 +16,13 @@ #define BBV_NAME "bb-vectorize" #define DEBUG_TYPE BBV_NAME -#include 
"llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" +#include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" @@ -38,14 +30,23 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Constants.h" +#include "llvm/DataLayout.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/DataLayout.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Vectorize.h" +#include "llvm/Type.h" #include <algorithm> #include <map> using namespace llvm; @@ -483,6 +484,10 @@ namespace { if (SelectInst *SI = dyn_cast<SelectInst>(I)) { T2 = SI->getCondition()->getType(); + } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) { + T2 = SI->getOperand(0)->getType(); + } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) { + T2 = CI->getOperand(0)->getType(); } } @@ -671,6 +676,19 @@ namespace { return false; } + + bool isPureIEChain(InsertElementInst *IE) { + InsertElementInst *IENext = IE; + do { + if (!isa<UndefValue>(IENext->getOperand(0)) && + !isa<InsertElementInst>(IENext->getOperand(0))) { + return false; + } + } while ((IENext = + dyn_cast<InsertElementInst>(IENext->getOperand(0)))); + + return true; + } }; // This function implements one vectorization iteration on the provided @@ -987,10 +1005,11 @@ namespace { // We don't want to fuse to a type that will be split, even // if the two input types will also be split and there is no other // associated cost. - unsigned VParts = VTTI->getNumberOfParts(VT1); - if (VParts > 1) + unsigned VParts1 = VTTI->getNumberOfParts(VT1), + VParts2 = VTTI->getNumberOfParts(VT2); + if (VParts1 > 1 || VParts2 > 1) return false; - else if (!VParts && VCost == ICost + JCost) + else if ((!VParts1 || !VParts2) && VCost == ICost + JCost) return false; CostSavings = ICost + JCost - VCost; @@ -1466,7 +1485,7 @@ namespace { PrunedTree.insert(QTop.first); // Visit each child, pruning as necessary... 
- DenseMap<ValuePair, size_t> BestChildren; + SmallVector<ValuePairWithDepth, 8> BestChildren; VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first); for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first; K != QTopRange.second; ++K) { @@ -1498,7 +1517,7 @@ namespace { DenseSet<ValuePair> CurrentPairs; bool CanAdd = true; - for (DenseMap<ValuePair, size_t>::iterator C2 + for (SmallVector<ValuePairWithDepth, 8>::iterator C2 = BestChildren.begin(), E2 = BestChildren.end(); C2 != E2; ++C2) { if (C2->first.first == C->first.first || @@ -1583,22 +1602,22 @@ namespace { // to an already-selected child. Check for this here, and if a // conflict is found, then remove the previously-selected child // before adding this one in its place. - for (DenseMap<ValuePair, size_t>::iterator C2 + for (SmallVector<ValuePairWithDepth, 8>::iterator C2 = BestChildren.begin(); C2 != BestChildren.end();) { if (C2->first.first == C->first.first || C2->first.first == C->first.second || C2->first.second == C->first.first || C2->first.second == C->first.second || pairsConflict(C2->first, C->first, PairableInstUsers)) - BestChildren.erase(C2++); + C2 = BestChildren.erase(C2); else ++C2; } - BestChildren.insert(ValuePairWithDepth(C->first, C->second)); + BestChildren.push_back(ValuePairWithDepth(C->first, C->second)); } - for (DenseMap<ValuePair, size_t>::iterator C + for (SmallVector<ValuePairWithDepth, 8>::iterator C = BestChildren.begin(), E2 = BestChildren.end(); C != E2; ++C) { size_t DepthF = getDepthFactor(C->first.first); @@ -1683,10 +1702,20 @@ namespace { // The set of pairs that have already contributed to the total cost. DenseSet<ValuePair> IncomingPairs; + // If the cost model were perfect, this might not be necessary; but we + // need to make sure that we don't get stuck vectorizing our own + // shuffle chains. + bool HasNontrivialInsts = false; + // The node weights represent the cost savings associated with // fusing the pair of instructions. for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), E = PrunedTree.end(); S != E; ++S) { + if (!isa<ShuffleVectorInst>(S->first) && + !isa<InsertElementInst>(S->first) && + !isa<ExtractElementInst>(S->first)) + HasNontrivialInsts = true; + bool FlipOrder = false; if (getDepthFactor(S->first)) { @@ -1760,9 +1789,12 @@ namespace { bool NeedsExtraction = false; for (Value::use_iterator I = S->first->use_begin(), IE = S->first->use_end(); I != IE; ++I) { - if (isa<ShuffleVectorInst>(*I) || - isa<InsertElementInst>(*I) || - isa<ExtractElementInst>(*I)) + if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) { + // Shuffle can be folded if it has no other input + if (isa<UndefValue>(SI->getOperand(1))) + continue; + } + if (isa<ExtractElementInst>(*I)) continue; if (PrunedTreeInstrs.count(*I)) continue; @@ -1787,9 +1819,12 @@ namespace { NeedsExtraction = false; for (Value::use_iterator I = S->second->use_begin(), IE = S->second->use_end(); I != IE; ++I) { - if (isa<ShuffleVectorInst>(*I) || - isa<InsertElementInst>(*I) || - isa<ExtractElementInst>(*I)) + if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) { + // Shuffle can be folded if it has no other input + if (isa<UndefValue>(SI->getOperand(1))) + continue; + } + if (isa<ExtractElementInst>(*I)) continue; if (PrunedTreeInstrs.count(*I)) continue; @@ -1839,14 +1874,37 @@ namespace { // Combining vector operations of the same type is also assumed // folded with other operations. 
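Context for the hunk below: the old check treated any same-typed pair of shuffles or insertelements as free to combine, which let the vectorizer chase its own shuffle chains; the replacement is more selective. A hedged sketch of the new "two shuffles, at most two distinct inputs" test (names and pointer types invented for illustration):

#include <set>

bool foldableShufflePair(void *A0, void *A1, void *B0, void *B1) {
  std::set<void *> Ops; // distinct shuffle operands across both shuffles
  Ops.insert(A0); Ops.insert(A1);
  Ops.insert(B0); Ops.insert(B1);
  return Ops.size() <= 2; // then one wider shuffle can feed both uses
}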
- if (Ty1 == Ty2 && - (isa<ShuffleVectorInst>(O1) || - isa<InsertElementInst>(O1) || - isa<InsertElementInst>(O1)) && - (isa<ShuffleVectorInst>(O2) || - isa<InsertElementInst>(O2) || - isa<InsertElementInst>(O2))) - continue; + if (Ty1 == Ty2) { + // If both are insert elements, then both can be widened. + InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1), + *IEO2 = dyn_cast<InsertElementInst>(O2); + if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2)) + continue; + // If both are extract elements, and both have the same input + // type, then they can be replaced with a shuffle + ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1), + *EIO2 = dyn_cast<ExtractElementInst>(O2); + if (EIO1 && EIO2 && + EIO1->getOperand(0)->getType() == + EIO2->getOperand(0)->getType()) + continue; + // If both are shuffles with equal operand types and only two + // unique operands, then they can be replaced with a single + // shuffle + ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1), + *SIO2 = dyn_cast<ShuffleVectorInst>(O2); + if (SIO1 && SIO2 && + SIO1->getOperand(0)->getType() == + SIO2->getOperand(0)->getType()) { + SmallSet<Value *, 4> SIOps; + SIOps.insert(SIO1->getOperand(0)); + SIOps.insert(SIO1->getOperand(1)); + SIOps.insert(SIO2->getOperand(0)); + SIOps.insert(SIO2->getOperand(1)); + if (SIOps.size() <= 2) + continue; + } + } int ESContrib; // This pair has already been formed. @@ -1894,6 +1952,13 @@ namespace { } } } + + if (!HasNontrivialInsts) { + DEBUG(if (DebugPairSelection) dbgs() << + "\tNo non-trivial instructions in tree;" + " override to zero effective size\n"); + EffSize = 0; + } } else { for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), E = PrunedTree.end(); S != E; ++S) @@ -2092,18 +2157,7 @@ namespace { if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) { // If we have a pure insertelement chain, then this can be rewritten // into a chain that directly builds the larger type. - bool PureChain = true; - InsertElementInst *LIENext = LIE; - do { - if (!isa<UndefValue>(LIENext->getOperand(0)) && - !isa<InsertElementInst>(LIENext->getOperand(0))) { - PureChain = false; - break; - } - } while ((LIENext = - dyn_cast<InsertElementInst>(LIENext->getOperand(0)))); - - if (PureChain) { + if (isPureIEChain(LIE)) { SmallVector<Value *, 8> VectElemts(numElemL, UndefValue::get(ArgTypeL->getScalarType())); InsertElementInst *LIENext = LIE; @@ -2849,6 +2903,7 @@ namespace { K->mutateType(getVecTypeForPair(L->getType(), H->getType())); combineMetadata(K, H); + K->intersectOptionalDataWith(H); for (unsigned o = 0; o < NumOperands; ++o) K->setOperand(o, ReplacedOperands[o]); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 892808760f..feeececedb 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6,355 +6,50 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops -// and generates target-independent LLVM-IR. Legalization of the IR is done -// in the codegen. However, the vectorizes uses (will use) the codegen -// interfaces to generate IR that is likely to result in an optimal binary. -// -// The loop vectorizer combines consecutive loop iteration into a single -// 'wide' iteration. After this transformation the index is incremented -// by the SIMD vector width, and not by one.
-// -// This pass has three parts: -// 1. The main loop pass that drives the different parts. -// 2. LoopVectorizationLegality - A unit that checks for the legality -// of the vectorization. -// 3. SingleBlockLoopVectorizer - A unit that performs the actual -// widening of instructions. -// 4. LoopVectorizationCostModel - A unit that checks for the profitability -// of vectorization. It decides on the optimal vector width, which -// can be one, if vectorization is not profitable. -//===----------------------------------------------------------------------===// -// -// The reduction-variable vectorization is based on the paper: -// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. -// -// Variable uniformity checks are inspired by: -// Karrenberg, R. and Hack, S. Whole Function Vectorization. -// -// Other ideas/concepts are from: -// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. -// -//===----------------------------------------------------------------------===// -#define LV_NAME "loop-vectorize" -#define DEBUG_TYPE LV_NAME -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Pass.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Value.h" -#include "llvm/Function.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/ADT/SmallVector.h" +#include "LoopVectorize.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/TargetTransformInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Constants.h" +#include "llvm/DataLayout.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" +#include "llvm/TargetTransformInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include <algorithm> -using namespace llvm; +#include "llvm/Transforms/Vectorize.h" +#include "llvm/Type.h" +#include "llvm/Value.h" static cl::opt<unsigned> VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, - cl::desc("Set the default vectorization width. Zero is autoselect.")); + cl::desc("Sets the SIMD width. Zero is autoselect.")); -/// We don't vectorize loops with a known constant trip count below this number. -const unsigned TinyTripCountThreshold = 16; +static cl::opt<bool> +EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, + cl::desc("Enable if-conversion during vectorization.")); namespace { -// Forward declarations. 
-class LoopVectorizationLegality; -class LoopVectorizationCostModel; - -/// SingleBlockLoopVectorizer vectorizes loops which contain only one basic -/// block to a specified vectorization factor (VF). -/// This class performs the widening of scalars into vectors, or multiple -/// scalars. This class also implements the following features: -/// * It inserts an epilogue loop for handling loops that don't have iteration -/// counts that are known to be a multiple of the vectorization factor. -/// * It handles the code generation for reduction variables. -/// * Scalarization (implementation using scalars) of un-vectorizable -/// instructions. -/// SingleBlockLoopVectorizer does not perform any vectorization-legality -/// checks, and relies on the caller to check for the different legality -/// aspects. The SingleBlockLoopVectorizer relies on the -/// LoopVectorizationLegality class to provide information about the induction -/// and reduction variables that were found to a given vectorization factor. -class SingleBlockLoopVectorizer { -public: - /// Ctor. - SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li, - DominatorTree *dt, LPPassManager *Lpm, - unsigned VecWidth): - OrigLoop(Orig), SE(Se), LI(Li), DT(dt), LPM(Lpm), VF(VecWidth), - Builder(Se->getContext()), Induction(0), OldInduction(0) { } - - // Perform the actual loop widening (vectorization). - void vectorize(LoopVectorizationLegality *Legal) { - ///Create a new empty loop. Unlink the old loop and connect the new one. - createEmptyLoop(Legal); - /// Widen each instruction in the old loop to a new one in the new loop. - /// Use the Legality module to find the induction and reduction variables. - vectorizeLoop(Legal); - // register the new loop. - updateAnalysis(); - } - -private: - /// Create an empty loop, based on the loop ranges of the old loop. - void createEmptyLoop(LoopVectorizationLegality *Legal); - /// Copy and widen the instructions from the old loop. - void vectorizeLoop(LoopVectorizationLegality *Legal); - /// Insert the new loop to the loop hierarchy and pass manager. - void updateAnalysis(); - - /// This instruction is un-vectorizable. Implement it as a sequence - /// of scalars. - void scalarizeInstruction(Instruction *Instr); - - /// Create a broadcast instruction. This method generates a broadcast - /// instruction (shuffle) for loop invariant values and for the induction - /// value. If this is the induction variable then we extend it to N, N+1, ... - /// this is needed because each iteration in the loop corresponds to a SIMD - /// element. - Value *getBroadcastInstrs(Value *V); - - /// This is a helper function used by getBroadcastInstrs. It adds 0, 1, 2 .. - /// for each element in the vector. Starting from zero. - Value *getConsecutiveVector(Value* Val); - - /// When we go over instructions in the basic block we rely on previous - /// values within the current basic block or on loop invariant values. - /// When we widen (vectorize) values we place them in the map. If the values - /// are not within the map, they have to be loop invariant, so we simply - /// broadcast them into a vector. - Value *getVectorValue(Value *V); - - /// Get a uniform vector of constant integers. We use this to get - /// vectors of ones and zeros for the reduction code. - Constant* getUniformVector(unsigned Val, Type* ScalarTy); - - typedef DenseMap<Value*, Value*> ValueMap; - - /// The original loop. - Loop *OrigLoop; - // Scev analysis to use. - ScalarEvolution *SE; - // Loop Info. 
- LoopInfo *LI; - // Dominator Tree. - DominatorTree *DT; - // Loop Pass Manager; - LPPassManager *LPM; - // The vectorization factor to use. - unsigned VF; - - // The builder that we use - IRBuilder<> Builder; - - // --- Vectorization state --- - - /// The vector-loop preheader. - BasicBlock *LoopVectorPreHeader; - /// The scalar-loop preheader. - BasicBlock *LoopScalarPreHeader; - /// Middle Block between the vector and the scalar. - BasicBlock *LoopMiddleBlock; - ///The ExitBlock of the scalar loop. - BasicBlock *LoopExitBlock; - ///The vector loop body. - BasicBlock *LoopVectorBody; - ///The scalar loop body. - BasicBlock *LoopScalarBody; - ///The first bypass block. - BasicBlock *LoopBypassBlock; - - /// The new Induction variable which was added to the new block. - PHINode *Induction; - /// The induction variable of the old basic block. - PHINode *OldInduction; - // Maps scalars to widened vectors. - ValueMap WidenMap; -}; - -/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and -/// to what vectorization factor. -/// This class does not look at the profitability of vectorization, only the -/// legality. This class has two main kinds of checks: -/// * Memory checks - The code in canVectorizeMemory checks if vectorization -/// will change the order of memory accesses in a way that will change the -/// correctness of the program. -/// * Scalars checks - The code in canVectorizeBlock checks for a number -/// of different conditions, such as the availability of a single induction -/// variable, that all types are supported and vectorize-able, etc. -/// This code reflects the capabilities of SingleBlockLoopVectorizer. -/// This class is also used by SingleBlockLoopVectorizer for identifying -/// induction variable and the different reduction variables. -class LoopVectorizationLegality { -public: - LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl): - TheLoop(Lp), SE(Se), DL(Dl), Induction(0) { } - - /// This represents the kinds of reductions that we support. - enum ReductionKind { - NoReduction, /// Not a reduction. - IntegerAdd, /// Sum of numbers. - IntegerMult, /// Product of numbers. - IntegerOr, /// Bitwise or logical OR of numbers. - IntegerAnd, /// Bitwise or logical AND of numbers. - IntegerXor /// Bitwise or logical XOR of numbers. - }; - - /// This POD struct holds information about reduction variables. - struct ReductionDescriptor { - // Default C'tor - ReductionDescriptor(): - StartValue(0), LoopExitInstr(0), Kind(NoReduction) {} - - // C'tor. - ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K): - StartValue(Start), LoopExitInstr(Exit), Kind(K) {} - - // The starting value of the reduction. - // It does not have to be zero! - Value *StartValue; - // The instruction who's value is used outside the loop. - Instruction *LoopExitInstr; - // The kind of the reduction. - ReductionKind Kind; - }; - - /// ReductionList contains the reduction descriptors for all - /// of the reductions that were found in the loop. - typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; - - /// Returns true if it is legal to vectorize this loop. - /// This does not mean that it is profitable to vectorize this - /// loop, only that it is legal to do so. - bool canVectorize(); - - /// Returns the Induction variable. - PHINode *getInduction() {return Induction;} - - /// Returns the reduction variables found in the loop. 
- ReductionList *getReductionVars() { return &Reductions; } - - /// Check if the pointer returned by this GEP is consecutive - /// when the index is vectorized. This happens when the last - /// index of the GEP is consecutive, like the induction variable. - /// This check allows us to vectorize A[idx] into a wide load/store. - bool isConsecutiveGep(Value *Ptr); - - /// Returns true if this instruction will remain scalar after vectorization. - bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);} - -private: - /// Check if a single basic block loop is vectorizable. - /// At this point we know that this is a loop with a constant trip count - /// and we only need to check individual instructions. - bool canVectorizeBlock(BasicBlock &BB); - - /// When we vectorize loops we may change the order in which - /// we read and write from memory. This method checks if it is - /// legal to vectorize the code, considering only memory constrains. - /// Returns true if BB is vectorizable - bool canVectorizeMemory(BasicBlock &BB); - - /// Returns True, if 'Phi' is the kind of reduction variable for type - /// 'Kind'. If this is a reduction variable, it adds it to ReductionList. - bool AddReductionVar(PHINode *Phi, ReductionKind Kind); - /// Returns true if the instruction I can be a reduction variable of type - /// 'Kind'. - bool isReductionInstr(Instruction *I, ReductionKind Kind); - /// Returns True, if 'Phi' is an induction variable. - bool isInductionVariable(PHINode *Phi); - - /// The loop that we evaluate. - Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; - /// DataLayout analysis. - DataLayout *DL; - - // --- vectorization state --- // - - /// Holds the induction variable. - PHINode *Induction; - /// Holds the reduction variables. - ReductionList Reductions; - /// Allowed outside users. This holds the reduction - /// vars which can be accessed from outside the loop. - SmallPtrSet<Value*, 4> AllowedExit; - /// This set holds the variables which are known to be uniform after - /// vectorization. - SmallPtrSet<Instruction*, 4> Uniforms; -}; - -/// LoopVectorizationCostModel - estimates the expected speedups due to -/// vectorization. -/// In many cases vectorization is not profitable. This can happen because -/// of a number of reasons. In this class we mainly attempt to predict -/// the expected speedup/slowdowns due to the supported instruction set. -/// We use the VectorTargetTransformInfo to query the different backends -/// for the cost of different operations. -class LoopVectorizationCostModel { -public: - /// C'tor. - LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se, - LoopVectorizationLegality *Leg, - const VectorTargetTransformInfo *Vtti): - TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { } - - /// Returns the most profitable vectorization factor for the loop that is - /// smaller or equal to the VF argument. This method checks every power - /// of two up to VF. - unsigned findBestVectorizationFactor(unsigned VF = 8); - -private: - /// Returns the expected execution cost. The unit of the cost does - /// not matter because we use the 'cost' units to compare different - /// vector widths. The cost that is returned is *not* normalized by - /// the factor width. - unsigned expectedCost(unsigned VF); - - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - unsigned getInstructionCost(Instruction *I, unsigned VF); - - /// A helper function for converting Scalar types to vector types. 
- /// If the incoming type is void, we return void. If the VF is 1, we return
- /// the scalar type.
- static Type* ToVectorTy(Type *Scalar, unsigned VF);
-
- /// The loop that we evaluate.
- Loop *TheLoop;
- /// Scev analysis.
- ScalarEvolution *SE;
-
- /// Vectorization legality.
- LoopVectorizationLegality *Legal;
- /// Vector target information.
- const VectorTargetTransformInfo *VTTI;
-};
-
+/// The LoopVectorize Pass.
 struct LoopVectorize : public LoopPass {
 static char ID; // Pass identification, replacement for typeid
@@ -383,7 +78,7 @@ struct LoopVectorize : public LoopPass {
 L->getHeader()->getParent()->getName() << "\"\n");
 
 // Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL);
+ LoopVectorizationLegality LVL(L, SE, DL, DT);
 if (!LVL.canVectorize()) {
 DEBUG(dbgs() << "LV: Not vectorizing.\n");
 return false;
@@ -414,7 +109,7 @@ struct LoopVectorize : public LoopPass {
 "\n");
 
 // If we decided that it is *legal* to vectorize the loop then do it.
- SingleBlockLoopVectorizer LB(L, SE, LI, DT, &LPM, VF);
+ InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF);
 LB.vectorize(&LVL);
 
 DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -434,15 +129,44 @@ struct LoopVectorize : public LoopPass {
 };
 
-Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
- // Instructions that access the old induction variable
- // actually want to get the new one.
- if (V == OldInduction)
- V = Induction;
+}// namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and
+// LoopVectorizationCostModel.
+//===----------------------------------------------------------------------===//
+
+void
+LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
+ Loop *Lp, Value *Ptr) {
+ const SCEV *Sc = SE->getSCEV(Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ assert(AR && "Invalid addrec expression");
+ const SCEV *Ex = SE->getExitCount(Lp, Lp->getLoopLatch());
+ const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ Pointers.push_back(Ptr);
+ Starts.push_back(AR->getStart());
+ Ends.push_back(ScEnd);
+}
+
+Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
 // Create the types.
 LLVMContext &C = V->getContext();
 Type *VTy = VectorType::get(V->getType(), VF);
 Type *I32 = IntegerType::getInt32Ty(C);
+
+ // Save the current insertion location.
+ Instruction *Loc = Builder.GetInsertPoint();
+
+ // We need to place the broadcast of invariant variables outside the loop.
+ Instruction *Instr = dyn_cast<Instruction>(V);
+ bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
+ bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
+
+ // Place the code for broadcasting invariant variables in the new preheader.
+ if (Invariant)
+ Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+
 Constant *Zero = ConstantInt::get(I32, 0);
 Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32, VF));
 Value *UndefVal = UndefValue::get(VTy);
@@ -450,27 +174,28 @@ Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
 Value *SingleElem = Builder.CreateInsertElement(UndefVal, V, Zero);
 // Broadcast the scalar into all locations in the vector.
 Value *Shuf = Builder.CreateShuffleVector(SingleElem, UndefVal, Zeros,
- "broadcast");
- // We are accessing the induction variable. Make sure to promote the
- // index for each consecutive SIMD lane. This adds 0,1,2 ... to all lanes.
- if (V == Induction) - return getConsecutiveVector(Shuf); + "broadcast"); + + // Restore the builder insertion point. + if (Invariant) + Builder.SetInsertPoint(Loc); + return Shuf; } -Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) { +Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, bool Negate) { assert(Val->getType()->isVectorTy() && "Must be a vector"); assert(Val->getType()->getScalarType()->isIntegerTy() && "Elem must be an integer"); // Create the types. Type *ITy = Val->getType()->getScalarType(); VectorType *Ty = cast<VectorType>(Val->getType()); - unsigned VLen = Ty->getNumElements(); + int VLen = Ty->getNumElements(); SmallVector<Constant*, 8> Indices; // Create a vector of consecutive numbers from zero to VF. - for (unsigned i = 0; i < VLen; ++i) - Indices.push_back(ConstantInt::get(ITy, i)); + for (int i = 0; i < VLen; ++i) + Indices.push_back(ConstantInt::get(ITy, Negate ? (-i): i )); // Add the consecutive indices to the vector value. Constant *Cv = ConstantVector::get(Indices); @@ -478,7 +203,17 @@ Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) { return Builder.CreateAdd(Val, Cv, "induction"); } -bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) { +bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { + assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); + + // If this value is a pointer induction variable we know it is consecutive. + PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr); + if (Phi && Inductions.count(Phi)) { + InductionInfo II = Inductions[Phi]; + if (PtrInduction == II.IK) + return true; + } + GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr); if (!Gep) return false; @@ -491,7 +226,7 @@ bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) { if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) return false; - // We can emit wide load/stores only of the last index is the induction + // We can emit wide load/stores only if the last index is the induction // variable. const SCEV *Last = SE->getSCEV(LastIndex); if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) { @@ -506,7 +241,12 @@ bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) { return false; } -Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) { +bool LoopVectorizationLegality::isUniform(Value *V) { + return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); +} + +Value *InnerLoopVectorizer::getVectorValue(Value *V) { + assert(V != Induction && "The new induction variable should not be used."); assert(!V->getType()->isVectorTy() && "Can't widen a vector"); // If we saved a vectorized copy of V, use it. Value *&MapEntry = WidenMap[V]; @@ -520,17 +260,11 @@ Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) { } Constant* -SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) { - SmallVector<Constant*, 8> Indices; - // Create a vector of consecutive numbers from zero to VF. - for (unsigned i = 0; i < VF; ++i) - Indices.push_back(ConstantInt::get(ScalarTy, Val, true)); - - // Add the consecutive indices to the vector value. 
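// Editorial sketch, not part of this patch: getBroadcastInstrs() splats one
// scalar across all VF lanes (insertelement + shufflevector), and
// getConsecutiveVector() then adds the lane offsets 0, 1, 2, ... (negated
// when Negate is set, for reverse inductions). Modeled on plain arrays with
// an assumed VF of 4; all names here are illustrative:
#include <array>
constexpr unsigned VF = 4;

std::array<int, VF> broadcast(int Scalar) {
  std::array<int, VF> V;
  V.fill(Scalar);                      // the "broadcast" splat
  return V;
}

std::array<int, VF> consecutive(int Induction, bool Negate = false) {
  std::array<int, VF> V = broadcast(Induction);
  for (unsigned i = 0; i < VF; ++i)    // add <0, 1, 2, 3> (or <0,-1,-2,-3>)
    V[i] += Negate ? -int(i) : int(i); // the "induction" add
  return V;
}
// With Induction = n, each SIMD lane now carries its own iteration index
// n, n+1, n+2, n+3, which is exactly what consecutive loads/stores need.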
- return ConstantVector::get(Indices); +InnerLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) { + return ConstantVector::getSplat(VF, ConstantInt::get(ScalarTy, Val, true)); } -void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) { +void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); // Holds vector parameters or scalars, in case of uniform vals. SmallVector<Value*, 8> Params; @@ -541,7 +275,7 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) { // If we are accessing the old induction variable, use the new one. if (SrcOp == OldInduction) { - Params.push_back(getBroadcastInstrs(Induction)); + Params.push_back(getVectorValue(SrcOp)); continue; } @@ -600,63 +334,158 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) { WidenMap[Instr] = VecResults; } +Value* +InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, + Instruction *Loc) { + LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck = + Legal->getRuntimePointerCheck(); + + if (!PtrRtCheck->Need) + return NULL; + + Value *MemoryRuntimeCheck = 0; + unsigned NumPointers = PtrRtCheck->Pointers.size(); + SmallVector<Value* , 2> Starts; + SmallVector<Value* , 2> Ends; + + SCEVExpander Exp(*SE, "induction"); + + // Use this type for pointer arithmetic. + Type* PtrArithTy = Type::getInt8PtrTy(Loc->getContext(), 0); + + for (unsigned i = 0; i < NumPointers; ++i) { + Value *Ptr = PtrRtCheck->Pointers[i]; + const SCEV *Sc = SE->getSCEV(Ptr); + + if (SE->isLoopInvariant(Sc, OrigLoop)) { + DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" << + *Ptr <<"\n"); + Starts.push_back(Ptr); + Ends.push_back(Ptr); + } else { + DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n"); + + Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc); + Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc); + Starts.push_back(Start); + Ends.push_back(End); + } + } + + for (unsigned i = 0; i < NumPointers; ++i) { + for (unsigned j = i+1; j < NumPointers; ++j) { + Instruction::CastOps Op = Instruction::BitCast; + Value *Start0 = CastInst::Create(Op, Starts[i], PtrArithTy, "bc", Loc); + Value *Start1 = CastInst::Create(Op, Starts[j], PtrArithTy, "bc", Loc); + Value *End0 = CastInst::Create(Op, Ends[i], PtrArithTy, "bc", Loc); + Value *End1 = CastInst::Create(Op, Ends[j], PtrArithTy, "bc", Loc); + + Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, + Start0, End1, "bound0", Loc); + Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, + Start1, End0, "bound1", Loc); + Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1, + "found.conflict", Loc); + if (MemoryRuntimeCheck) + MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or, + MemoryRuntimeCheck, + IsConflict, + "conflict.rdx", Loc); + else + MemoryRuntimeCheck = IsConflict; + + } + } + + return MemoryRuntimeCheck; +} + void -SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { +InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { /* In this function we generate a new loop. The new loop will contain the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- vector loop bypass. - / | - / v -| [ ] <-- vector pre header. -| | -| v -| [ ] \ -| [ ]_| <-- vector loop. -| | - \ v + [ ] <-- vector loop bypass. 
+ / | + / v + | [ ] <-- vector pre header. + | | + | v + | [ ] \ + | [ ]_| <-- vector loop. + | | + \ v >[ ] <--- middle-block. - / | - / v -| [ ] <--- new preheader. -| | -| v -| [ ] \ -| [ ]_| <-- old scalar loop to handle remainder. - \ | - \ v + / | + / v + | [ ] <--- new preheader. + | | + | v + | [ ] \ + | [ ]_| <-- old scalar loop to handle remainder. + \ | + \ v >[ ] <-- exit block. ... */ - // This is the original scalar-loop preheader. + BasicBlock *OldBasicBlock = OrigLoop->getHeader(); BasicBlock *BypassBlock = OrigLoop->getLoopPreheader(); BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(ExitBlock && "Must have an exit block"); - // The loop index does not have to start at Zero. It starts with this value. + // Some loops have a single integer induction variable, while other loops + // don't. One example is c++ iterators that often have multiple pointer + // induction variables. In the code below we also support a case where we + // don't have a single induction variable. OldInduction = Legal->getInduction(); - Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock); + Type *IdxTy = OldInduction ? OldInduction->getType() : + DL->getIntPtrType(SE->getContext()); + + // Find the loop boundaries. + const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch()); + assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); + + // Get the total trip count from the count by adding 1. + ExitCount = SE->getAddExpr(ExitCount, + SE->getConstant(ExitCount->getType(), 1)); + + // Expand the trip count and place the new instructions in the preheader. + // Notice that the pre-header does not change, only the loop body. + SCEVExpander Exp(*SE, "induction"); + + // Count holds the overall loop count (N). + Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), + BypassBlock->getTerminator()); + + // The loop index does not have to start at Zero. Find the original start + // value from the induction PHI node. If we don't have an induction variable + // then we know that it starts at zero. + Value *StartIdx = OldInduction ? + OldInduction->getIncomingValueForBlock(BypassBlock): + ConstantInt::get(IdxTy, 0); - assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop"); assert(BypassBlock && "Invalid loop structure"); - BasicBlock *VectorPH = - BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph"); - BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(), - "vector.body"); + // Generate the code that checks in runtime if arrays overlap. + Value *MemoryRuntimeCheck = addRuntimeCheck(Legal, + BypassBlock->getTerminator()); - BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(), - "middle.block"); + // Split the single block loop into the two loop structure described above. + BasicBlock *VectorPH = + BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph"); + BasicBlock *VecBody = + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); + BasicBlock *MiddleBlock = + VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block"); BasicBlock *ScalarPH = - MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), - "scalar.preheader"); - // Find the induction variable. 
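// Editorial sketch, not part of this patch: each bound test emitted by
// addRuntimeCheck() above reduces, per pointer pair, to an interval-overlap
// check on the byte ranges [Start0, End0] and [Start1, End1] that the two
// pointers traverse during the loop. In scalar form (illustrative names):
#include <cstdint>
bool mayConflict(uintptr_t Start0, uintptr_t End0,
                 uintptr_t Start1, uintptr_t End1) {
  bool Bound0 = Start0 <= End1;        // "bound0"
  bool Bound1 = Start1 <= End0;        // "bound1"
  return Bound0 && Bound1;             // "found.conflict"
}
// The per-pair results are OR'ed together into MemoryRuntimeCheck
// ("conflict.rdx"); if any pair may overlap, control bypasses the vector
// loop at run time and the original scalar loop executes instead.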
- BasicBlock *OldBasicBlock = OrigLoop->getHeader(); - assert(OldInduction && "We must have a single phi node."); - Type *IdxTy = OldInduction->getType(); + MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph"); + + // This is the location in which we add all of the logic for bypassing + // the new vector loop. + Instruction *Loc = BypassBlock->getTerminator(); // Use this IR builder to create the loop instructions (Phi, Br, Cmp) // inside the loop. @@ -666,27 +495,16 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { Induction = Builder.CreatePHI(IdxTy, 2, "index"); Constant *Step = ConstantInt::get(IdxTy, VF); - // Find the loop boundaries. - const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader()); - assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); - - // Get the total trip count from the count by adding 1. - ExitCount = SE->getAddExpr(ExitCount, - SE->getConstant(ExitCount->getType(), 1)); - - // Expand the trip count and place the new instructions in the preheader. - // Notice that the pre-header does not change, only the loop body. - SCEVExpander Exp(*SE, "induction"); - Instruction *Loc = BypassBlock->getTerminator(); - // We may need to extend the index in case there is a type mismatch. // We know that the count starts at zero and does not overflow. - // We are using Zext because it should be less expensive. - if (ExitCount->getType() != Induction->getType()) - ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy); - - // Count holds the overall loop count (N). - Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc); + if (Count->getType() != IdxTy) { + // The exit count can be of pointer type. Convert it to the correct + // integer type. + if (ExitCount->getType()->isPointerTy()) + Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc); + else + Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc); + } // Add the start index to the loop count to get the new end index. Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc); @@ -699,20 +517,112 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx, "end.idx.rnd.down", Loc); - // Now, compare the new count to zero. If it is zero, jump to the scalar part. + // Now, compare the new count to zero. If it is zero skip the vector loop and + // jump to the scalar loop. Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEndRoundDown, StartIdx, "cmp.zero", Loc); + + // If we are using memory runtime checks, include them in. + if (MemoryRuntimeCheck) + Cmp = BinaryOperator::Create(Instruction::Or, Cmp, MemoryRuntimeCheck, + "CntOrMem", Loc); + BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc); // Remove the old terminator. Loc->eraseFromParent(); + // We are going to resume the execution of the scalar loop. + // Go over all of the induction variables that we found and fix the + // PHIs that are left in the scalar version of the loop. + // The starting values of PHI nodes depend on the counter of the last + // iteration in the vectorized loop. + // If we come from a bypass edge then we need to start from the original + // start value. + + // This variable saves the new starting index for the scalar loop. 
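// Editorial sketch, not part of this patch: the index arithmetic above, in
// scalar terms. The vector body runs Count - Count % VF iterations starting
// at StartIdx; the scalar loop finishes the remainder. If the rounded-down
// count is zero, or the memory runtime check fired, the vector body is
// skipped entirely. Names below are illustrative:
#include <cstdint>
struct LoopBounds {
  uint64_t IdxEnd, IdxEndRoundDown;
  bool SkipVectorLoop;
};
LoopBounds computeBounds(uint64_t StartIdx, uint64_t Count, uint64_t VF,
                         bool MemoryRuntimeCheck) {
  uint64_t CountRoundDown = Count - Count % VF;   // drop the "n.mod.vf" tail
  LoopBounds B;
  B.IdxEnd = StartIdx + Count;                    // "end.idx"
  B.IdxEndRoundDown = StartIdx + CountRoundDown;  // "end.idx.rnd.down"
  B.SkipVectorLoop =                              // "cmp.zero" OR "CntOrMem"
      (CountRoundDown == 0) || MemoryRuntimeCheck;
  return B;
}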
+ PHINode *ResumeIndex = 0; + LoopVectorizationLegality::InductionList::iterator I, E; + LoopVectorizationLegality::InductionList *List = Legal->getInductionVars(); + for (I = List->begin(), E = List->end(); I != E; ++I) { + PHINode *OrigPhi = I->first; + LoopVectorizationLegality::InductionInfo II = I->second; + PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val", + MiddleBlock->getTerminator()); + Value *EndValue = 0; + switch (II.IK) { + case LoopVectorizationLegality::NoInduction: + llvm_unreachable("Unknown induction"); + case LoopVectorizationLegality::IntInduction: { + // Handle the integer induction counter: + assert(OrigPhi->getType()->isIntegerTy() && "Invalid type"); + assert(OrigPhi == OldInduction && "Unknown integer PHI"); + // We know what the end value is. + EndValue = IdxEndRoundDown; + // We also know which PHI node holds it. + ResumeIndex = ResumeVal; + break; + } + case LoopVectorizationLegality::ReverseIntInduction: { + // Convert the CountRoundDown variable to the PHI size. + unsigned CRDSize = CountRoundDown->getType()->getScalarSizeInBits(); + unsigned IISize = II.StartValue->getType()->getScalarSizeInBits(); + Value *CRD = CountRoundDown; + if (CRDSize > IISize) + CRD = CastInst::Create(Instruction::Trunc, CountRoundDown, + II.StartValue->getType(), + "tr.crd", BypassBlock->getTerminator()); + else if (CRDSize < IISize) + CRD = CastInst::Create(Instruction::SExt, CountRoundDown, + II.StartValue->getType(), + "sext.crd", BypassBlock->getTerminator()); + // Handle reverse integer induction counter: + EndValue = BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end", + BypassBlock->getTerminator()); + break; + } + case LoopVectorizationLegality::PtrInduction: { + // For pointer induction variables, calculate the offset using + // the end index. + EndValue = GetElementPtrInst::Create(II.StartValue, CountRoundDown, + "ptr.ind.end", + BypassBlock->getTerminator()); + break; + } + }// end of case + + // The new PHI merges the original incoming value, in case of a bypass, + // or the value at the end of the vectorized loop. + ResumeVal->addIncoming(II.StartValue, BypassBlock); + ResumeVal->addIncoming(EndValue, VecBody); + + // Fix the scalar body counter (PHI node). + unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); + OrigPhi->setIncomingValue(BlockIdx, ResumeVal); + } + + // If we are generating a new induction variable then we also need to + // generate the code that calculates the exit value. This value is not + // simply the end of the counter because we may skip the vectorized body + // in case of a runtime check. + if (!OldInduction){ + assert(!ResumeIndex && "Unexpected resume value found"); + ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val", + MiddleBlock->getTerminator()); + ResumeIndex->addIncoming(StartIdx, BypassBlock); + ResumeIndex->addIncoming(IdxEndRoundDown, VecBody); + } + + // Make sure that we found the index where scalar loop needs to continue. + assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() && + "Invalid resume Index"); + // Add a check in the middle block to see if we have completed // all of the iterations in the first vector loop. // If (N - N%VF) == N, then we *don't* need to run the remainder. 
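// Editorial sketch, not part of this patch: the switch above computes, for
// each induction PHI, the value it must resume from when the scalar
// remainder loop takes over, and the comparison emitted just below decides
// whether the remainder runs at all. With CRD = CountRoundDown iterations
// done by the vector body (illustrative names and types):
#include <cstdint>
int64_t reverseIntEnd(int64_t StartValue, int64_t CRD) {
  return StartValue - CRD;                 // "rev.ind.end"
}
int *ptrEnd(int *StartValue, int64_t CRD) {
  return StartValue + CRD;                 // "ptr.ind.end" (a scalar GEP)
}
bool skipScalarRemainder(uint64_t IdxEnd, uint64_t IdxEndRoundDown) {
  return IdxEnd == IdxEndRoundDown;        // "cmp.n": N - N % VF == N
}
// The primary integer IV simply resumes from IdxEndRoundDown; each
// "resume.val" PHI takes the original start value on the bypass edge and
// one of the end values above on the edge leaving the vector body.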
Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd, - IdxEndRoundDown, "cmp.n", + ResumeIndex, "cmp.n", MiddleBlock->getTerminator()); BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator()); @@ -730,26 +640,25 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Now we have two terminators. Remove the old one from the block. VecBody->getTerminator()->eraseFromParent(); - // Fix the scalar body iteration count. - unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH); - OldInduction->setIncomingValue(BlockIdx, IdxEndRoundDown); - // Get ready to start creating new instructions into the vectorized body. Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); - // Register the new loop. + // Create and register the new vector loop. Loop* Lp = new Loop(); - LPM->insertLoop(Lp, OrigLoop->getParentLoop()); - - Lp->addBasicBlockToLoop(VecBody, LI->getBase()); - Loop *ParentLoop = OrigLoop->getParentLoop(); + + // Insert the new loop into the loop nest and register the new basic blocks. if (ParentLoop) { + ParentLoop->addChildLoop(Lp); ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase()); ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase()); ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase()); + } else { + LI->addTopLevelLoop(Lp); } + Lp->addBasicBlockToLoop(VecBody, LI->getBase()); + // Save the state. LoopVectorPreHeader = VectorPH; LoopScalarPreHeader = ScalarPH; @@ -781,8 +690,37 @@ getReductionIdentity(LoopVectorizationLegality::ReductionKind K) { } } +static bool +isTriviallyVectorizableIntrinsic(Instruction *Inst) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); + if (!II) + return false; + switch (II->getIntrinsicID()) { + case Intrinsic::sqrt: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::log: + case Intrinsic::log10: + case Intrinsic::log2: + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + case Intrinsic::pow: + case Intrinsic::fma: + return true; + default: + return false; + } + return false; +} + void -SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { +InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { //===------------------------------------------------===// // // Notice: any optimization or new instruction that go @@ -790,202 +728,31 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // the cost-model. // //===------------------------------------------------===// - typedef SmallVector<PHINode*, 4> PhiVector; BasicBlock &BB = *OrigLoop->getHeader(); - Constant *Zero = ConstantInt::get( - IntegerType::getInt32Ty(BB.getContext()), 0); + Constant *Zero = + ConstantInt::get(IntegerType::getInt32Ty(BB.getContext()), 0); // In order to support reduction variables we need to be able to vectorize // Phi nodes. Phi nodes have cycles, so we need to vectorize them in two - // steages. First, we create a new vector PHI node with no incoming edges. + // stages. First, we create a new vector PHI node with no incoming edges. // We use this value when we vectorize all of the instructions that use the // PHI. Next, after all of the instructions in the block are complete we // add the new incoming edges to the PHI. At this point all of the // instructions in the basic block are vectorized, so we can use them to // construct the PHI. 
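// Editorial sketch, not part of this patch: an intrinsic accepted by
// isTriviallyVectorizableIntrinsic() above is widened by calling its vector
// overload once per VF lanes instead of VF scalar calls, e.g. llvm.sqrt.f32
// becomes llvm.sqrt.v4f32 at VF = 4. In source-level terms (illustrative):
#include <cmath>
void scalarForm(float *A, int N) {
  for (int i = 0; i < N; ++i)
    A[i] = std::sqrt(A[i]);          // one llvm.sqrt.f32 per iteration
}
void widenedForm(float *A, int N) {  // conceptual shape after vectorization
  int i = 0;
  for (; i + 4 <= N; i += 4)
    for (int l = 0; l < 4; ++l)      // models a single llvm.sqrt.v4f32 call
      A[i + l] = std::sqrt(A[i + l]);
  for (; i < N; ++i)                 // scalar remainder loop
    A[i] = std::sqrt(A[i]);
}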
- PhiVector PHIsToFix; + PhiVector RdxPHIsToFix; - // For each instruction in the old loop. - for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { - Instruction *Inst = it; + // Scan the loop in a topological order to ensure that defs are vectorized + // before users. + LoopBlocksDFS DFS(OrigLoop); + DFS.perform(LI); - switch (Inst->getOpcode()) { - case Instruction::Br: - // Nothing to do for PHIs and BR, since we already took care of the - // loop control flow instructions. - continue; - case Instruction::PHI:{ - PHINode* P = cast<PHINode>(Inst); - // Special handling for the induction var. - if (OldInduction == Inst) - continue; - // This is phase one of vectorizing PHIs. - // This has to be a reduction variable. - assert(Legal->getReductionVars()->count(P) && "Not a Reduction"); - Type *VecTy = VectorType::get(Inst->getType(), VF); - WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi"); - PHIsToFix.push_back(P); - continue; - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - // Just widen binops. - BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); - Value *A = getVectorValue(Inst->getOperand(0)); - Value *B = getVectorValue(Inst->getOperand(1)); - - // Use this vector value for all users of the original instruction. - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); - WidenMap[Inst] = V; - - // Update the NSW, NUW and Exact flags. - BinaryOperator *VecOp = cast<BinaryOperator>(V); - if (isa<OverflowingBinaryOperator>(BinOp)) { - VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap()); - VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap()); - } - if (isa<PossiblyExactOperator>(VecOp)) - VecOp->setIsExact(BinOp->isExact()); - break; - } - case Instruction::Select: { - // Widen selects. - // If the selector is loop invariant we can create a select - // instruction with a scalar condition. Otherwise, use vector-select. - Value *Cond = Inst->getOperand(0); - bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop); - - // The condition can be loop invariant but still defined inside the - // loop. This means that we can't just use the original 'cond' value. - // We have to take the 'vectorized' value and pick the first lane. - // Instcombine will make this a no-op. - Cond = getVectorValue(Cond); - if (InvariantCond) - Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0)); - - Value *Op0 = getVectorValue(Inst->getOperand(1)); - Value *Op1 = getVectorValue(Inst->getOperand(2)); - WidenMap[Inst] = Builder.CreateSelect(Cond, Op0, Op1); - break; - } - - case Instruction::ICmp: - case Instruction::FCmp: { - // Widen compares. Generate vector compares. - bool FCmp = (Inst->getOpcode() == Instruction::FCmp); - CmpInst *Cmp = dyn_cast<CmpInst>(Inst); - Value *A = getVectorValue(Inst->getOperand(0)); - Value *B = getVectorValue(Inst->getOperand(1)); - if (FCmp) - WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B); - else - WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B); - break; - } - - case Instruction::Store: { - // Attempt to issue a wide store. 
- StoreInst *SI = dyn_cast<StoreInst>(Inst); - Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF); - Value *Ptr = SI->getPointerOperand(); - unsigned Alignment = SI->getAlignment(); - GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - // This store does not use GEPs. - if (!Legal->isConsecutiveGep(Gep)) { - scalarizeInstruction(Inst); - break; - } + // Vectorize all of the blocks in the original loop. + for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), + be = DFS.endRPO(); bb != be; ++bb) + vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix); - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1)); - LastIndex = Builder.CreateExtractElement(LastIndex, Zero); - - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); - Gep2->setOperand(NumOperands - 1, LastIndex); - Ptr = Builder.Insert(Gep2); - Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo()); - Value *Val = getVectorValue(SI->getValueOperand()); - Builder.CreateStore(Val, Ptr)->setAlignment(Alignment); - break; - } - case Instruction::Load: { - // Attempt to issue a wide load. - LoadInst *LI = dyn_cast<LoadInst>(Inst); - Type *RetTy = VectorType::get(LI->getType(), VF); - Value *Ptr = LI->getPointerOperand(); - unsigned Alignment = LI->getAlignment(); - GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - - // We don't have a gep. Scalarize the load. - if (!Legal->isConsecutiveGep(Gep)) { - scalarizeInstruction(Inst); - break; - } - - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1)); - LastIndex = Builder.CreateExtractElement(LastIndex, Zero); - - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); - Gep2->setOperand(NumOperands - 1, LastIndex); - Ptr = Builder.Insert(Gep2); - Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo()); - LI = Builder.CreateLoad(Ptr); - LI->setAlignment(Alignment); - // Use this vector value for all users of the load. - WidenMap[Inst] = LI; - break; - } - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - /// Vectorize bitcasts. - CastInst *CI = dyn_cast<CastInst>(Inst); - Value *A = getVectorValue(Inst->getOperand(0)); - Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF); - WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy); - break; - } - - default: - /// All other instructions are unsupported. Scalarize them. - scalarizeInstruction(Inst); - break; - }// end of switch. - }// end of for_each instr. - - // At this point every instruction in the original loop is widended to + // At this point every instruction in the original loop is widened to // a vector form. We are almost done. Now, we need to fix the PHI nodes // that we vectorized. The PHI nodes are currently empty because we did // not want to introduce cycles. 
Notice that the remaining PHI nodes
@@ -994,7 +761,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 // Create the 'reduced' values for each of the reduction vars.
 // The reduced values are the vector values that we scalarize and combine
 // after the loop is finished.
- for (PhiVector::iterator it = PHIsToFix.begin(), e = PHIsToFix.end();
+ for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
 it != e; ++it) {
 PHINode *RdxPhi = *it;
 PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]);
@@ -1004,7 +771,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 assert(Legal->getReductionVars()->count(RdxPhi) &&
 "Unable to find the reduction variable");
 LoopVectorizationLegality::ReductionDescriptor RdxDesc =
- (*Legal->getReductionVars())[RdxPhi];
+ (*Legal->getReductionVars())[RdxPhi];
 
 // We need to generate a reduction vector from the incoming scalar.
 // To do so, we need to generate the 'identity' vector and override
@@ -1024,8 +791,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 // This vector is the Identity vector where the first element is the
 // incoming scalar reduction.
 Value *VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
-
+ RdxDesc.StartValue, Zero);
 
 // Fix the vector-loop phi.
 // We created the induction variable so we know that the
@@ -1035,8 +801,8 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 // Reductions do not have to start at zero. They can start with
 // any loop invariant values.
 VecRdxPhi->addIncoming(VectorStart, VecPreheader);
- unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
- Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx));
+ Value *Val =
+ getVectorValue(RdxPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
 VecRdxPhi->addIncoming(Val, LoopVectorBody);
 
 // Before each round, move the insertion point right between
@@ -1053,29 +819,29 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 // Extract the first scalar.
 Value *Scalar0 =
- Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
+ Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
 // Extract and reduce the remaining vector elements.
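// Editorial sketch, not part of this patch: the extract/reduce loop that
// follows performs a horizontal reduction. Lane 0 is peeled off and the
// remaining VF-1 lanes are folded in with the reduction's own operator.
// For an integer-add reduction at an assumed VF of 4:
#include <array>
int reduceAdd(const std::array<int, 4> &NewPhi) {
  int Scalar0 = NewPhi[0];           // extract the first scalar
  for (unsigned i = 1; i < 4; ++i)
    Scalar0 += NewPhi[i];            // the "add.rdx" chain
  return Scalar0;
}
// Because the vector PHI was seeded with <StartValue, Id, Id, Id> (Id being
// the operator's identity element), this fold yields exactly the value the
// scalar loop would have produced.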
for (unsigned i=1; i < VF; ++i) { Value *Scalar1 = - Builder.CreateExtractElement(NewPhi, Builder.getInt32(i)); + Builder.CreateExtractElement(NewPhi, Builder.getInt32(i)); switch (RdxDesc.Kind) { - case LoopVectorizationLegality::IntegerAdd: - Scalar0 = Builder.CreateAdd(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerMult: - Scalar0 = Builder.CreateMul(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerOr: - Scalar0 = Builder.CreateOr(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerAnd: - Scalar0 = Builder.CreateAnd(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerXor: - Scalar0 = Builder.CreateXor(Scalar0, Scalar1); - break; - default: - llvm_unreachable("Unknown reduction operation"); + case LoopVectorizationLegality::IntegerAdd: + Scalar0 = Builder.CreateAdd(Scalar0, Scalar1, "add.rdx"); + break; + case LoopVectorizationLegality::IntegerMult: + Scalar0 = Builder.CreateMul(Scalar0, Scalar1, "mul.rdx"); + break; + case LoopVectorizationLegality::IntegerOr: + Scalar0 = Builder.CreateOr(Scalar0, Scalar1, "or.rdx"); + break; + case LoopVectorizationLegality::IntegerAnd: + Scalar0 = Builder.CreateAnd(Scalar0, Scalar1, "and.rdx"); + break; + case LoopVectorizationLegality::IntegerXor: + Scalar0 = Builder.CreateXor(Scalar0, Scalar1, "xor.rdx"); + break; + default: + llvm_unreachable("Unknown reduction operation"); } } @@ -1103,15 +869,373 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Fix the scalar loop reduction variable with the incoming reduction sum // from the vector body and from the backedge value. - int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); - int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block. + int IncomingEdgeBlockIdx = + (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch()); + assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); + // Pick the other block. + int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0); (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); }// end of for each redux variable. } -void SingleBlockLoopVectorizer::updateAnalysis() { - // The original basic block. +Value *InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { + assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) && + "Invalid edge"); + + Value *SrcMask = createBlockInMask(Src); + + // The terminator has to be a branch inst! + BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator()); + assert(BI && "Unexpected terminator found"); + + Value *EdgeMask = SrcMask; + if (BI->isConditional()) { + EdgeMask = getVectorValue(BI->getCondition()); + if (BI->getSuccessor(0) != Dst) + EdgeMask = Builder.CreateNot(EdgeMask); + } + + return Builder.CreateAnd(EdgeMask, SrcMask); +} + +Value *InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { + assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); + + // Loop incoming mask is all-one. + if (OrigLoop->getHeader() == BB) { + Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1); + return getVectorValue(C); + } + + // This is the block mask. We OR all incoming edges, and with zero. 
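// Editorial sketch, not part of this patch: createEdgeMask() and
// createBlockInMask() above compute, per lane, which iterations would have
// executed a block in the original CFG. A scalar boolean model over a toy
// CFG type (all names illustrative; the recursion assumes the acyclic
// single-entry region that if-conversion operates on):
#include <vector>
struct ToyBlock {
  std::vector<ToyBlock*> Preds;      // incoming edges
  ToyBlock *TrueSucc = nullptr;      // taken successor
  ToyBlock *FalseSucc = nullptr;     // null => unconditional branch
  bool Cond = true;                  // branch condition this iteration
  bool IsHeader = false;             // loop header gets the all-ones mask
};
bool blockInMask(ToyBlock *BB);      // forward declaration

bool edgeMask(ToyBlock *Src, ToyBlock *Dst) {
  bool M = blockInMask(Src);         // SrcMask
  if (Src->FalseSucc)                // conditional terminator:
    M = M && (Src->TrueSucc == Dst ? Src->Cond : !Src->Cond);
  return M;                          // AND of edge condition and SrcMask
}
bool blockInMask(ToyBlock *BB) {
  if (BB->IsHeader)
    return true;                     // loop-incoming mask is all-one
  bool Mask = false;                 // start from zero, OR in each edge
  for (ToyBlock *P : BB->Preds)
    Mask = Mask || edgeMask(P, BB);
  return Mask;
}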
+ Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0); + Value *BlockMask = getVectorValue(Zero); + + // For each pred: + for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) + BlockMask = Builder.CreateOr(BlockMask, createEdgeMask(*it, BB)); + + return BlockMask; +} + +void +InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, + BasicBlock *BB, PhiVector *PV) { + Constant *Zero = + ConstantInt::get(IntegerType::getInt32Ty(BB->getContext()), 0); + + // For each instruction in the old loop. + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + switch (it->getOpcode()) { + case Instruction::Br: + // Nothing to do for PHIs and BR, since we already took care of the + // loop control flow instructions. + continue; + case Instruction::PHI:{ + PHINode* P = cast<PHINode>(it); + // Handle reduction variables: + if (Legal->getReductionVars()->count(P)) { + // This is phase one of vectorizing PHIs. + Type *VecTy = VectorType::get(it->getType(), VF); + WidenMap[it] = + PHINode::Create(VecTy, 2, "vec.phi", + LoopVectorBody->getFirstInsertionPt()); + PV->push_back(P); + continue; + } + + // Check for PHI nodes that are lowered to vector selects. + if (P->getParent() != OrigLoop->getHeader()) { + // We know that all PHIs in non header blocks are converted into + // selects, so we don't have to worry about the insertion order and we + // can just use the builder. + + // At this point we generate the predication tree. There may be + // duplications since this is a simple recursive scan, but future + // optimizations will clean it up. + Value *Cond = createBlockInMask(P->getIncomingBlock(0)); + WidenMap[P] = + Builder.CreateSelect(Cond, + getVectorValue(P->getIncomingValue(0)), + getVectorValue(P->getIncomingValue(1)), + "predphi"); + continue; + } + + // This PHINode must be an induction variable. + // Make sure that we know about it. + assert(Legal->getInductionVars()->count(P) && + "Not an induction variable"); + + LoopVectorizationLegality::InductionInfo II = + Legal->getInductionVars()->lookup(P); + + switch (II.IK) { + case LoopVectorizationLegality::NoInduction: + llvm_unreachable("Unknown induction"); + case LoopVectorizationLegality::IntInduction: { + assert(P == OldInduction && "Unexpected PHI"); + Value *Broadcasted = getBroadcastInstrs(Induction); + // After broadcasting the induction variable we need to make the + // vector consecutive by adding 0, 1, 2 ... + Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted); + WidenMap[OldInduction] = ConsecutiveInduction; + continue; + } + case LoopVectorizationLegality::ReverseIntInduction: + case LoopVectorizationLegality::PtrInduction: + // Handle reverse integer and pointer inductions. + Value *StartIdx = 0; + // If we have a single integer induction variable then use it. + // Otherwise, start counting at zero. + if (OldInduction) { + LoopVectorizationLegality::InductionInfo OldII = + Legal->getInductionVars()->lookup(OldInduction); + StartIdx = OldII.StartValue; + } else { + StartIdx = ConstantInt::get(Induction->getType(), 0); + } + // This is the normalized GEP that starts counting at zero. + Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, + "normalized.idx"); + + // Handle the reverse integer induction variable case. 
+ if (LoopVectorizationLegality::ReverseIntInduction == II.IK) { + IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType()); + Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy, + "resize.norm.idx"); + Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI, + "reverse.idx"); + + // This is a new value so do not hoist it out. + Value *Broadcasted = getBroadcastInstrs(ReverseInd); + // After broadcasting the induction variable we need to make the + // vector consecutive by adding ... -3, -2, -1, 0. + Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted, + true); + WidenMap[it] = ConsecutiveInduction; + continue; + } + + // Handle the pointer induction variable case. + assert(P->getType()->isPointerTy() && "Unexpected type."); + + // This is the vector of results. Notice that we don't generate + // vector geps because scalar geps result in better code. + Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); + for (unsigned int i = 0; i < VF; ++i) { + Constant *Idx = ConstantInt::get(Induction->getType(), i); + Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, + "gep.idx"); + Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, + "next.gep"); + VecVal = Builder.CreateInsertElement(VecVal, SclrGep, + Builder.getInt32(i), + "insert.gep"); + } + + WidenMap[it] = VecVal; + continue; + } + + }// End of PHI. + + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // Just widen binops. + BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it); + Value *A = getVectorValue(it->getOperand(0)); + Value *B = getVectorValue(it->getOperand(1)); + + // Use this vector value for all users of the original instruction. + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); + WidenMap[it] = V; + + // Update the NSW, NUW and Exact flags. + BinaryOperator *VecOp = cast<BinaryOperator>(V); + if (isa<OverflowingBinaryOperator>(BinOp)) { + VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap()); + VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap()); + } + if (isa<PossiblyExactOperator>(VecOp)) + VecOp->setIsExact(BinOp->isExact()); + break; + } + case Instruction::Select: { + // Widen selects. + // If the selector is loop invariant we can create a select + // instruction with a scalar condition. Otherwise, use vector-select. + Value *Cond = it->getOperand(0); + bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop); + + // The condition can be loop invariant but still defined inside the + // loop. This means that we can't just use the original 'cond' value. + // We have to take the 'vectorized' value and pick the first lane. + // Instcombine will make this a no-op. + Cond = getVectorValue(Cond); + if (InvariantCond) + Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0)); + + Value *Op0 = getVectorValue(it->getOperand(1)); + Value *Op1 = getVectorValue(it->getOperand(2)); + WidenMap[it] = Builder.CreateSelect(Cond, Op0, Op1); + break; + } + + case Instruction::ICmp: + case Instruction::FCmp: { + // Widen compares. Generate vector compares. 
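// Editorial sketch, not part of this patch: for a pointer induction
// variable, the code in the PHI case above builds the vector of addresses
// with VF scalar GEPs ("next.gep") rather than one vector GEP, since scalar
// GEPs lower to better code. Modeled directly, at an assumed VF of 4:
#include <array>
std::array<int*, 4> widenPtrInduction(int *StartValue, long NormalizedIdx) {
  std::array<int*, 4> VecVal;
  for (unsigned i = 0; i < 4; ++i)   // one scalar GEP per lane
    VecVal[i] = StartValue + (NormalizedIdx + i);
  return VecVal;
}
// Each lane's pointer is then inserted into the result vector, mirroring
// the "insert.gep" sequence above.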
+ bool FCmp = (it->getOpcode() == Instruction::FCmp); + CmpInst *Cmp = dyn_cast<CmpInst>(it); + Value *A = getVectorValue(it->getOperand(0)); + Value *B = getVectorValue(it->getOperand(1)); + if (FCmp) + WidenMap[it] = Builder.CreateFCmp(Cmp->getPredicate(), A, B); + else + WidenMap[it] = Builder.CreateICmp(Cmp->getPredicate(), A, B); + break; + } + + case Instruction::Store: { + // Attempt to issue a wide store. + StoreInst *SI = dyn_cast<StoreInst>(it); + Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF); + Value *Ptr = SI->getPointerOperand(); + unsigned Alignment = SI->getAlignment(); + + assert(!Legal->isUniform(Ptr) && + "We do not allow storing to uniform addresses"); + + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); + + // This store does not use GEPs. + if (!Legal->isConsecutivePtr(Ptr)) { + scalarizeInstruction(it); + break; + } + + if (Gep) { + // The last index does not have to be the induction. It can be + // consecutive and be a function of the index. For example A[I+1]; + unsigned NumOperands = Gep->getNumOperands(); + Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1)); + LastIndex = Builder.CreateExtractElement(LastIndex, Zero); + + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); + Gep2->setOperand(NumOperands - 1, LastIndex); + Ptr = Builder.Insert(Gep2); + } else { + // Use the induction element ptr. + assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); + Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero); + } + Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo()); + Value *Val = getVectorValue(SI->getValueOperand()); + Builder.CreateStore(Val, Ptr)->setAlignment(Alignment); + break; + } + case Instruction::Load: { + // Attempt to issue a wide load. + LoadInst *LI = dyn_cast<LoadInst>(it); + Type *RetTy = VectorType::get(LI->getType(), VF); + Value *Ptr = LI->getPointerOperand(); + unsigned Alignment = LI->getAlignment(); + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); + + // If the pointer is loop invariant or if it is non consecutive, + // scalarize the load. + bool Con = Legal->isConsecutivePtr(Ptr); + if (Legal->isUniform(Ptr) || !Con) { + scalarizeInstruction(it); + break; + } + + if (Gep) { + // The last index does not have to be the induction. It can be + // consecutive and be a function of the index. For example A[I+1]; + unsigned NumOperands = Gep->getNumOperands(); + Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1)); + LastIndex = Builder.CreateExtractElement(LastIndex, Zero); + + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); + Gep2->setOperand(NumOperands - 1, LastIndex); + Ptr = Builder.Insert(Gep2); + } else { + // Use the induction element ptr. + assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); + Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero); + } + + Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo()); + LI = Builder.CreateLoad(Ptr); + LI->setAlignment(Alignment); + // Use this vector value for all users of the load. 
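// Editorial sketch, not part of this patch: when the pointer is consecutive,
// the store and load cases above replace VF scalar memory operations with a
// single wide one through a bitcast of the lane-0 address; uniform or
// non-consecutive pointers are scalarized instead. A memcpy-based model of
// the wide paths at an assumed VF of 4 (alignment handling omitted):
#include <array>
#include <cstring>
void wideStore(int *LaneZeroPtr, const std::array<int, 4> &Val) {
  std::memcpy(LaneZeroPtr, Val.data(), sizeof(Val));  // one <4 x i32> store
}
std::array<int, 4> wideLoad(const int *LaneZeroPtr) {
  std::array<int, 4> V;
  std::memcpy(V.data(), LaneZeroPtr, sizeof(V));      // one <4 x i32> load
  return V;
}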
+ WidenMap[it] = LI; + break; + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + /// Vectorize bitcasts. + CastInst *CI = dyn_cast<CastInst>(it); + Value *A = getVectorValue(it->getOperand(0)); + Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF); + WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy); + break; + } + + case Instruction::Call: { + assert(isTriviallyVectorizableIntrinsic(it)); + Module *M = BB->getParent()->getParent(); + IntrinsicInst *II = cast<IntrinsicInst>(it); + Intrinsic::ID ID = II->getIntrinsicID(); + SmallVector<Value*, 4> Args; + for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i) + Args.push_back(getVectorValue(II->getArgOperand(i))); + Type *Tys[] = { VectorType::get(II->getType()->getScalarType(), VF) }; + Function *F = Intrinsic::getDeclaration(M, ID, Tys); + WidenMap[it] = Builder.CreateCall(F, Args); + break; + } + + default: + // All other instructions are unsupported. Scalarize them. + scalarizeInstruction(it); + break; + }// end of switch. + }// end of for_each instr. +} + +void InnerLoopVectorizer::updateAnalysis() { + // Forget the original basic block. SE->forgetLoop(OrigLoop); // Update the dominator tree information. @@ -1128,46 +1252,93 @@ void SingleBlockLoopVectorizer::updateAnalysis() { DEBUG(DT->verifyAnalysis()); } -bool LoopVectorizationLegality::canVectorize() { - if (!TheLoop->getLoopPreheader()) { - assert(false && "No preheader!!"); - DEBUG(dbgs() << "LV: Loop not normalized." << "\n"); - return false; +bool LoopVectorizationLegality::canVectorizeWithIfConvert() { + if (!EnableIfConversion) + return false; + + assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); + std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector(); + + // Collect the blocks that need predication. + for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) { + BasicBlock *BB = LoopBlocks[i]; + + // We must have at most two predecessors because we need to convert + // all PHIs to selects. + unsigned Preds = std::distance(pred_begin(BB), pred_end(BB)); + if (Preds > 2) + return false; + + // We must be able to predicate all blocks that need to be predicated. + if (blockNeedsPredication(BB) && !blockCanBePredicated(BB)) + return false; } - // We can only vectorize single basic block loops. - unsigned NumBlocks = TheLoop->getNumBlocks(); - if (NumBlocks != 1) { - DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n"); + // We can if-convert this loop. + return true; +} + +bool LoopVectorizationLegality::canVectorize() { + assert(TheLoop->getLoopPreheader() && "No preheader!!"); + + // We can only vectorize innermost loops. + if (TheLoop->getSubLoopsVector().size()) return false; - } - // We need to have a loop header. - BasicBlock *BB = TheLoop->getHeader(); - DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n"); + // We must have a single backedge. + if (TheLoop->getNumBackEdges() != 1) + return false; - // Go over each instruction and look at memory deps. - if (!canVectorizeBlock(*BB)) { - DEBUG(dbgs() << "LV: Can't vectorize this loop header\n"); + // We must have a single exiting block. 
+ if (!TheLoop->getExitingBlock()) + return false; + + unsigned NumBlocks = TheLoop->getNumBlocks(); + + // Check if we can if-convert non single-bb loops. + if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { + DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); return false; } + // We need to have a loop header. + BasicBlock *Latch = TheLoop->getLoopLatch(); + DEBUG(dbgs() << "LV: Found a loop: " << + TheLoop->getHeader()->getName() << "\n"); + // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getExitCount(TheLoop, BB); + const SCEV *ExitCount = SE->getExitCount(TheLoop, Latch); if (ExitCount == SE->getCouldNotCompute()) { DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); return false; } // Do not loop-vectorize loops with a tiny trip count. - unsigned TC = SE->getSmallConstantTripCount(TheLoop, BB); + unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch); if (TC > 0u && TC < TinyTripCountThreshold) { DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << "This loop is not worth vectorizing.\n"); return false; } - DEBUG(dbgs() << "LV: We can vectorize this loop!\n"); + // Check if we can vectorize the instructions and CFG in this loop. + if (!canVectorizeInstrs()) { + DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); + return false; + } + + // Go over each instruction and look at memory deps. + if (!canVectorizeMemory()) { + DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); + return false; + } + + // Collect all of the variables that remain uniform after vectorization. + collectLoopUniforms(); + + DEBUG(dbgs() << "LV: We can vectorize this loop" << + (PtrRtCheck.Need ? " (with a runtime bound check)" : "") + <<"!\n"); // Okay! We can vectorize. At this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with @@ -1175,165 +1346,194 @@ bool LoopVectorizationLegality::canVectorize() { return true; } -bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { - // Scan the instructions in the block and look for hazards. - for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { - Instruction *I = it; +bool LoopVectorizationLegality::canVectorizeInstrs() { + BasicBlock *PreHeader = TheLoop->getLoopPreheader(); + BasicBlock *Header = TheLoop->getHeader(); - PHINode *Phi = dyn_cast<PHINode>(I); - if (Phi) { - // This should not happen because the loop should be normalized. - if (Phi->getNumIncomingValues() != 2) { - DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); - return false; - } - // We only look at integer phi nodes. - if (!Phi->getType()->isIntegerTy()) { - DEBUG(dbgs() << "LV: Found an non-int PHI.\n"); - return false; - } + // For each block in the loop. + for (Loop::block_iterator bb = TheLoop->block_begin(), + be = TheLoop->block_end(); bb != be; ++bb) { - if (isInductionVariable(Phi)) { - if (Induction) { - DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n"); + // Scan the instructions in the block and look for hazards. + for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; + ++it) { + + if (PHINode *Phi = dyn_cast<PHINode>(it)) { + // This should not happen because the loop should be normalized. 
+ if (Phi->getNumIncomingValues() != 2) { + DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); return false; } - DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n"); - Induction = Phi; - continue; - } - if (AddReductionVar(Phi, IntegerAdd)) { - DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n"); - continue; - } - if (AddReductionVar(Phi, IntegerMult)) { - DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n"); - continue; - } - if (AddReductionVar(Phi, IntegerOr)) { - DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n"); - continue; - } - if (AddReductionVar(Phi, IntegerAnd)) { - DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n"); - continue; - } - if (AddReductionVar(Phi, IntegerXor)) { - DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n"); - continue; - } - DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); - return false; - }// end of PHI handling + // Check that this PHI type is allowed. + if (!Phi->getType()->isIntegerTy() && + !Phi->getType()->isPointerTy()) { + DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); + return false; + } - // We still don't handle functions. - CallInst *CI = dyn_cast<CallInst>(I); - if (CI) { - DEBUG(dbgs() << "LV: Found a call site.\n"); - return false; - } + // If this PHINode is not in the header block, then we know that we + // can convert it to select during if-conversion. No need to check if + // the PHIs in this block are induction or reduction variables. + if (*bb != Header) + continue; - // We do not re-vectorize vectors. - if (!VectorType::isValidElementType(I->getType()) && - !I->getType()->isVoidTy()) { - DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n"); - return false; - } + // This is the value coming from the preheader. + Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); + // Check if this is an induction variable. + InductionKind IK = isInductionVariable(Phi); + + if (NoInduction != IK) { + // Int inductions are special because we only allow one IV. + if (IK == IntInduction) { + if (Induction) { + DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n"); + return false; + } + Induction = Phi; + } + + DEBUG(dbgs() << "LV: Found an induction variable.\n"); + Inductions[Phi] = InductionInfo(StartValue, IK); + continue; + } - // Reduction instructions are allowed to have exit users. - // All other instructions must not have external users. - if (!AllowedExit.count(I)) - //Check that all of the users of the loop are inside the BB. - for (Value::use_iterator it = I->use_begin(), e = I->use_end(); - it != e; ++it) { - Instruction *U = cast<Instruction>(*it); - // This user may be a reduction exit value. - BasicBlock *Parent = U->getParent(); - if (Parent != &BB) { - DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); - return false; + if (AddReductionVar(Phi, IntegerAdd)) { + DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n"); + continue; } - } - } // next instr. 
+        if (AddReductionVar(Phi, IntegerMult)) {
+          DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerOr)) {
+          DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerAnd)) {
+          DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerXor)) {
+          DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+
+        DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
+        return false;
+      }// end of PHI handling
+
+      // We still don't handle functions.
+      CallInst *CI = dyn_cast<CallInst>(it);
+      if (CI && !isTriviallyVectorizableIntrinsic(it)) {
+        DEBUG(dbgs() << "LV: Found a call site.\n");
+        return false;
+      }
+
+      // We do not re-vectorize vectors.
+      if (!VectorType::isValidElementType(it->getType()) &&
+          !it->getType()->isVoidTy()) {
+        DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+        return false;
+      }
+
+      // Reduction instructions are allowed to have exit users.
+      // All other instructions must not have external users.
+      if (!AllowedExit.count(it))
+        // Check that all of the users of the instruction are inside the loop.
+        for (Value::use_iterator I = it->use_begin(), E = it->use_end();
+             I != E; ++I) {
+          Instruction *U = cast<Instruction>(*I);
+          // This user may be a reduction exit value.
+          if (!TheLoop->contains(U)) {
+            DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+            return false;
+          }
+        }
+    } // next instr.
+
+  }
 
   if (!Induction) {
-    DEBUG(dbgs() << "LV: Did not find an induction var.\n");
-    return false;
+    DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
+    assert(getInductionVars()->size() && "No induction variables");
   }
 
-  // Don't vectorize if the memory dependencies do not allow vectorization.
-  if (!canVectorizeMemory(BB))
-    return false;
+  return true;
+}
 
+void LoopVectorizationLegality::collectLoopUniforms() {
   // We now know that the loop is vectorizable!
   // Collect variables that will remain uniform after vectorization.
   std::vector<Value*> Worklist;
+  BasicBlock *Latch = TheLoop->getLoopLatch();
 
   // Start with the conditional branch and walk up the block.
-  Worklist.push_back(BB.getTerminator()->getOperand(0));
+  Worklist.push_back(Latch->getTerminator()->getOperand(0));
 
   while (Worklist.size()) {
     Instruction *I = dyn_cast<Instruction>(Worklist.back());
     Worklist.pop_back();
 
-    // Look at instructions inside this block.
-    if (!I) continue;
-    if (I->getParent() != &BB) continue;
+    // Look at instructions inside this loop.
     // Stop when reaching PHI nodes.
-    if (isa<PHINode>(I)) {
-      assert(I == Induction && "Found a uniform PHI that is not the induction");
-      break;
-    }
+    // TODO: we need to follow values all over the loop, not only in this block.
+    if (!I || !TheLoop->contains(I) || isa<PHINode>(I))
+      continue;
 
     // This is a known uniform.
     Uniforms.insert(I);
 
     // Insert all operands.
-    for (int i=0, Op = I->getNumOperands(); i < Op; ++i) {
+    for (int i = 0, Op = I->getNumOperands(); i < Op; ++i) {
       Worklist.push_back(I->getOperand(i));
     }
   }
-
-  return true;
 }
 
-bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
+bool LoopVectorizationLegality::canVectorizeMemory() {
   typedef SmallVector<Value*, 16> ValueVector;
   typedef SmallPtrSet<Value*, 16> ValueSet;
   // Holds the Load and Store *instructions*.
   ValueVector Loads;
   ValueVector Stores;
-
-  // Scan the BB and collect legal loads and stores.
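// Illustration (hypothetical code, not part of the patch): a reduction that
// AddReductionVar accepts. The PHI for 's' lives in the loop header, its
// start value comes from the preheader, the chain is a single IntegerAdd,
// and the one user outside the loop becomes the allowed exit instruction.
int sumArray(const int *A, int n) {
  int s = 0;                  // reduction start value (need not be zero)
  for (int i = 0; i < n; ++i)
    s += A[i];                // IntegerAdd reduction chain
  return s;                  // single out-of-loop user
}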
- for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { - Instruction *I = it; - - // If this is a load, save it. If this instruction can read from memory - // but is not a load, then we quit. Notice that we don't handle function - // calls that read or write. - if (I->mayReadFromMemory()) { - LoadInst *Ld = dyn_cast<LoadInst>(I); - if (!Ld) return false; - if (!Ld->isSimple()) { - DEBUG(dbgs() << "LV: Found a non-simple load.\n"); - return false; + PtrRtCheck.Pointers.clear(); + PtrRtCheck.Need = false; + + // For each block. + for (Loop::block_iterator bb = TheLoop->block_begin(), + be = TheLoop->block_end(); bb != be; ++bb) { + + // Scan the BB and collect legal loads and stores. + for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; + ++it) { + + // If this is a load, save it. If this instruction can read from memory + // but is not a load, then we quit. Notice that we don't handle function + // calls that read or write. + if (it->mayReadFromMemory()) { + LoadInst *Ld = dyn_cast<LoadInst>(it); + if (!Ld) return false; + if (!Ld->isSimple()) { + DEBUG(dbgs() << "LV: Found a non-simple load.\n"); + return false; + } + Loads.push_back(Ld); + continue; } - Loads.push_back(Ld); - continue; - } - // Save store instructions. Abort if other instructions write to memory. - if (I->mayWriteToMemory()) { - StoreInst *St = dyn_cast<StoreInst>(I); - if (!St) return false; - if (!St->isSimple()) { - DEBUG(dbgs() << "LV: Found a non-simple store.\n"); - return false; + // Save 'store' instructions. Abort if other instructions write to memory. + if (it->mayWriteToMemory()) { + StoreInst *St = dyn_cast<StoreInst>(it); + if (!St) return false; + if (!St->isSimple()) { + DEBUG(dbgs() << "LV: Found a non-simple store.\n"); + return false; + } + Stores.push_back(St); } - Stores.push_back(St); - } - } // next instr. + } // next instr. + } // next block. // Now we have two lists that hold the loads and the stores. // Next, we find the pointers that they use. @@ -1341,8 +1541,8 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { // Check if we see any stores. If there are no stores, then we don't // care if the pointers are *restrict*. if (!Stores.size()) { - DEBUG(dbgs() << "LV: Found a read-only loop!\n"); - return true; + DEBUG(dbgs() << "LV: Found a read-only loop!\n"); + return true; } // Holds the read and read-write *pointers* that we find. @@ -1361,6 +1561,12 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { StoreInst *ST = dyn_cast<StoreInst>(*I); assert(ST && "Bad StoreInst"); Value* Ptr = ST->getPointerOperand(); + + if (isUniform(Ptr)) { + DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); + return false; + } + // If we did *not* see this pointer before, insert it to // the read-write list. At this phase it is only a 'write' list. if (Seen.insert(Ptr)) @@ -1379,7 +1585,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { // If the address of i is unknown (for example A[B[i]]) then we may // read a few words, modify, and write a few words, and some of the // words may be written to the same address. - if (Seen.insert(Ptr) || !isConsecutiveGep(Ptr)) + if (Seen.insert(Ptr) || !isConsecutivePtr(Ptr)) Reads.push_back(Ptr); } @@ -1390,6 +1596,39 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { return true; } + // Find pointers with computable bounds. We are going to use this information + // to place a runtime bound check. 
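// Illustration (hypothetical code, not part of the patch): the case the
// runtime bound check above is collected for. 'dst' and 'src' cannot be
// statically disambiguated, but both pointers have affine, computable
// bounds, so the loop can still be vectorized behind a runtime overlap test.
void saxpyLike(float *dst, const float *src, float k, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] += k * src[i];
}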
+  bool RT = true;
+  for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
+    if (hasComputableBounds(*I)) {
+      PtrRtCheck.insert(SE, TheLoop, *I);
+      DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+    } else {
+      RT = false;
+      break;
+    }
+  for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
+    if (hasComputableBounds(*I)) {
+      PtrRtCheck.insert(SE, TheLoop, *I);
+      DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+    } else {
+      RT = false;
+      break;
+    }
+
+  // Check that we did not collect too many pointers or find an
+  // unsizeable pointer.
+  if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
+    PtrRtCheck.reset();
+    RT = false;
+  }
+
+  PtrRtCheck.Need = RT;
+
+  if (RT) {
+    DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
+  }
+
   // Now that the pointers are in two lists (Reads and ReadWrites), we
   // can check that there are no conflicts between each of the writes and
   // between the writes to the reads.
@@ -1404,12 +1643,12 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
        it != e; ++it) {
     if (!isIdentifiedObject(*it)) {
       DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
-      return false;
+      return RT;
     }
     if (!WriteObjects.insert(*it)) {
       DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
             << **it <<"\n");
-      return false;
+      return RT;
     }
   }
   TempObjects.clear();
@@ -1422,18 +1661,20 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
        it != e; ++it) {
     if (!isIdentifiedObject(*it)) {
       DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
-      return false;
+      return RT;
     }
     if (WriteObjects.count(*it)) {
       DEBUG(dbgs() << "LV: Found a possible read/write reorder:"
             << **it <<"\n");
-      return false;
+      return RT;
     }
   }
   TempObjects.clear();
 }
 
-  // All is okay.
+  // It is safe to vectorize and we don't need any runtime checks.
+  DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n");
+  PtrRtCheck.reset();
   return true;
 }
 
@@ -1442,11 +1683,13 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
   if (Phi->getNumIncomingValues() != 2)
     return false;
 
-  // Find the possible incoming reduction variable.
-  BasicBlock *BB = Phi->getParent();
-  int SelfEdgeIdx = Phi->getBasicBlockIndex(BB);
-  int InEdgeBlockIdx = (SelfEdgeIdx ? 0 : 1); // The other entry.
-  Value *RdxStart = Phi->getIncomingValue(InEdgeBlockIdx);
+  // Reduction variables are only found in the loop header block.
+  if (Phi->getParent() != TheLoop->getHeader())
+    return false;
+
+  // Obtain the reduction start value from the value that comes from the loop
+  // preheader.
+  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
 
   // ExitInstruction is the single value which is used outside the loop.
   // We only allow for a single reduction value to be used outside the loop.
@@ -1461,20 +1704,20 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
   // Also, we can't have multiple block-local users.
   Instruction *Iter = Phi;
   while (true) {
+    // If the instruction has no users then this is a broken
+    // chain and can't be a reduction variable.
+    if (Iter->use_empty())
+      return false;
+
     // Any reduction instr must be of one of the allowed kinds.
     if (!isReductionInstr(Iter, Kind))
       return false;
 
-    // Did we found a user inside this block ?
+    // Did we find a user inside this block ?
     bool FoundInBlockUser = false;
     // Did we reach the initial PHI node ?
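// Illustration (hypothetical code, not part of the patch) of the chain walk
// below: for 'prod *= A[i]' the header PHI and the multiply form the cycle
// PHI -> mul -> PHI, and the multiply is the single instruction whose value
// escapes the loop through the return.
int product(const int *A, int n) {
  int prod = 1;
  for (int i = 0; i < n; ++i)
    prod *= A[i];             // IntegerMult reduction
  return prod;
}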
bool FoundStartPHI = false; - // If the instruction has no users then this is a broken - // chain and can't be a reduction variable. - if (Iter->use_empty()) - return false; - // For each of the *users* of iter. for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end(); it != e; ++it) { @@ -1484,14 +1727,23 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, FoundStartPHI = true; continue; } + // Check if we found the exit user. BasicBlock *Parent = U->getParent(); - if (Parent != BB) { - // We must have a single exit instruction. + if (!TheLoop->contains(Parent)) { + // Exit if you find multiple outside users. if (ExitInstruction != 0) return false; ExitInstruction = Iter; } + + // We allow in-loop PHINodes which are not the original reduction PHI + // node. If this PHI is the only user of Iter (happens in IF w/ no ELSE + // structure) then don't skip this PHI. + if (isa<PHINode>(U) && U->getParent() != TheLoop->getHeader() && + TheLoop->contains(U) && Iter->getNumUses() > 1) + continue; + // We can't have multiple inside users. if (FoundInBlockUser) return false; @@ -1502,60 +1754,121 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // We found a reduction var if we have reached the original // phi node and we only have a single instruction with out-of-loop // users. - if (FoundStartPHI && ExitInstruction) { - // This instruction is allowed to have out-of-loop users. - AllowedExit.insert(ExitInstruction); - - // Save the description of this reduction variable. - ReductionDescriptor RD(RdxStart, ExitInstruction, Kind); - Reductions[Phi] = RD; - return true; - } + if (FoundStartPHI && ExitInstruction) { + // This instruction is allowed to have out-of-loop users. + AllowedExit.insert(ExitInstruction); + + // Save the description of this reduction variable. + ReductionDescriptor RD(RdxStart, ExitInstruction, Kind); + Reductions[Phi] = RD; + return true; + } + + // If we've reached the start PHI but did not find an outside user then + // this is dead code. Abort. + if (FoundStartPHI) + return false; } } bool LoopVectorizationLegality::isReductionInstr(Instruction *I, ReductionKind Kind) { - switch (I->getOpcode()) { - default: - return false; - case Instruction::PHI: - // possibly. - return true; - case Instruction::Add: - case Instruction::Sub: - return Kind == IntegerAdd; - case Instruction::Mul: - case Instruction::UDiv: - case Instruction::SDiv: - return Kind == IntegerMult; - case Instruction::And: - return Kind == IntegerAnd; - case Instruction::Or: - return Kind == IntegerOr; - case Instruction::Xor: - return Kind == IntegerXor; - } + switch (I->getOpcode()) { + default: + return false; + case Instruction::PHI: + // possibly. + return true; + case Instruction::Add: + case Instruction::Sub: + return Kind == IntegerAdd; + case Instruction::Mul: + return Kind == IntegerMult; + case Instruction::And: + return Kind == IntegerAnd; + case Instruction::Or: + return Kind == IntegerOr; + case Instruction::Xor: + return Kind == IntegerXor; + } } -bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { +LoopVectorizationLegality::InductionKind +LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { + Type *PhiTy = Phi->getType(); + // We only handle integer and pointer inductions variables. + if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) + return NoInduction; + // Check that the PHI is consecutive and starts at zero. 
   const SCEV *PhiScev = SE->getSCEV(Phi);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
   if (!AR) {
     DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
-    return false;
+    return NoInduction;
   }
   const SCEV *Step = AR->getStepRecurrence(*SE);
 
-  if (!Step->isOne()) {
-    DEBUG(dbgs() << "LV: PHI stride does not equal one.\n");
-    return false;
+  // Integer inductions need to have a stride of one.
+  if (PhiTy->isIntegerTy()) {
+    if (Step->isOne())
+      return IntInduction;
+    if (Step->isAllOnesValue())
+      return ReverseIntInduction;
+    return NoInduction;
+  }
+
+  // Calculate the pointer stride and check if it is consecutive.
+  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+  if (!C)
+    return NoInduction;
+
+  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+  uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
+  if (C->getValue()->equalsInt(Size))
+    return PtrInduction;
+
+  return NoInduction;
+}
+
+bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
+  assert(TheLoop->contains(BB) && "Unknown block used");
+
+  // Blocks that do not dominate the latch need predication.
+  BasicBlock* Latch = TheLoop->getLoopLatch();
+  return !DT->dominates(BB, Latch);
+}
+
+bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+    // We don't predicate loads/stores at the moment.
+    if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+      return false;
+
+    // The instructions below can trap.
+    switch (it->getOpcode()) {
+    default: continue;
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+      return false;
+    }
   }
+
   return true;
 }
 
+bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
+  const SCEV *PhiScev = SE->getSCEV(Ptr);
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+  if (!AR)
+    return false;
+
+  return AR->isAffine();
+}
+
 unsigned
 LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
   if (!VTTI) {
@@ -1584,19 +1897,29 @@ LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
 }
 
 unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
-  // We can only estimate the cost of single basic block loops.
-  assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop");
-
-  BasicBlock *BB = TheLoop->getHeader();
   unsigned Cost = 0;
 
-  // For each instruction in the old loop.
-  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
-    Instruction *Inst = it;
-    unsigned C = getInstructionCost(Inst, VF);
-    Cost += C;
-    DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF <<
-          " For instruction: "<< *Inst << "\n");
+  // For each block.
+  for (Loop::block_iterator bb = TheLoop->block_begin(),
+       be = TheLoop->block_end(); bb != be; ++bb) {
+    unsigned BlockCost = 0;
+    BasicBlock *BB = *bb;
+
+    // For each instruction in the old loop.
+    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+      unsigned C = getInstructionCost(it, VF);
+      BlockCost += C;
+      DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " <<
+            VF << " For instruction: "<< *it << "\n");
+    }
+
+    // We assume that if-converted blocks have a 50% chance of being executed.
+    // When the code is scalar then some of the blocks are avoided due to CF.
+    // When the code is vectorized we execute all code paths.
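// Illustration (hypothetical code, not part of the patch): the 'then' block
// below does not dominate the latch, so blockNeedsPredication returns true,
// and because it contains an SDiv that may trap on division by zero,
// blockCanBePredicated refuses to speculate it during if-conversion.
void guardedDiv(int *A, const int *B, int n) {
  for (int i = 0; i < n; ++i)
    if (B[i] != 0)
      A[i] = A[i] / B[i];
}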
+    if (Legal->blockNeedsPredication(*bb) && VF == 1)
+      BlockCost /= 2;
+
+    Cost += BlockCost;
   }
 
   return Cost;
@@ -1614,147 +1937,156 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   Type *RetTy = I->getType();
   Type *VectorTy = ToVectorTy(RetTy, VF);
 
-  // TODO: We need to estimate the cost of intrinsic calls.
   switch (I->getOpcode()) {
-    case Instruction::GetElementPtr:
-      // We mark this instruction as zero-cost because scalar GEPs are usually
-      // lowered to the intruction addressing mode. At the moment we don't
-      // generate vector geps.
-      return 0;
-    case Instruction::Br: {
-      return VTTI->getCFInstrCost(I->getOpcode());
-    }
-    case Instruction::PHI:
-      return 0;
-    case Instruction::Add:
-    case Instruction::FAdd:
-    case Instruction::Sub:
-    case Instruction::FSub:
-    case Instruction::Mul:
-    case Instruction::FMul:
-    case Instruction::UDiv:
-    case Instruction::SDiv:
-    case Instruction::FDiv:
-    case Instruction::URem:
-    case Instruction::SRem:
-    case Instruction::FRem:
-    case Instruction::Shl:
-    case Instruction::LShr:
-    case Instruction::AShr:
-    case Instruction::And:
-    case Instruction::Or:
-    case Instruction::Xor: {
-      return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy);
-    }
-    case Instruction::Select: {
-      SelectInst *SI = cast<SelectInst>(I);
-      const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
-      bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
-      Type *CondTy = SI->getCondition()->getType();
-      if (ScalarCond)
-        CondTy = VectorType::get(CondTy, VF);
-
-      return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
-    }
-    case Instruction::ICmp:
-    case Instruction::FCmp: {
-      Type *ValTy = I->getOperand(0)->getType();
-      VectorTy = ToVectorTy(ValTy, VF);
-      return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy);
-    }
-    case Instruction::Store: {
-      StoreInst *SI = cast<StoreInst>(I);
-      Type *ValTy = SI->getValueOperand()->getType();
-      VectorTy = ToVectorTy(ValTy, VF);
-
-      if (VF == 1)
-        return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
-                              SI->getAlignment(), SI->getPointerAddressSpace());
-
-      // Scalarized stores.
-      if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
-        unsigned Cost = 0;
-        unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
-                                              ValTy);
-        // The cost of extracting from the value vector.
-        Cost += VF * (ExtCost);
-        // The cost of the scalar stores.
-        Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
-                                           ValTy->getScalarType(),
-                                           SI->getAlignment(),
-                                           SI->getPointerAddressSpace());
-        return Cost;
-      }
-
-      // Wide stores.
-      return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(),
+  case Instruction::GetElementPtr:
+    // We mark this instruction as zero-cost because scalar GEPs are usually
+    // lowered to the instruction addressing mode. At the moment we don't
+    // generate vector geps.
+    return 0;
+  case Instruction::Br: {
+    return VTTI->getCFInstrCost(I->getOpcode());
+  }
+  case Instruction::PHI:
+    // TODO: IF-converted IFs become selects.
+ return 0; + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy); + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); + bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); + Type *CondTy = SI->getCondition()->getType(); + if (ScalarCond) + CondTy = VectorType::get(CondTy, VF); + + return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *ValTy = I->getOperand(0)->getType(); + VectorTy = ToVectorTy(ValTy, VF); + return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy); + } + case Instruction::Store: { + StoreInst *SI = cast<StoreInst>(I); + Type *ValTy = SI->getValueOperand()->getType(); + VectorTy = ToVectorTy(ValTy, VF); + + if (VF == 1) + return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), SI->getPointerAddressSpace()); + + // Scalarized stores. + if (!Legal->isConsecutivePtr(SI->getPointerOperand())) { + unsigned Cost = 0; + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, + ValTy); + // The cost of extracting from the value vector. + Cost += VF * (ExtCost); + // The cost of the scalar stores. + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), + ValTy->getScalarType(), + SI->getAlignment(), + SI->getPointerAddressSpace()); + return Cost; } - case Instruction::Load: { - LoadInst *LI = cast<LoadInst>(I); - - if (VF == 1) - return VTTI->getMemoryOpCost(I->getOpcode(), RetTy, - LI->getAlignment(), - LI->getPointerAddressSpace()); - - // Scalarized loads. - if (!Legal->isConsecutiveGep(LI->getPointerOperand())) { - unsigned Cost = 0; - unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy); - // The cost of inserting the loaded value into the result vector. - Cost += VF * (InCost); - // The cost of the scalar stores. - Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), - RetTy->getScalarType(), - LI->getAlignment(), - LI->getPointerAddressSpace()); - return Cost; - } - // Wide loads. - return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(), + // Wide stores. + return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(), + SI->getPointerAddressSpace()); + } + case Instruction::Load: { + LoadInst *LI = cast<LoadInst>(I); + + if (VF == 1) + return VTTI->getMemoryOpCost(I->getOpcode(), RetTy, + LI->getAlignment(), LI->getPointerAddressSpace()); - } - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); - return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); - } - default: { - // We are scalarizing the instruction. 
Return the cost of the scalar - // instruction, plus the cost of insert and extract into vector - // elements, times the vector width. + + // Scalarized loads. + if (!Legal->isConsecutivePtr(LI->getPointerOperand())) { unsigned Cost = 0; + unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy); + // The cost of inserting the loaded value into the result vector. + Cost += VF * (InCost); + // The cost of the scalar stores. + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), + RetTy->getScalarType(), + LI->getAlignment(), + LI->getPointerAddressSpace()); + return Cost; + } - bool IsVoid = RetTy->isVoidTy(); + // Wide loads. + return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(), + LI->getPointerAddressSpace()); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); + return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); + } + case Instruction::Call: { + assert(isTriviallyVectorizableIntrinsic(I)); + IntrinsicInst *II = cast<IntrinsicInst>(I); + Type *RetTy = ToVectorTy(II->getType(), VF); + SmallVector<Type*, 4> Tys; + for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i) + Tys.push_back(ToVectorTy(II->getArgOperand(i)->getType(), VF)); + return VTTI->getIntrinsicInstrCost(II->getIntrinsicID(), RetTy, Tys); + } + default: { + // We are scalarizing the instruction. Return the cost of the scalar + // instruction, plus the cost of insert and extract into vector + // elements, times the vector width. + unsigned Cost = 0; - unsigned InsCost = (IsVoid ? 0 : - VTTI->getInstrCost(Instruction::InsertElement, - VectorTy)); + bool IsVoid = RetTy->isVoidTy(); - unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, - VectorTy); + unsigned InsCost = (IsVoid ? 0 : + VTTI->getInstrCost(Instruction::InsertElement, + VectorTy)); - // The cost of inserting the results plus extracting each one of the - // operands. - Cost += VF * (InsCost + ExtCost * I->getNumOperands()); + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, + VectorTy); - // The cost of executing VF copies of the scalar instruction. - Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy); - return Cost; - } + // The cost of inserting the results plus extracting each one of the + // operands. + Cost += VF * (InsCost + ExtCost * I->getNumOperands()); + + // The cost of executing VF copies of the scalar instruction. + Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy); + return Cost; + } }// end of switch. 
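// Illustration (hypothetical helper, not part of the patch): the default
// case above as a standalone formula. We insert each produced lane, extract
// each operand lane, and then pay for VF scalar copies of the instruction.
unsigned scalarizationCost(unsigned VF, unsigned InsCost, unsigned ExtCost,
                           unsigned NumOperands, unsigned ScalarCost) {
  return VF * (InsCost + ExtCost * NumOperands)   // insert/extract overhead
         + VF * ScalarCost;                       // VF scalar copies
}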
 }
 
@@ -1764,8 +2096,6 @@ Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
   return VectorType::get(Scalar, VF);
 }
 
-} // namespace
-
 char LoopVectorize::ID = 0;
 static const char lv_name[] = "Loop Vectorization";
 INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
@@ -1780,3 +2110,4 @@ namespace llvm {
   }
 
 }
+
diff --git a/lib/Transforms/Vectorize/LoopVectorize.h b/lib/Transforms/Vectorize/LoopVectorize.h
new file mode 100644
index 0000000000..9d6d80e22b
--- /dev/null
+++ b/lib/Transforms/Vectorize/LoopVectorize.h
@@ -0,0 +1,458 @@
+//===- LoopVectorize.h --- A Loop Vectorizer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
+// and generates target-independent LLVM-IR. Legalization of the IR is done
+// in the codegen. However, the vectorizer uses (will use) the codegen
+// interfaces to generate IR that is likely to result in an optimal binary.
+//
+// The loop vectorizer combines consecutive loop iterations into a single
+// 'wide' iteration. After this transformation the index is incremented
+// by the SIMD vector width, and not by one.
+//
+// This pass has four parts:
+// 1. The main loop pass that drives the different parts.
+// 2. LoopVectorizationLegality - A unit that checks for the legality
+//    of the vectorization.
+// 3. InnerLoopVectorizer - A unit that performs the actual
+//    widening of instructions.
+// 4. LoopVectorizationCostModel - A unit that checks for the profitability
+//    of vectorization. It decides on the optimal vector width, which
+//    can be one, if vectorization is not profitable.
+//
+//===----------------------------------------------------------------------===//
+//
+// The reduction-variable vectorization is based on the paper:
+//  D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
+//
+// Variable uniformity checks are inspired by:
+//  Karrenberg, R. and Hack, S. Whole Function Vectorization.
+//
+// Other ideas/concepts are from:
+//  A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
+//
+//  S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of
+//  Vectorizing Compilers.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H
+#define LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H
+
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IRBuilder.h"
+
+#include <algorithm>
+using namespace llvm;
+
+/// We don't vectorize loops with a known constant trip count below this number.
+const unsigned TinyTripCountThreshold = 16;
+
+/// When performing a runtime memory check, do not check more than this
+/// number of pointers. Notice that the check is quadratic!
+const unsigned RuntimeMemoryCheckThreshold = 4;
+
+/// This is the highest vector width that we try to generate.
+const unsigned MaxVectorSize = 8;
+
+namespace llvm {
+
+// Forward declarations.
+class LoopVectorizationLegality; +class LoopVectorizationCostModel; +class VectorTargetTransformInfo; + +/// InnerLoopVectorizer vectorizes loops which contain only one basic +/// block to a specified vectorization factor (VF). +/// This class performs the widening of scalars into vectors, or multiple +/// scalars. This class also implements the following features: +/// * It inserts an epilogue loop for handling loops that don't have iteration +/// counts that are known to be a multiple of the vectorization factor. +/// * It handles the code generation for reduction variables. +/// * Scalarization (implementation using scalars) of un-vectorizable +/// instructions. +/// InnerLoopVectorizer does not perform any vectorization-legality +/// checks, and relies on the caller to check for the different legality +/// aspects. The InnerLoopVectorizer relies on the +/// LoopVectorizationLegality class to provide information about the induction +/// and reduction variables that were found to a given vectorization factor. +class InnerLoopVectorizer { +public: + /// Ctor. + InnerLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li, + DominatorTree *Dt, DataLayout *Dl, unsigned VecWidth): + OrigLoop(Orig), SE(Se), LI(Li), DT(Dt), DL(Dl), VF(VecWidth), + Builder(Se->getContext()), Induction(0), OldInduction(0) { } + + // Perform the actual loop widening (vectorization). + void vectorize(LoopVectorizationLegality *Legal) { + // Create a new empty loop. Unlink the old loop and connect the new one. + createEmptyLoop(Legal); + // Widen each instruction in the old loop to a new one in the new loop. + // Use the Legality module to find the induction and reduction variables. + vectorizeLoop(Legal); + // Register the new loop and update the analysis passes. + updateAnalysis(); + } + +private: + /// A small list of PHINodes. + typedef SmallVector<PHINode*, 4> PhiVector; + + /// Add code that checks at runtime if the accessed arrays overlap. + /// Returns the comparator value or NULL if no check is needed. + Value *addRuntimeCheck(LoopVectorizationLegality *Legal, + Instruction *Loc); + /// Create an empty loop, based on the loop ranges of the old loop. + void createEmptyLoop(LoopVectorizationLegality *Legal); + /// Copy and widen the instructions from the old loop. + void vectorizeLoop(LoopVectorizationLegality *Legal); + + /// A helper function that computes the predicate of the block BB, assuming + /// that the header block of the loop is set to True. It returns the *entry* + /// mask for the block BB. + Value *createBlockInMask(BasicBlock *BB); + /// A helper function that computes the predicate of the edge between SRC + /// and DST. + Value *createEdgeMask(BasicBlock *Src, BasicBlock *Dst); + + /// A helper function to vectorize a single BB within the innermost loop. + void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB, + PhiVector *PV); + + /// Insert the new loop to the loop hierarchy and pass manager + /// and update the analysis passes. + void updateAnalysis(); + + /// This instruction is un-vectorizable. Implement it as a sequence + /// of scalars. + void scalarizeInstruction(Instruction *Instr); + + /// Create a broadcast instruction. This method generates a broadcast + /// instruction (shuffle) for loop invariant values and for the induction + /// value. If this is the induction variable then we extend it to N, N+1, ... + /// this is needed because each iteration in the loop corresponds to a SIMD + /// element. 
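// A minimal sketch (not part of the patch) of how the classes in this header
// fit together inside a pass. 'L', 'SE', 'LI', 'DT' and 'DL' are assumed to
// come from the surrounding pass, and the fixed width of 4 merely stands in
// for the cost model's answer:
//
//   LoopVectorizationLegality Legal(L, SE, DL, DT);
//   if (Legal.canVectorize()) {
//     InnerLoopVectorizer LB(L, SE, LI, DT, DL, /*VecWidth=*/4);
//     LB.vectorize(&Legal);
//   }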
+  Value *getBroadcastInstrs(Value *V);
+
+  /// This function adds 0, 1, 2 ... to each vector element, starting at zero.
+  /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
+  Value *getConsecutiveVector(Value* Val, bool Negate = false);
+
+  /// When we go over instructions in the basic block we rely on previous
+  /// values within the current basic block or on loop invariant values.
+  /// When we widen (vectorize) values we place them in the map. If the values
+  /// are not within the map, they have to be loop invariant, so we simply
+  /// broadcast them into a vector.
+  Value *getVectorValue(Value *V);
+
+  /// Get a uniform vector of constant integers. We use this to get
+  /// vectors of ones and zeros for the reduction code.
+  Constant* getUniformVector(unsigned Val, Type* ScalarTy);
+
+  typedef DenseMap<Value*, Value*> ValueMap;
+
+  /// The original loop.
+  Loop *OrigLoop;
+  // Scev analysis to use.
+  ScalarEvolution *SE;
+  // Loop Info.
+  LoopInfo *LI;
+  // Dominator Tree.
+  DominatorTree *DT;
+  // Data Layout.
+  DataLayout *DL;
+  // The vectorization factor to use.
+  unsigned VF;
+
+  // The builder that we use.
+  IRBuilder<> Builder;
+
+  // --- Vectorization state ---
+
+  /// The vector-loop preheader.
+  BasicBlock *LoopVectorPreHeader;
+  /// The scalar-loop preheader.
+  BasicBlock *LoopScalarPreHeader;
+  /// Middle Block between the vector and the scalar.
+  BasicBlock *LoopMiddleBlock;
+  ///The ExitBlock of the scalar loop.
+  BasicBlock *LoopExitBlock;
+  ///The vector loop body.
+  BasicBlock *LoopVectorBody;
+  ///The scalar loop body.
+  BasicBlock *LoopScalarBody;
+  ///The first bypass block.
+  BasicBlock *LoopBypassBlock;
+
+  /// The new Induction variable which was added to the new block.
+  PHINode *Induction;
+  /// The induction variable of the old basic block.
+  PHINode *OldInduction;
+  // Maps scalars to widened vectors.
+  ValueMap WidenMap;
+};
+
+/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
+/// to what vectorization factor.
+/// This class does not look at the profitability of vectorization, only the
+/// legality. This class has two main kinds of checks:
+/// * Memory checks - The code in canVectorizeMemory checks if vectorization
+/// will change the order of memory accesses in a way that will change the
+/// correctness of the program.
+/// * Scalar checks - The code in canVectorizeInstrs and canVectorizeMemory
+/// checks for a number of different conditions, such as the availability of a
+/// single induction variable, that all types are supported and vectorizable,
+/// etc. This code reflects the capabilities of InnerLoopVectorizer.
+/// This class is also used by InnerLoopVectorizer for identifying
+/// induction variables and the different reduction variables.
+class LoopVectorizationLegality {
+public:
+  LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl,
+                            DominatorTree *Dt):
+    TheLoop(Lp), SE(Se), DL(Dl), DT(Dt), Induction(0) { }
+
+  /// This enum represents the kinds of reductions that we support.
+  enum ReductionKind {
+    NoReduction, /// Not a reduction.
+    IntegerAdd,  /// Sum of numbers.
+    IntegerMult, /// Product of numbers.
+    IntegerOr,   /// Bitwise or logical OR of numbers.
+    IntegerAnd,  /// Bitwise or logical AND of numbers.
+    IntegerXor   /// Bitwise or logical XOR of numbers.
+  };
+
+  /// This enum represents the kinds of inductions that we support.
+  enum InductionKind {
+    NoInduction,         /// Not an induction variable.
+    IntInduction,        /// Integer induction variable. Step = 1.
+    ReverseIntInduction, /// Reverse int induction variable. Step = -1.
+    PtrInduction         /// Pointer induction variable. Step = sizeof(elem).
+  };
+
+  /// This POD struct holds information about reduction variables.
+  struct ReductionDescriptor {
+    // Default C'tor
+    ReductionDescriptor():
+      StartValue(0), LoopExitInstr(0), Kind(NoReduction) {}
+
+    // C'tor.
+    ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K):
+      StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+
+    // The starting value of the reduction.
+    // It does not have to be zero!
+    Value *StartValue;
+    // The instruction whose value is used outside the loop.
+    Instruction *LoopExitInstr;
+    // The kind of the reduction.
+    ReductionKind Kind;
+  };
+
+  // This POD struct holds information about the memory runtime legality
+  // check that a group of pointers do not overlap.
+  struct RuntimePointerCheck {
+    RuntimePointerCheck(): Need(false) {}
+
+    /// Reset the state of the pointer runtime information.
+    void reset() {
+      Need = false;
+      Pointers.clear();
+      Starts.clear();
+      Ends.clear();
+    }
+
+    /// Insert a pointer and calculate the start and end SCEVs.
+    void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr);
+
+    /// This flag indicates if we need to add the runtime check.
+    bool Need;
+    /// Holds the pointers that we need to check.
+    SmallVector<Value*, 2> Pointers;
+    /// Holds the pointer value at the beginning of the loop.
+    SmallVector<const SCEV*, 2> Starts;
+    /// Holds the pointer value at the end of the loop.
+    SmallVector<const SCEV*, 2> Ends;
+  };
+
+  /// A POD for saving information about induction variables.
+  struct InductionInfo {
+    /// Ctors.
+    InductionInfo(Value *Start, InductionKind K):
+      StartValue(Start), IK(K) {}
+    InductionInfo(): StartValue(0), IK(NoInduction) {}
+    /// Start value.
+    Value *StartValue;
+    /// Induction kind.
+    InductionKind IK;
+  };
+
+  /// ReductionList contains the reduction descriptors for all
+  /// of the reductions that were found in the loop.
+  typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
+
+  /// InductionList saves induction variables and maps them to the
+  /// induction descriptor.
+  typedef DenseMap<PHINode*, InductionInfo> InductionList;
+
+  /// Returns true if it is legal to vectorize this loop.
+  /// This does not mean that it is profitable to vectorize this
+  /// loop, only that it is legal to do so.
+  bool canVectorize();
+
+  /// Returns the Induction variable.
+  PHINode *getInduction() {return Induction;}
+
+  /// Returns the reduction variables found in the loop.
+  ReductionList *getReductionVars() { return &Reductions; }
+
+  /// Returns the induction variables found in the loop.
+  InductionList *getInductionVars() { return &Inductions; }
+
+  /// Return true if the block BB needs to be predicated in order for the loop
+  /// to be vectorized.
+  bool blockNeedsPredication(BasicBlock *BB);
+
+  /// Check if this pointer is consecutive when vectorizing. This happens
+  /// when the last index of the GEP is the induction variable, or the
+  /// pointer itself is an induction variable.
+  /// This check allows us to vectorize A[idx] into a wide load/store.
+  bool isConsecutivePtr(Value *Ptr);
+
+  /// Returns true if the value V is uniform within the loop.
+  bool isUniform(Value *V);
+
+  /// Returns true if this instruction will remain scalar after vectorization.
+  bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
+
+  /// Returns the information that we collected about the runtime memory check.
+  RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
+private:
+  /// Check that the instructions in the loop are vectorizable.
+  /// At this point the loop shape (header, latch and computable exit count)
+  /// has already been validated, so we only need to check individual
+  /// instructions.
+  bool canVectorizeInstrs();
+
+  /// When we vectorize loops we may change the order in which
+  /// we read and write from memory. This method checks if it is
+  /// legal to vectorize the code, considering only memory constraints.
+  /// Returns true if the loop is vectorizable.
+  bool canVectorizeMemory();
+
+  /// Return true if we can vectorize this loop using the IF-conversion
+  /// transformation.
+  bool canVectorizeWithIfConvert();
+
+  /// Collect the variables that need to stay uniform after vectorization.
+  void collectLoopUniforms();
+
+  /// Return true if all of the instructions in the block can be speculatively
+  /// executed.
+  bool blockCanBePredicated(BasicBlock *BB);
+
+  /// Returns True, if 'Phi' is the kind of reduction variable for type
+  /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
+  bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
+  /// Returns true if the instruction I can be a reduction variable of type
+  /// 'Kind'.
+  bool isReductionInstr(Instruction *I, ReductionKind Kind);
+  /// Returns the induction kind of Phi. This function may return NoInduction
+  /// if the PHI is not an induction variable.
+  InductionKind isInductionVariable(PHINode *Phi);
+  /// Return true if we can compute the address bounds of Ptr within the loop.
+  bool hasComputableBounds(Value *Ptr);
+
+  /// The loop that we evaluate.
+  Loop *TheLoop;
+  /// Scev analysis.
+  ScalarEvolution *SE;
+  /// DataLayout analysis.
+  DataLayout *DL;
+  // Dominators.
+  DominatorTree *DT;
+
+  // --- vectorization state --- //
+
+  /// Holds the integer induction variable. This is the counter of the
+  /// loop.
+  PHINode *Induction;
+  /// Holds the reduction variables.
+  ReductionList Reductions;
+  /// Holds all of the induction variables that we found in the loop.
+  /// Notice that inductions don't need to start at zero and that induction
+  /// variables can be pointers.
+  InductionList Inductions;
+
+  /// Allowed outside users. This holds the reduction
+  /// vars which can be accessed from outside the loop.
+  SmallPtrSet<Value*, 4> AllowedExit;
+  /// This set holds the variables which are known to be uniform after
+  /// vectorization.
+  SmallPtrSet<Instruction*, 4> Uniforms;
+  /// We need to check that all of the pointers in this list are disjoint
+  /// at runtime.
+  RuntimePointerCheck PtrRtCheck;
+};
+
+/// LoopVectorizationCostModel - estimates the expected speedups due to
+/// vectorization.
+/// In many cases vectorization is not profitable. This can happen for a
+/// number of reasons. In this class we mainly attempt to predict
+/// the expected speedup/slowdowns due to the supported instruction set.
+/// We use the VectorTargetTransformInfo to query the different backends
+/// for the cost of different operations.
+class LoopVectorizationCostModel {
+public:
+  /// C'tor.
+  LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
+                             LoopVectorizationLegality *Leg,
+                             const VectorTargetTransformInfo *Vtti):
+    TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
+
+  /// Returns the most profitable vectorization factor for the loop that is
+  /// smaller or equal to the VF argument. This method checks every power
+  /// of two up to VF.
+ unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize); + +private: + /// Returns the expected execution cost. The unit of the cost does + /// not matter because we use the 'cost' units to compare different + /// vector widths. The cost that is returned is *not* normalized by + /// the factor width. + unsigned expectedCost(unsigned VF); + + /// Returns the execution time cost of an instruction for a given vector + /// width. Vector width of one means scalar. + unsigned getInstructionCost(Instruction *I, unsigned VF); + + /// A helper function for converting Scalar types to vector types. + /// If the incoming type is void, we return void. If the VF is 1, we return + /// the scalar type. + static Type* ToVectorTy(Type *Scalar, unsigned VF); + + /// The loop that we evaluate. + Loop *TheLoop; + /// Scev analysis. + ScalarEvolution *SE; + + /// Vectorization legality. + LoopVectorizationLegality *Legal; + /// Vector target information. + const VectorTargetTransformInfo *VTTI; +}; + +}// namespace llvm + +#endif //LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H + diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp index d26973a7b3..3fb36cadea 100644 --- a/lib/Transforms/Vectorize/Vectorize.cpp +++ b/lib/Transforms/Vectorize/Vectorize.cpp @@ -13,13 +13,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/Transforms/Vectorize.h" +#include "llvm/Transforms/Vectorize.h" #include "llvm-c/Initialization.h" -#include "llvm/InitializePasses.h" -#include "llvm/PassManager.h" +#include "llvm-c/Transforms/Vectorize.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" -#include "llvm/Transforms/Vectorize.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassManager.h" using namespace llvm; |