Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt                          |   2
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp                           |   6
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                           |  29
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp                      |  12
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp                        |   2
-rw-r--r--  lib/Transforms/InstCombine/CMakeLists.txt                  |   2
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h                   |   2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp           |   5
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp         |   6
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp            | 111
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp            |  20
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp  |  48
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp        |   5
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp        |  17
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp        |  58
-rw-r--r--  lib/Transforms/Instrumentation/CMakeLists.txt              |   2
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp           |  25
-rw-r--r--  lib/Transforms/Instrumentation/ThreadSanitizer.cpp         |  14
-rw-r--r--  lib/Transforms/Scalar/BoundsChecking.cpp                   | 448
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt                       |   2
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp                   |  27
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp             |  83
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                              | 182
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp               |  10
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp               |  34
-rw-r--r--  lib/Transforms/Scalar/LowerAtomic.cpp                      |   2
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp                  |   8
-rw-r--r--  lib/Transforms/Scalar/ObjCARC.cpp                          |  19
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp                      |  78
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp             | 281
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp                  |  21
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp                 |  12
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp                   |   9
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp                     |  10
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt                        |   2
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp                     |   2
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp                       |   2
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp                     |   3
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp                    |  14
-rw-r--r--  lib/Transforms/Utils/Local.cpp                             |  21
-rw-r--r--  lib/Transforms/Utils/ModuleUtils.cpp                       |   2
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp           |   4
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp                        |  52
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp                       |  20
-rw-r--r--  lib/Transforms/Vectorize/BBVectorize.cpp                   | 807
-rw-r--r--  lib/Transforms/Vectorize/CMakeLists.txt                    |   2
46 files changed, 1305 insertions(+), 1218 deletions(-)
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 58b3551cd7..3f6b1de614 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -20,3 +20,5 @@ add_llvm_library(LLVMipo
StripDeadPrototypes.cpp
StripSymbols.cpp
)
+
+add_dependencies(LLVMipo intrinsics_gen)
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 2b427aa6a4..18c1c7b000 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -65,7 +65,7 @@ bool GlobalDCE::runOnModule(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
// Functions with external linkage are needed if they have a body
- if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() &&
+ if (!I->isDiscardableIfUnused() &&
!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
GlobalIsNeeded(I);
}
@@ -75,7 +75,7 @@ bool GlobalDCE::runOnModule(Module &M) {
Changed |= RemoveUnusedGlobalValue(*I);
// Externally visible & appending globals are needed, if they have an
// initializer.
- if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() &&
+ if (!I->isDiscardableIfUnused() &&
!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
GlobalIsNeeded(I);
}
@@ -84,7 +84,7 @@ bool GlobalDCE::runOnModule(Module &M) {
I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
// Externally visible aliases are needed.
- if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage())
+ if (!I->isDiscardableIfUnused())
GlobalIsNeeded(I);
}
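
All three loops above now share one predicate instead of spelling out the linkage tests by hand. A rough sketch of what GlobalValue::isDiscardableIfUnused() covers at this point (illustrative only; the authoritative definition lives in llvm/GlobalValue.h, and note the callers still test available_externally linkage separately):

    // Sketch: a definition can be dropped when nothing references it if its
    // linkage never obliges this module to emit it for external users.
    bool GlobalValue::isDiscardableIfUnused() const {
      return hasLinkOnceLinkage() || hasLocalLinkage();
    }
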
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index d316d52678..4e1c23c198 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -254,6 +254,8 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
GS.StoredType = GlobalStatus::isStored;
}
}
+ } else if (isa<BitCastInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
} else if (isa<GetElementPtrInst>(I)) {
if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
} else if (isa<SelectInst>(I)) {
@@ -517,7 +519,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
GlobalVariable::InternalLinkage,
In, GV->getName()+"."+Twine(i),
- GV->isThreadLocal(),
+ GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
@@ -550,7 +552,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
GlobalVariable::InternalLinkage,
In, GV->getName()+"."+Twine(i),
- GV->isThreadLocal(),
+ GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
@@ -866,7 +868,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
UndefValue::get(GlobalType),
GV->getName()+".body",
GV,
- GV->isThreadLocal());
+ GV->getThreadLocalMode());
// If there are bitcast users of the malloc (which is typical, usually we have
// a malloc + bitcast) then replace them with uses of the new global. Update
@@ -899,7 +901,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
- GV->getName()+".init", GV->isThreadLocal());
+ GV->getName()+".init", GV->getThreadLocalMode());
bool InitBoolUsed = false;
// Loop over all uses of GV, processing them in turn.
@@ -1321,7 +1323,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
PFieldTy, false, GlobalValue::InternalLinkage,
Constant::getNullValue(PFieldTy),
GV->getName() + ".f" + Twine(FieldNo), GV,
- GV->isThreadLocal());
+ GV->getThreadLocalMode());
FieldGlobals.push_back(NGV);
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
@@ -1567,8 +1569,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
- CI = dyn_cast<BitCastInst>(Malloc) ?
- extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
+ CI = cast<CallInst>(BCI->getOperand(0));
+ else
+ CI = cast<CallInst>(Malloc);
}
GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD);
@@ -1645,7 +1649,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
- GV->isThreadLocal());
+ GV->getThreadLocalMode());
GV->getParent()->getGlobalList().insert(GV, NewGV);
Constant *InitVal = GV->getInitializer();
@@ -1716,7 +1720,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
/// possible. If we make a change, return true.
bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
Module::global_iterator &GVI) {
- if (!GV->hasLocalLinkage())
+ if (!GV->isDiscardableIfUnused())
return false;
// Do more involved optimizations if the global is internal.
@@ -1729,6 +1733,9 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
return true;
}
+ if (!GV->hasLocalLinkage())
+ return false;
+
SmallPtrSet<const PHINode*, 16> PHIUsers;
GlobalStatus GS;
@@ -2049,7 +2056,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
// Create the new global and insert it next to the existing list.
GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
GCL->getLinkage(), CA, "",
- GCL->isThreadLocal());
+ GCL->getThreadLocalMode());
GCL->getParent()->getGlobalList().insert(GCL, NGV);
NGV->takeName(GCL);
@@ -2705,7 +2712,7 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD,
<< " stores.\n");
for (DenseMap<Constant*, Constant*>::const_iterator I =
Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
- I != E; ++I)
+ I != E; ++I)
CommitValueTo(I->second, I->first);
for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
Eval.getInvariants().begin(), E = Eval.getInvariants().end();
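
Every global this pass clones now forwards getThreadLocalMode() instead of the boolean isThreadLocal(). The GlobalVariable constructors take the full TLS model (the enum below, from llvm/GlobalValue.h); passing the old bool would collapse every thread-local clone to the general-dynamic model, discarding an explicit initial-exec or local-exec choice:

    enum ThreadLocalMode {
      NotThreadLocal = 0,
      GeneralDynamicTLSModel,   // what a plain 'true' used to convert to
      LocalDynamicTLSModel,
      InitialExecTLSModel,
      LocalExecTLSModel
    };
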
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 0b01c3822f..715a384adc 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -45,22 +45,22 @@
#define DEBUG_TYPE "mergefunc"
#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Constants.h"
+#include "llvm/IRBuilder.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index b5caa9a557..d8e8cf77dd 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -22,11 +22,11 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index d070ccc0d6..72cfe2c985 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -13,3 +13,5 @@ add_llvm_library(LLVMInstCombine
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
+
+add_dependencies(LLVMInstCombine intrinsics_gen)
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 199df519ce..c2b0e03b40 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -11,11 +11,11 @@
#define INSTCOMBINE_INSTCOMBINE_H
#include "InstCombineWorklist.h"
+#include "llvm/IRBuilder.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/TargetFolder.h"
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 00f9974125..99b62f8d05 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -544,11 +544,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
- // C - zext(bool) -> bool ? C - 1 : C
- if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
- if (ZI->getSrcTy()->isIntegerTy(1))
- return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
-
// C-(X+C2) --> (C-C2)-X
ConstantInt *C2;
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2))))
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3bafc6661b..7d0af0d802 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -995,9 +995,11 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
std::swap(Op0Ordered, Op1Ordered);
}
if (Op0Pred == 0) {
- // uno && ueq -> uno && (uno || eq) -> ueq
+ // uno && ueq -> uno && (uno || eq) -> uno
// ord && olt -> ord && (ord && lt) -> olt
- if (Op0Ordered == Op1Ordered)
+ if (!Op0Ordered && (Op0Ordered == Op1Ordered))
+ return LHS;
+ if (Op0Ordered && (Op0Ordered == Op1Ordered))
return RHS;
// uno && oeq -> uno && (ord && eq) -> false
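
The previous code returned RHS for both the ordered and unordered variants, which is only correct for the ordered case. Checking the two identities in plain C++ (illustrative only):

    #include <cmath>
    // uno: true iff either operand is NaN. ueq: unordered-or-equal.
    bool uno(double x, double y) { return std::isnan(x) || std::isnan(y); }
    bool ueq(double x, double y) { return uno(x, y) || x == y; }
    // For all x, y:
    //   uno(x,y) && ueq(x,y) == uno(x,y)   -> fold to the LHS (the fix)
    //   ord(x,y) && olt(x,y) == olt(x,y)   -> fold to the RHS (as before)
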
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 19776b1e09..f74cff85c6 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -172,8 +172,6 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (isFreeCall(&CI))
return visitFree(CI);
- if (extractMallocCall(&CI) || extractCallocCall(&CI))
- return visitMalloc(CI);
// If the caller function is nounwind, mark the call as nounwind, even if the
// callee isn't.
@@ -246,84 +244,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::objectsize: {
- // We need target data for just about everything so depend on it.
- if (!TD) return 0;
-
- Type *ReturnTy = CI.getType();
- uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
-
- // Get to the real allocated thing and offset as fast as possible.
- Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
-
- uint64_t Offset = 0;
- uint64_t Size = -1ULL;
-
- // Try to look through constant GEPs.
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) {
- if (!GEP->hasAllConstantIndices()) return 0;
-
- // Get the current byte offset into the thing. Use the original
- // operand in case we're looking through a bitcast.
- SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
- if (!GEP->getPointerOperandType()->isPointerTy())
- return 0;
- Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
-
- Op1 = GEP->getPointerOperand()->stripPointerCasts();
-
- // Make sure we're not a constant offset from an external
- // global.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1))
- if (!GV->hasDefinitiveInitializer()) return 0;
- }
-
- // If we've stripped down to a single global variable that we
- // can know the size of then just return that.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
- if (GV->hasDefinitiveInitializer()) {
- Constant *C = GV->getInitializer();
- Size = TD->getTypeAllocSize(C->getType());
- } else {
- // Can't determine size of the GV.
- Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow);
- return ReplaceInstUsesWith(CI, RetVal);
- }
- } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
- // Get alloca size.
- if (AI->getAllocatedType()->isSized()) {
- Size = TD->getTypeAllocSize(AI->getAllocatedType());
- if (AI->isArrayAllocation()) {
- const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
- if (!C) return 0;
- Size *= C->getZExtValue();
- }
- }
- } else if (CallInst *MI = extractMallocCall(Op1)) {
- // Get allocation size.
- Value *Arg = MI->getArgOperand(0);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg))
- Size = CI->getZExtValue();
-
- } else if (CallInst *MI = extractCallocCall(Op1)) {
- // Get allocation size.
- Value *Arg1 = MI->getArgOperand(0);
- Value *Arg2 = MI->getArgOperand(1);
- if (ConstantInt *CI1 = dyn_cast<ConstantInt>(Arg1))
- if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Arg2))
- Size = (CI1->getValue() * CI2->getValue()).getZExtValue();
- }
-
- // Do not return "I don't know" here. Later optimization passes could
- // make it possible to evaluate objectsize to a constant.
- if (Size == -1ULL)
- return 0;
-
- if (Size < Offset) {
- // Out of bound reference? Negative index normalized to large
- // index? Just return "I don't know".
- return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow));
- }
- return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset));
+ uint64_t Size;
+ if (getObjectSize(II->getArgOperand(0), Size, TD))
+ return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
+ return 0;
}
case Intrinsic::bswap:
// bswap(bswap(x)) -> x
@@ -768,7 +692,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
TerminatorInst *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
- if (isa<AllocaInst>(BI) || isMalloc(BI)) {
+ if (isa<AllocaInst>(BI)) {
CannotRemove = true;
break;
}
@@ -955,6 +879,9 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
+ if (isAllocLikeFn(CS.getInstruction()))
+ return visitMalloc(*CS.getInstruction());
+
bool Changed = false;
// If the callee is a pointer to a function, attempt to move any casts to the
@@ -990,24 +917,24 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
}
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- // This instruction is not reachable, just remove it. We insert a store to
- // undef so that we know that this code is not reachable, despite the fact
- // that we can't modify the CFG here.
- new StoreInst(ConstantInt::getTrue(Callee->getContext()),
- UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
- CS.getInstruction());
-
// If CS does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
if (!CS.getInstruction()->getType()->isVoidTy())
ReplaceInstUsesWith(*CS.getInstruction(),
UndefValue::get(CS.getInstruction()->getType()));
- if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
- // Don't break the CFG, insert a dummy cond branch.
- BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
- ConstantInt::getTrue(Callee->getContext()), II);
+ if (isa<InvokeInst>(CS.getInstruction())) {
+ // Can't remove an invoke because we cannot change the CFG.
+ return 0;
}
+
+ // This instruction is not reachable, just remove it. We insert a store to
+ // undef so that we know that this code is not reachable, despite the fact
+ // that we can't modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ CS.getInstruction());
+
return EraseInstFromFunction(*CS.getInstruction());
}
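
The open-coded walk over GEPs, globals, allocas, and malloc/calloc calls is replaced by the shared getObjectSize() helper declared in llvm/Analysis/MemoryBuiltins.h, which centralizes the same folding. For reference, the semantics being folded, shown via the GCC-compatible builtin that lowers to @llvm.objectsize (assuming, as with the removed code, that constant offsets into a known object are accounted for):

    char buf[16];
    // Type 0 asks for the maximum number of bytes remaining from the pointer;
    // with a known object and a constant offset this folds to a constant.
    unsigned long n = __builtin_object_size(buf + 4, 0);   // folds to 12
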
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index d07be2c8b3..555b4428d2 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -648,10 +648,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
if (!I) return false;
// If the input is a truncate from the destination type, we can trivially
- // eliminate it, even if it has multiple uses.
- // FIXME: This is currently disabled until codegen can handle this without
- // pessimizing code, PR5997.
- if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ // eliminate it.
+ if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
// We can't extend or shrink something that has multiple uses: doing so would
@@ -992,11 +990,8 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
- // If this is a truncate from the dest type, we can trivially eliminate it,
- // even if it has multiple uses.
- // FIXME: This is currently disabled until codegen can handle this without
- // pessimizing code, PR5997.
- if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ // If this is a truncate from the dest type, we can trivially eliminate it.
+ if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
// We can't extend or shrink something that has multiple uses: doing so would
@@ -1341,10 +1336,9 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
// non-type-safe code.
if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
GEP->hasAllConstantIndices()) {
- // We are guaranteed to get a constant from EmitGEPOffset.
- ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP));
- int64_t Offset = OffsetV->getSExtValue();
-
+ SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+ int64_t Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
+
// Get the base pointer input of the bitcast, and the type it points to.
Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
Type *GEPIdxTy =
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index b2f2e248e4..b9df5eb81e 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -106,7 +106,6 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
New->setAlignment(AI.getAlignment());
@@ -135,16 +134,49 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
}
}
- if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
- // If alloca'ing a zero byte object, replace the alloca with a null pointer.
- // Note that we only do this for alloca's, because malloc should allocate
- // and return a unique pointer, even for a zero byte allocation.
- if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
- return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
-
+ if (TD && AI.getAllocatedType()->isSized()) {
// If the alignment is 0 (unspecified), assign it the preferred alignment.
if (AI.getAlignment() == 0)
AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
+
+ // Move all alloca's of zero byte objects to the entry block and merge them
+ // together. Note that we only do this for alloca's, because malloc should
+ // allocate and return a unique pointer, even for a zero byte allocation.
+ if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) {
+ // For a zero sized alloca there is no point in doing an array allocation.
+ // This is helpful if the array size is a complicated expression not used
+ // elsewhere.
+ if (AI.isArrayAllocation()) {
+ AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
+ return &AI;
+ }
+
+ // Get the first instruction in the entry block.
+ BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
+ Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
+ if (FirstInst != &AI) {
+ // If the entry block doesn't start with a zero-size alloca then move
+ // this one to the start of the entry block. There is no problem with
+ // dominance as the array size was forced to a constant earlier already.
+ AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
+ if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
+ TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
+ AI.moveBefore(FirstInst);
+ return &AI;
+ }
+
+ // Replace this zero-sized alloca with the one at the start of the entry
+ // block after ensuring that the address will be aligned enough for both
+ // types.
+ unsigned MaxAlign =
+ std::max(TD->getPrefTypeAlignment(EntryAI->getAllocatedType()),
+ TD->getPrefTypeAlignment(AI.getAllocatedType()));
+ EntryAI->setAlignment(MaxAlign);
+ if (AI.getType() != EntryAI->getType())
+ return new BitCastInst(EntryAI, AI.getType());
+ return ReplaceInstUsesWith(AI, EntryAI);
+ }
+ }
}
// Try to aggressively remove allocas which are only used for GEPs, lifetime
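
The old behaviour folded a zero-byte alloca to a null pointer; the new code instead rewrites the array count to 1 and merges all zero-sized allocas into a single one at the top of the entry block, raising its alignment as needed. A hypothetical input showing the effect (sketch only; C++ empty classes really have size 1, so read Empty as a zero-sized type at the IR level):

    struct Empty {};
    void use(Empty *, Empty *);
    void f(long n) {
      Empty a;    // becomes the lone entry-block alloca
      Empty b;    // folded into 'a': zero-sized objects may share an address,
                  // whereas a null result would break pointer comparisons
      use(&a, &b);
    }
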
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 5168e2a113..35a0bbb761 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -464,9 +464,12 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
{ const APInt *CI; Value *N;
- if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) {
+ if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
+ match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) {
if (*CI != 1)
N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2()));
+ if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
+ N = Builder->CreateZExt(N, Z->getDestTy());
if (I.isExact())
return BinaryOperator::CreateExactLShr(Op0, N);
return BinaryOperator::CreateLShr(Op0, N);
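
The existing fold X udiv (C1 << N) --> X lshr (N + log2(C1)) now also fires when the shift is computed in a narrower type and zero-extended to the division's width; the adjusted shift amount is zero-extended to match. A worked instance in plain C++ (illustrative):

    // 4u == 1 << 2, so the division is a right shift by n + 2. The patch
    // additionally matches the variant where (4 << n) is computed in, say,
    // 16 bits and zero-extended before the divide.
    unsigned udiv_shl(unsigned x, unsigned n) {
      return x / (4u << n);   // becomes x >> (n + 2)
    }
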
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index b8a533bf7c..c5124bf7b2 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1058,10 +1058,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
- // Determine how much the GEP moves the pointer. We are guaranteed to get
- // a constant back from EmitGEPOffset.
- ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
- int64_t Offset = OffsetV->getSExtValue();
+ // Determine how much the GEP moves the pointer.
+ SmallVector<Value*, 8> Ops(GEP.idx_begin(), GEP.idx_end());
+ int64_t Offset = TD->getIndexedOffset(GEP.getPointerOperandType(), Ops);
// If this GEP instruction doesn't move the pointer, just replace the GEP
// with a bitcast of the real input to the dest type.
@@ -1069,7 +1068,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
if (isa<AllocaInst>(BCI->getOperand(0)) ||
- isMalloc(BCI->getOperand(0))) {
+ isAllocationFn(BCI->getOperand(0))) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
@@ -1168,6 +1167,14 @@ Instruction *InstCombiner::visitMalloc(Instruction &MI) {
}
EraseInstFromFunction(*I);
}
+
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
+ // Replace invoke with a NOP intrinsic to maintain the original CFG
+ Module *M = II->getParent()->getParent()->getParent();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
+ InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
+ ArrayRef<Value *>(), "", II->getParent());
+ }
return EraseInstFromFunction(MI);
}
return 0;
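
With visitCallSite now routing alloc-like calls here (see the InstCombineCalls hunk above), a dead allocation reached via invoke can be removed too. An invoke cannot simply be erased, because both successor blocks would lose a predecessor edge; invoking the no-op llvm.donothing intrinsic preserves the CFG shape. Before/after sketch (illustrative IR in comments, assuming llvm.donothing as introduced around this revision):

    // Before (result unused):            // After:
    //   %p = invoke i8* @malloc(i64 4)   //   invoke void @llvm.donothing()
    //          to label %ok              //          to label %ok
    //          unwind label %lp          //          unwind label %lp
    // Both edges survive, so PHIs and the landing pad need no updating.
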
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index a9d08db9c6..482ebef2a2 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -16,6 +16,12 @@
#define DEBUG_TYPE "asan"
#include "FunctionBlackList.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallSet.h"
@@ -23,14 +29,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include "llvm/Target/TargetData.h"
@@ -38,7 +39,6 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Type.h"
#include <string>
#include <algorithm>
@@ -82,6 +82,14 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",
cl::desc("instrument atomic instructions (rmw, cmpxchg)"),
cl::Hidden, cl::init(true));
+// This flag limits the number of instructions to be instrumented
+// in any given BB. Normally, this should be set to unlimited (INT_MAX),
+// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporarily
+// set it to 10000.
+static cl::opt<int> ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb",
+ cl::init(10000),
+ cl::desc("maximal number of instructions to instrument in any given BB"),
+ cl::Hidden);
// This flag may need to be replaced with -f[no]asan-stack.
static cl::opt<bool> ClStack("asan-stack",
cl::desc("Handle stack memory"), cl::Hidden, cl::init(true));
@@ -149,7 +157,6 @@ struct AddressSanitizer : public ModulePass {
bool poisonStackInFunction(Module &M, Function &F);
virtual bool runOnModule(Module &M);
bool insertGlobalRedzones(Module &M);
- BranchInst *splitBlockAndInsertIfThen(Instruction *SplitBefore, Value *Cmp);
static char ID; // Pass identification, replacement for typeid
private:
@@ -212,29 +219,27 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
// Split the basic block and insert an if-then code.
// Before:
// Head
-// SplitBefore
+// Cmp
// Tail
// After:
// Head
// if (Cmp)
-// NewBasicBlock
-// SplitBefore
+// ThenBlock
// Tail
//
-// Returns the NewBasicBlock's terminator.
-BranchInst *AddressSanitizer::splitBlockAndInsertIfThen(
- Instruction *SplitBefore, Value *Cmp) {
+// Returns the ThenBlock's terminator.
+static BranchInst *splitBlockAndInsertIfThen(Value *Cmp) {
+ Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode();
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
TerminatorInst *HeadOldTerm = Head->getTerminator();
- BasicBlock *NewBasicBlock =
- BasicBlock::Create(*C, "", Head->getParent());
- BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/NewBasicBlock,
- /*ifFalse*/Tail,
- Cmp);
+ LLVMContext &C = Head->getParent()->getParent()->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent());
+ BranchInst *HeadNewTerm =
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
- BranchInst *CheckTerm = BranchInst::Create(Tail, NewBasicBlock);
+ BranchInst *CheckTerm = BranchInst::Create(Tail, ThenBlock);
return CheckTerm;
}
@@ -283,8 +288,8 @@ bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
IRBuilder<> IRB(InsertBefore);
Value *Cmp = IRB.CreateICmpNE(Length,
- Constant::getNullValue(Length->getType()));
- InsertBefore = splitBlockAndInsertIfThen(InsertBefore, Cmp);
+ Constant::getNullValue(Length->getType()));
+ InsertBefore = splitBlockAndInsertIfThen(Cmp);
}
instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
@@ -381,8 +386,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
- Instruction *CheckTerm = splitBlockAndInsertIfThen(
- cast<Instruction>(Cmp)->getNextNode(), Cmp);
+ Instruction *CheckTerm = splitBlockAndInsertIfThen(Cmp);
IRBuilder<> IRB2(CheckTerm);
size_t Granularity = 1 << MappingScale;
@@ -400,7 +404,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue);
- CheckTerm = splitBlockAndInsertIfThen(CheckTerm, Cmp2);
+ CheckTerm = splitBlockAndInsertIfThen(Cmp2);
}
IRBuilder<> IRB1(CheckTerm);
@@ -511,7 +515,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
// Create a new global variable with enough space for a redzone.
GlobalVariable *NewGlobal = new GlobalVariable(
M, NewTy, G->isConstant(), G->getLinkage(),
- NewInitializer, "", G, G->isThreadLocal());
+ NewInitializer, "", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
NewGlobal->setAlignment(RedzoneSize);
@@ -689,6 +693,7 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
for (Function::iterator FI = F.begin(), FE = F.end();
FI != FE; ++FI) {
TempsToInstrument.clear();
+ int NumInsnsPerBB = 0;
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
if (LooksLikeCodeInBug11395(BI)) return false;
@@ -710,6 +715,9 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
continue;
}
ToInstrument.push_back(BI);
+ NumInsnsPerBB++;
+ if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB)
+ break;
}
}
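
Two independent changes in this file: splitBlockAndInsertIfThen becomes a file-static helper that derives its split point from the comparison itself, and a new cap bounds per-block instrumentation. Callers now shrink to a single call (usage sketch, mirroring the hunks above):

    Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
    Instruction *CheckTerm = splitBlockAndInsertIfThen(Cmp);
    IRBuilder<> IRB2(CheckTerm);   // the slow-path check lands in ThenBlock

ClMaxInsnsToInstrumentPerBB then stops collecting candidates after 10000 memory operations in one basic block, the stated workaround for the compile-time blowup in PR12652.
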
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index e4c8cf105c..eaa3a4000f 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -9,3 +9,5 @@ add_llvm_library(LLVMInstrumentation
ProfilingUtils.cpp
ThreadSanitizer.cpp
)
+
+add_dependencies(LLVMInstrumentation intrinsics_gen)
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 6c42137b3d..264a6a6153 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -18,23 +18,23 @@
#include "ProfilingUtils.h"
#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Instructions.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/DebugLoc.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/PathV2.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/PathV2.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <string>
#include <utility>
using namespace llvm;
@@ -448,7 +448,7 @@ bool GCOVProfiler::emitProfileArcs() {
new GlobalVariable(*M, CounterTy, false,
GlobalValue::InternalLinkage,
Constant::getNullValue(CounterTy),
- "__llvm_gcov_ctr", 0, false, 0);
+ "__llvm_gcov_ctr");
CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
UniqueVector<BasicBlock *> ComplexEdgePreds;
@@ -687,8 +687,7 @@ void GCOVProfiler::insertCounterWriteout(
FTy = FunctionType::get(Type::getInt32Ty(*Ctx),
PointerType::get(FTy, 0), false);
- Function *AtExitFn =
- Function::Create(FTy, GlobalValue::ExternalLinkage, "atexit", M);
+ Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
Builder.CreateCall(AtExitFn, WriteoutF);
Builder.CreateRetVoid();
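
Registering the writeout function now goes through Module::getOrInsertFunction rather than Function::Create. Function::Create always inserts a fresh function, so if "atexit" were already declared in the module the new declaration would be auto-renamed (e.g. "atexit1") and the registration would bind to the wrong symbol; getOrInsertFunction reuses the existing declaration, returning it bitcast if the prototypes differ:

    // Reuse or create the libc atexit declaration; never a renamed duplicate.
    Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
    Builder.CreateCall(AtExitFn, WriteoutF);
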
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 31af145e79..4c12a9b624 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -22,26 +22,26 @@
#define DEBUG_TYPE "tsan"
#include "FunctionBlackList.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Type.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/BoundsChecking.cpp b/lib/Transforms/Scalar/BoundsChecking.cpp
index d10d97ca05..0690d76e7b 100644
--- a/lib/Transforms/Scalar/BoundsChecking.cpp
+++ b/lib/Transforms/Scalar/BoundsChecking.cpp
@@ -14,55 +14,29 @@
#define DEBUG_TYPE "bounds-checking"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Pass.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetFolder.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Metadata.h"
-#include "llvm/Operator.h"
-#include "llvm/Pass.h"
using namespace llvm;
-static cl::opt<bool> ManyTrapBB("bounds-checking-multiple-traps",
- cl::desc("Use one trap block per assertion"));
+static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
+ cl::desc("Use one trap block per function"));
STATISTIC(ChecksAdded, "Bounds checks added");
STATISTIC(ChecksSkipped, "Bounds checks skipped");
STATISTIC(ChecksUnable, "Bounds checks unable to add");
-STATISTIC(ChecksUnableInterproc, "Bounds checks unable to add (interprocedural)");
-STATISTIC(ChecksUnableLoad, "Bounds checks unable to add (LoadInst)");
typedef IRBuilder<true, TargetFolder> BuilderTy;
namespace {
- // FIXME: can use unions here to save space
- struct CacheData {
- APInt Offset;
- Value *OffsetValue;
- APInt Size;
- Value *SizeValue;
- bool ReturnVal;
- CacheData() {}
- CacheData(APInt Off, Value *OffVal, APInt Sz, Value *SzVal, bool Ret) :
- Offset(Off), OffsetValue(OffVal), Size(Sz), SizeValue(SzVal),
- ReturnVal(Ret) {}
- };
- typedef DenseMap<Value*, CacheData> CacheMapTy;
- typedef SmallPtrSet<Value*, 8> PtrSetTy;
-
struct BoundsChecking : public FunctionPass {
static char ID;
@@ -74,20 +48,15 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetData>();
- AU.addRequired<LoopInfo>();
- AU.addRequired<ScalarEvolution>();
}
private:
const TargetData *TD;
- LoopInfo *LI;
- ScalarEvolution *SE;
+ ObjectSizeOffsetEvaluator *ObjSizeEval;
BuilderTy *Builder;
- Function *Fn;
+ Instruction *Inst;
BasicBlock *TrapBB;
unsigned Penalty;
- CacheMapTy CacheMap;
- PtrSetTy SeenPtrs;
BasicBlock *getTrapBB();
void emitBranchToTrap(Value *Cmp = 0);
@@ -108,9 +77,10 @@ INITIALIZE_PASS_END(BoundsChecking, "bounds-checking",
/// getTrapBB - create a basic block that traps. All overflowing conditions
/// branch to this block. There's only one trap block per function.
BasicBlock *BoundsChecking::getTrapBB() {
- if (TrapBB && !ManyTrapBB)
+ if (TrapBB && SingleTrapBB)
return TrapBB;
+ Function *Fn = Inst->getParent()->getParent();
BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint();
TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
Builder->SetInsertPoint(TrapBB);
@@ -119,6 +89,7 @@ BasicBlock *BoundsChecking::getTrapBB() {
CallInst *TrapCall = Builder->CreateCall(F);
TrapCall->setDoesNotReturn();
TrapCall->setDoesNotThrow();
+ TrapCall->setDebugLoc(Inst->getDebugLoc());
Builder->CreateUnreachable();
Builder->SetInsertPoint(PrevInsertPoint);
@@ -129,6 +100,16 @@ BasicBlock *BoundsChecking::getTrapBB() {
/// emitBranchToTrap - emit a branch instruction to a trap block.
/// If Cmp is non-null, perform a jump only if its value evaluates to true.
void BoundsChecking::emitBranchToTrap(Value *Cmp) {
+ // check if the comparison is always false
+ ConstantInt *C = dyn_cast_or_null<ConstantInt>(Cmp);
+ if (C) {
+ ++ChecksSkipped;
+ if (!C->getZExtValue())
+ return;
+ else
+ Cmp = 0; // unconditional branch
+ }
+
Instruction *Inst = Builder->GetInsertPoint();
BasicBlock *OldBB = Inst->getParent();
BasicBlock *Cont = OldBB->splitBasicBlock(Inst);
@@ -141,310 +122,6 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
}
-#define GET_VALUE(Val, Int) \
- if (!Val) \
- Val = ConstantInt::get(IntTy, Int)
-
-#define RETURN(Val) \
- do { ReturnVal = Val; goto cache_and_return; } while (0)
-
-/// computeAllocSize - compute the object size and the offset within the object
-/// pointed by Ptr. OffsetValue/SizeValue will be null if they are constant, and
-/// therefore the result is given in Offset/Size variables instead.
-/// Returns true if the offset and size could be computed within the given
-/// maximum run-time penalty.
-bool BoundsChecking::computeAllocSize(Value *Ptr, APInt &Offset,
- Value* &OffsetValue, APInt &Size,
- Value* &SizeValue) {
- Ptr = Ptr->stripPointerCasts();
-
- // lookup to see if we've seen the Ptr before
- CacheMapTy::iterator CacheIt = CacheMap.find(Ptr);
- if (CacheIt != CacheMap.end()) {
- CacheData &Cache = CacheIt->second;
- Offset = Cache.Offset;
- OffsetValue = Cache.OffsetValue;
- Size = Cache.Size;
- SizeValue = Cache.SizeValue;
- return Cache.ReturnVal;
- }
-
- // record the pointers that were handled in this run, so that they can be
- // cleaned later if something fails
- SeenPtrs.insert(Ptr);
-
- IntegerType *IntTy = TD->getIntPtrType(Fn->getContext());
- unsigned IntTyBits = IntTy->getBitWidth();
- bool ReturnVal;
-
- // always generate code immediately before the instruction being processed, so
- // that the generated code dominates the same BBs
- Instruction *PrevInsertPoint = Builder->GetInsertPoint();
- if (Instruction *I = dyn_cast<Instruction>(Ptr))
- Builder->SetInsertPoint(I);
-
- // initalize with "don't know" state: offset=0 and size=uintmax
- Offset = 0;
- Size = APInt::getMaxValue(TD->getTypeSizeInBits(IntTy));
- OffsetValue = SizeValue = 0;
-
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- APInt PtrOffset(IntTyBits, 0);
- Value *PtrOffsetValue = 0;
- if (!computeAllocSize(GEP->getPointerOperand(), PtrOffset, PtrOffsetValue,
- Size, SizeValue))
- RETURN(false);
-
- if (GEP->hasAllConstantIndices()) {
- SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
- Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
- // if PtrOffset is constant, return immediately
- if (!PtrOffsetValue) {
- Offset += PtrOffset;
- RETURN(true);
- }
- OffsetValue = ConstantInt::get(IntTy, Offset);
- } else if (Penalty > 1) {
- OffsetValue = EmitGEPOffset(Builder, *TD, GEP);
- GET_VALUE(PtrOffsetValue, PtrOffset);
- } else
- RETURN(false);
-
- OffsetValue = Builder->CreateAdd(PtrOffsetValue, OffsetValue);
- RETURN(true);
-
- // global variable with definitive size
- } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
- if (GV->hasDefinitiveInitializer()) {
- Constant *C = GV->getInitializer();
- Size = TD->getTypeAllocSize(C->getType());
- RETURN(true);
- }
- RETURN(false);
-
- // stack allocation
- } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Ptr)) {
- if (!AI->getAllocatedType()->isSized())
- RETURN(false);
-
- Size = TD->getTypeAllocSize(AI->getAllocatedType());
- if (!AI->isArrayAllocation())
- RETURN(true); // we are done
-
- Value *ArraySize = AI->getArraySize();
- if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) {
- Size *= C->getValue();
- RETURN(true);
- }
-
- if (Penalty < 2)
- RETURN(false);
-
- // VLA: compute size dynamically
- SizeValue = ConstantInt::get(ArraySize->getType(), Size);
- SizeValue = Builder->CreateMul(SizeValue, ArraySize);
- RETURN(true);
-
- // function arguments
- } else if (Argument *A = dyn_cast<Argument>(Ptr)) {
- // right now we only support byval arguments, so that no interprocedural
- // analysis is necessary
- if (!A->hasByValAttr()) {
- ++ChecksUnableInterproc;
- RETURN(false);
- }
-
- PointerType *PT = cast<PointerType>(A->getType());
- Size = TD->getTypeAllocSize(PT->getElementType());
- RETURN(true);
-
- // ptr = select(ptr1, ptr2)
- } else if (SelectInst *SI = dyn_cast<SelectInst>(Ptr)) {
- APInt OffsetTrue(IntTyBits, 0), OffsetFalse(IntTyBits, 0);
- APInt SizeTrue(IntTyBits, 0), SizeFalse(IntTyBits, 0);
- Value *OffsetValueTrue = 0, *OffsetValueFalse = 0;
- Value *SizeValueTrue = 0, *SizeValueFalse = 0;
-
- bool TrueAlloc = computeAllocSize(SI->getTrueValue(), OffsetTrue,
- OffsetValueTrue, SizeTrue, SizeValueTrue);
- bool FalseAlloc = computeAllocSize(SI->getFalseValue(), OffsetFalse,
- OffsetValueFalse, SizeFalse,
- SizeValueFalse);
- if (!TrueAlloc || !FalseAlloc)
- RETURN(false);
-
- // fold constant sizes & offsets if they are equal
- if (!OffsetValueTrue && !OffsetValueFalse && OffsetTrue == OffsetFalse)
- Offset = OffsetTrue;
- else if (Penalty > 1) {
- GET_VALUE(OffsetValueTrue, OffsetTrue);
- GET_VALUE(OffsetValueFalse, OffsetFalse);
- OffsetValue = Builder->CreateSelect(SI->getCondition(), OffsetValueTrue,
- OffsetValueFalse);
- } else
- RETURN(false);
-
- if (!SizeValueTrue && !SizeValueFalse && SizeTrue == SizeFalse)
- Size = SizeTrue;
- else if (Penalty > 1) {
- GET_VALUE(SizeValueTrue, SizeTrue);
- GET_VALUE(SizeValueFalse, SizeFalse);
- SizeValue = Builder->CreateSelect(SI->getCondition(), SizeValueTrue,
- SizeValueFalse);
- } else
- RETURN(false);
- RETURN(true);
-
- // call allocation function
- } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) {
- SmallVector<unsigned, 4> Args;
-
- if (MDNode *MD = CI->getMetadata("alloc_size")) {
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
- Args.push_back(cast<ConstantInt>(MD->getOperand(i))->getZExtValue());
-
- } else if (Function *Callee = CI->getCalledFunction()) {
- FunctionType *FTy = Callee->getFunctionType();
-
- // alloc(size)
- if (FTy->getNumParams() == 1 && FTy->getParamType(0)->isIntegerTy()) {
- if ((Callee->getName() == "malloc" ||
- Callee->getName() == "valloc" ||
- Callee->getName() == "_Znwj" || // operator new(unsigned int)
- Callee->getName() == "_Znwm" || // operator new(unsigned long)
- Callee->getName() == "_Znaj" || // operator new[](unsigned int)
- Callee->getName() == "_Znam")) {
- Args.push_back(0);
- }
- } else if (FTy->getNumParams() == 2) {
- // alloc(_, x)
- if (FTy->getParamType(1)->isIntegerTy() &&
- ((Callee->getName() == "realloc" ||
- Callee->getName() == "reallocf"))) {
- Args.push_back(1);
-
- // alloc(x, y)
- } else if (FTy->getParamType(0)->isIntegerTy() &&
- FTy->getParamType(1)->isIntegerTy() &&
- Callee->getName() == "calloc") {
- Args.push_back(0);
- Args.push_back(1);
- }
- } else if (FTy->getNumParams() == 3) {
- // alloc(_, _, x)
- if (FTy->getParamType(2)->isIntegerTy() &&
- Callee->getName() == "posix_memalign") {
- Args.push_back(2);
- }
- }
- }
-
- if (Args.empty())
- RETURN(false);
-
- // check if all arguments are constant. if so, the object size is also const
- bool AllConst = true;
- for (SmallVectorImpl<unsigned>::iterator I = Args.begin(), E = Args.end();
- I != E; ++I) {
- if (!isa<ConstantInt>(CI->getArgOperand(*I))) {
- AllConst = false;
- break;
- }
- }
-
- if (AllConst) {
- Size = 1;
- for (SmallVectorImpl<unsigned>::iterator I = Args.begin(), E = Args.end();
- I != E; ++I) {
- ConstantInt *Arg = cast<ConstantInt>(CI->getArgOperand(*I));
- Size *= Arg->getValue().zextOrSelf(IntTyBits);
- }
- RETURN(true);
- }
-
- if (Penalty < 2)
- RETURN(false);
-
- // not all arguments are constant, so create a sequence of multiplications
- for (SmallVectorImpl<unsigned>::iterator I = Args.begin(), E = Args.end();
- I != E; ++I) {
- Value *Arg = Builder->CreateZExt(CI->getArgOperand(*I), IntTy);
- if (!SizeValue) {
- SizeValue = Arg;
- continue;
- }
- SizeValue = Builder->CreateMul(SizeValue, Arg);
- }
- RETURN(true);
-
- // TODO: handle more standard functions (+ wchar cousins):
- // - strdup / strndup
- // - strcpy / strncpy
- // - strcat / strncat
- // - memcpy / memmove
- // - strcat / strncat
- // - memset
-
- } else if (PHINode *PHI = dyn_cast<PHINode>(Ptr)) {
- // create 2 PHIs: one for offset and another for size
- PHINode *OffsetPHI = Builder->CreatePHI(IntTy, PHI->getNumIncomingValues());
- PHINode *SizePHI = Builder->CreatePHI(IntTy, PHI->getNumIncomingValues());
-
- // insert right away in the cache to handle recursive PHIs
- CacheMap[Ptr] = CacheData(APInt(), OffsetPHI, APInt(), SizePHI, true);
-
- // compute offset/size for each PHI incoming pointer
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
- Builder->SetInsertPoint(PHI->getIncomingBlock(i)->getFirstInsertionPt());
-
- APInt PhiOffset(IntTyBits, 0), PhiSize(IntTyBits, 0);
- Value *PhiOffsetValue = 0, *PhiSizeValue = 0;
-
- if (!computeAllocSize(PHI->getIncomingValue(i), PhiOffset, PhiOffsetValue,
- PhiSize, PhiSizeValue)) {
- OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy));
- OffsetPHI->eraseFromParent();
- SizePHI->replaceAllUsesWith(UndefValue::get(IntTy));
- SizePHI->eraseFromParent();
- RETURN(false);
- }
-
- GET_VALUE(PhiOffsetValue, PhiOffset);
- GET_VALUE(PhiSizeValue, PhiSize);
-
- OffsetPHI->addIncoming(PhiOffsetValue, PHI->getIncomingBlock(i));
- SizePHI->addIncoming(PhiSizeValue, PHI->getIncomingBlock(i));
- }
-
- OffsetValue = OffsetPHI;
- SizeValue = SizePHI;
- RETURN(true);
-
- } else if (isa<UndefValue>(Ptr) || isa<ConstantPointerNull>(Ptr)) {
- Size = 0;
- RETURN(true);
-
- } else if (isa<LoadInst>(Ptr)) {
- ++ChecksUnableLoad;
- RETURN(false);
- }
-
- DEBUG(dbgs() << "computeAllocSize unhandled value:\n" << *Ptr << "\n");
- RETURN(false);
-
-cache_and_return:
- // cache the result and return
- CacheMap[Ptr] = CacheData(Offset, OffsetValue, Size, SizeValue, ReturnVal);
-
- // non-computable results can be safely cached
- if (!ReturnVal)
- SeenPtrs.erase(Ptr);
-
- Builder->SetInsertPoint(PrevInsertPoint);
- return ReturnVal;
-}
-
-
/// instrument - adds run-time bounds checks to memory accessing instructions.
/// Ptr is the pointer that will be read/written, and InstVal is either the
/// result from the load or the value being stored. It is used to determine the
@@ -455,67 +132,29 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
<< " bytes\n");
- IntegerType *IntTy = TD->getIntPtrType(Fn->getContext());
- unsigned IntTyBits = IntTy->getBitWidth();
-
- APInt Offset(IntTyBits, 0), Size(IntTyBits, 0);
- Value *OffsetValue = 0, *SizeValue = 0;
-
- if (!computeAllocSize(Ptr, Offset, OffsetValue, Size, SizeValue)) {
- DEBUG(dbgs() << "computeAllocSize failed:\n" << *Ptr << "\n");
-
- // erase everything that was computed in this iteration from the cache, so
- // that no dangling references are left behind. We could be a bit smarter if
- // we kept a dependency graph. It's probably not worth the complexity,
- // though.
- for (PtrSetTy::iterator I=SeenPtrs.begin(), E=SeenPtrs.end(); I != E; ++I)
- CacheMap.erase(*I);
- SeenPtrs.clear();
+ SizeOffsetEvalType SizeOffset = ObjSizeEval->compute(Ptr);
+ if (!ObjSizeEval->bothKnown(SizeOffset)) {
++ChecksUnable;
return false;
}
+ Value *Size = SizeOffset.first;
+ Value *Offset = SizeOffset.second;
+
+ IntegerType *IntTy = TD->getIntPtrType(Inst->getContext());
+ Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
+
// three checks are required to ensure safety:
// . Offset >= 0 (since the offset is given from the base ptr)
// . Size >= Offset (unsigned)
// . Size - Offset >= NeededSize (unsigned)
- if (!OffsetValue && !SizeValue) {
- if (Offset.slt(0) || Size.ult(Offset) || (Size - Offset).ult(NeededSize)) {
- // Out of bounds
- emitBranchToTrap();
- ++ChecksAdded;
- return true;
- }
- // in bounds
- ++ChecksSkipped;
- return false;
- }
-
- // emit check for offset < 0
- Value *CmpOffset = 0;
- if (OffsetValue)
- CmpOffset = Builder->CreateICmpSLT(OffsetValue, ConstantInt::get(IntTy, 0));
- else if (Offset.slt(0)) {
- // offset proved to be negative
- emitBranchToTrap();
- ++ChecksAdded;
- return true;
- }
-
- // we couldn't determine statically if the memory access is safe; emit a
- // run-time check
- GET_VALUE(OffsetValue, Offset);
- GET_VALUE(SizeValue, Size);
-
- Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
// FIXME: add NSW/NUW here? -- we don't care if the subtraction overflows
- Value *ObjSize = Builder->CreateSub(SizeValue, OffsetValue);
- Value *Cmp1 = Builder->CreateICmpULT(SizeValue, OffsetValue);
- Value *Cmp2 = Builder->CreateICmpULT(ObjSize, NeededSizeVal);
- Value *Or = Builder->CreateOr(Cmp1, Cmp2);
- if (CmpOffset)
- Or = Builder->CreateOr(CmpOffset, Or);
+ Value *ObjSize = Builder->CreateSub(Size, Offset);
+ Value *Cmp1 = Builder->CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0));
+ Value *Cmp2 = Builder->CreateICmpULT(Size, Offset);
+ Value *Cmp3 = Builder->CreateICmpULT(ObjSize, NeededSizeVal);
+ Value *Or = Builder->CreateOr(Cmp1, Builder->CreateOr(Cmp2, Cmp3));
emitBranchToTrap(Or);
++ChecksAdded;
@@ -524,13 +163,12 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
bool BoundsChecking::runOnFunction(Function &F) {
TD = &getAnalysis<TargetData>();
- LI = &getAnalysis<LoopInfo>();
- SE = &getAnalysis<ScalarEvolution>();
TrapBB = 0;
- Fn = &F;
BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
Builder = &TheBuilder;
+ ObjectSizeOffsetEvaluator TheObjSizeEval(TD, F.getContext());
+ ObjSizeEval = &TheObjSizeEval;
// check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
// touching instructions
@@ -545,16 +183,16 @@ bool BoundsChecking::runOnFunction(Function &F) {
bool MadeChange = false;
for (std::vector<Instruction*>::iterator i = WorkList.begin(),
e = WorkList.end(); i != e; ++i) {
- Instruction *I = *i;
+ Inst = *i;
- Builder->SetInsertPoint(I);
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Builder->SetInsertPoint(Inst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
MadeChange |= instrument(LI->getPointerOperand(), LI);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand());
- } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
+ } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand());
- } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(I)) {
+ } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) {
MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand());
} else {
llvm_unreachable("unknown Instruction type");
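
The pass's bespoke, cache-backed computeAllocSize (and its LoopInfo/ScalarEvolution dependencies) is replaced by the shared ObjectSizeOffsetEvaluator from llvm/Analysis/MemoryBuiltins.h. Checks are now emitted uniformly; when size and offset are compile-time constants, the TargetFolder-based builder plus the new constant test in emitBranchToTrap fold the branch away, so statically-safe accesses still cost nothing. The three emitted conditions, as plain arithmetic (illustrative):

    // Trap iff the access [Offset, Offset + NeededSize) does not fit within
    // [0, Size):
    bool out_of_bounds(long long off, unsigned long long size,
                       unsigned long long needed) {
      return off < 0                                  // Cmp1: Offset < 0
          || size < (unsigned long long)off           // Cmp2: Size u< Offset
          || size - (unsigned long long)off < needed; // Cmp3: rest u< Needed
    }
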
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 635c34486e..bf9cc66392 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -33,3 +33,5 @@ add_llvm_library(LLVMScalarOpts
Sink.cpp
TailRecursionElimination.cpp
)
+
+add_dependencies(LLVMScalarOpts intrinsics_gen)
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 24d64b50c2..cbc089ab78 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,32 +18,32 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/AddrModeMatcher.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -1133,7 +1133,8 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
// If we have a SelectInst that will likely profit from branch prediction,
// turn it into a branch.
- if (DisableSelectToBranch || OptSize || !TLI->isPredictableSelectExpensive())
+ if (DisableSelectToBranch || OptSize || !TLI ||
+ !TLI->isPredictableSelectExpensive())
return false;
if (!SI->getCondition()->getType()->isIntegerTy(1) ||
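
The fix above is a plain null guard: the pass can run without a TargetLowering, so the pointer must be tested before it is queried. A stand-alone restatement with a stand-in type, not LLVM's class:

    struct TargetLoweringLike {
      bool PredictableSelectIsExpensive = false;
      bool isPredictableSelectExpensive() const {
        return PredictableSelectIsExpensive;
      }
    };

    bool mayTurnSelectIntoBranch(const TargetLoweringLike *TLI,
                                 bool DisableSelectToBranch, bool OptSize) {
      return !DisableSelectToBranch && !OptSize && TLI &&
             TLI->isPredictableSelectExpensive();
    }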
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f498cc7934..c8448fa6c1 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -32,7 +32,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -71,7 +71,7 @@ namespace {
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
- SmallPtrSet<Value*, 16> &DeadStackObjects);
+ SmallSetVector<Value*, 16> &DeadStackObjects);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -106,7 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
///
static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
- SmallPtrSet<Value*, 16> *ValueSet = 0) {
+ SmallSetVector<Value*, 16> *ValueSet = 0) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -136,7 +136,7 @@ static void DeleteDeadInstruction(Instruction *I,
DeadInst->eraseFromParent();
- if (ValueSet) ValueSet->erase(DeadInst);
+ if (ValueSet) ValueSet->remove(DeadInst);
} while (!NowDeadInsts.empty());
}
@@ -275,39 +275,9 @@ static Value *getStoredPointerOperand(Instruction *I) {
}
static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
- const TargetData *TD = AA.getTargetData();
-
- if (const CallInst *CI = extractMallocCall(V)) {
- if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
- return C->getZExtValue();
- }
-
- if (const CallInst *CI = extractCallocCall(V)) {
- if (const ConstantInt *C1 = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
- if (const ConstantInt *C2 = dyn_cast<ConstantInt>(CI->getArgOperand(1)))
- return (C1->getValue() * C2->getValue()).getZExtValue();
- }
-
- if (TD == 0)
- return AliasAnalysis::UnknownSize;
-
- if (const AllocaInst *A = dyn_cast<AllocaInst>(V)) {
- // Get size information for the alloca
- if (const ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
- return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
- }
-
- if (const Argument *A = dyn_cast<Argument>(V)) {
- if (A->hasByValAttr())
- if (PointerType *PT = dyn_cast<PointerType>(A->getType()))
- return TD->getTypeAllocSize(PT->getElementType());
- }
-
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
- if (!GV->mayBeOverridden())
- return TD->getTypeAllocSize(GV->getType()->getElementType());
- }
-
+ uint64_t Size;
+ if (getObjectSize(V, Size, AA.getTargetData()))
+ return Size;
return AliasAnalysis::UnknownSize;
}
@@ -700,21 +670,18 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Keep track of all of the stack objects that are dead at the end of the
// function.
- SmallPtrSet<Value*, 16> DeadStackObjects;
+ SmallSetVector<Value*, 16> DeadStackObjects;
// Find all of the alloca'd pointers in the entry block.
BasicBlock *Entry = BB.getParent()->begin();
for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- DeadStackObjects.insert(AI);
+ if (isa<AllocaInst>(I))
+ DeadStackObjects.insert(I);
// Okay, so these are dead heap objects, but if the pointer never escapes
// then it's leaked by this function anyways.
- CallInst *CI = extractMallocCall(I);
- if (!CI)
- CI = extractCallocCall(I);
- if (CI && !PointerMayBeCaptured(CI, true, true))
- DeadStackObjects.insert(CI);
+ else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true))
+ DeadStackObjects.insert(I);
}
// Treat byval arguments the same, stores to them are dead at the end of the
@@ -773,18 +740,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
- DeadStackObjects.erase(A);
- continue;
- }
-
- if (CallInst *CI = extractMallocCall(BBI)) {
- DeadStackObjects.erase(CI);
- continue;
- }
-
- if (CallInst *CI = extractCallocCall(BBI)) {
- DeadStackObjects.erase(CI);
+ if (isa<AllocaInst>(BBI) || isAllocLikeFn(BBI)) {
+ DeadStackObjects.remove(BBI);
continue;
}
@@ -797,7 +754,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If the call might load from any of our allocas, then any store above
// the call is live.
SmallVector<Value*, 8> LiveAllocas;
- for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
// See if the call site touches it.
AliasAnalysis::ModRefResult A =
@@ -809,7 +766,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
E = LiveAllocas.end(); I != E; ++I)
- DeadStackObjects.erase(*I);
+ DeadStackObjects.remove(*I);
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
@@ -856,7 +813,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
- SmallPtrSet<Value*, 16> &DeadStackObjects) {
+ SmallSetVector<Value*, 16> &DeadStackObjects) {
const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
// A constant can't be in the dead pointer set.
@@ -866,12 +823,12 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
// If the kill pointer can be easily reduced to an alloca, don't bother doing
// extraneous AA queries.
if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
- DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
+ DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
return;
}
SmallVector<Value*, 16> NowLive;
- for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
// See if the loaded location could alias the stack location.
AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
@@ -881,5 +838,5 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
I != E; ++I)
- DeadStackObjects.erase(*I);
+ DeadStackObjects.remove(*I);
}
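
The SmallPtrSet to SmallSetVector switch throughout this file trades a little overhead for deterministic, insertion-ordered iteration, so DSE no longer visits dead stack objects in pointer order (which varies between runs). A minimal sketch of the data structure's idea, assuming a hashable element type:

    #include <unordered_set>
    #include <vector>

    template <typename T> class SetVectorSketch {
      std::vector<T> Order;          // iteration order = insertion order
      std::unordered_set<T> Members; // fast membership test
    public:
      bool insert(const T &V) {
        if (!Members.insert(V).second)
          return false;
        Order.push_back(V);
        return true;
      }
      bool remove(const T &V) {      // note: remove, not erase
        if (!Members.erase(V))
          return false;
        for (auto I = Order.begin(); I != Order.end(); ++I)
          if (*I == V) { Order.erase(I); break; }
        return true;
      }
      typename std::vector<T>::const_iterator begin() const {
        return Order.begin();
      }
      typename std::vector<T>::const_iterator end() const {
        return Order.end();
      }
    };

This also explains the mechanical erase-to-remove renames above: SetVector spells deletion remove().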
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c247ea9360..476ec383e6 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -18,9 +18,15 @@
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/IRBuilder.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/Metadata.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
@@ -31,21 +37,14 @@
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
using namespace PatternMatch;
@@ -1437,7 +1436,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
- if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) ||
// Loading immediately after lifetime begin -> undef.
isLifetimeStart(DepInst)) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
@@ -1736,155 +1735,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
-static MDNode *getMostGenericTBAA(MDNode *A, MDNode *B) {
- if (!A || !B)
- return NULL;
-
- if (A == B)
- return A;
-
- SmallVector<MDNode *, 4> PathA;
- MDNode *T = A;
- while (T) {
- PathA.push_back(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
- }
-
- SmallVector<MDNode *, 4> PathB;
- T = B;
- while (T) {
- PathB.push_back(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
- }
-
- int IA = PathA.size() - 1;
- int IB = PathB.size() - 1;
-
- MDNode *Ret = 0;
- while (IA >= 0 && IB >=0) {
- if (PathA[IA] == PathB[IB])
- Ret = PathA[IA];
- else
- break;
- --IA;
- --IB;
- }
- return Ret;
-}
-
-static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B) {
- if (!A || !B)
- return NULL;
-
- APFloat AVal = cast<ConstantFP>(A->getOperand(0))->getValueAPF();
- APFloat BVal = cast<ConstantFP>(B->getOperand(0))->getValueAPF();
- if (AVal.compare(BVal) == APFloat::cmpLessThan)
- return A;
- return B;
-}
-
-static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
- return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
-}
-
-static bool canBeMerged(const ConstantRange &A, const ConstantRange &B) {
- return !A.intersectWith(B).isEmptySet() || isContiguous(A, B);
-}
-
-static bool tryMergeRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low,
- ConstantInt *High) {
- ConstantRange NewRange(Low->getValue(), High->getValue());
- unsigned Size = EndPoints.size();
- APInt LB = cast<ConstantInt>(EndPoints[Size - 2])->getValue();
- APInt LE = cast<ConstantInt>(EndPoints[Size - 1])->getValue();
- ConstantRange LastRange(LB, LE);
- if (canBeMerged(NewRange, LastRange)) {
- ConstantRange Union = LastRange.unionWith(NewRange);
- Type *Ty = High->getType();
- EndPoints[Size - 2] = ConstantInt::get(Ty, Union.getLower());
- EndPoints[Size - 1] = ConstantInt::get(Ty, Union.getUpper());
- return true;
- }
- return false;
-}
-
-static void addRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low,
- ConstantInt *High) {
- if (!EndPoints.empty())
- if (tryMergeRange(EndPoints, Low, High))
- return;
-
- EndPoints.push_back(Low);
- EndPoints.push_back(High);
-}
-
-static MDNode *getMostGenericRange(MDNode *A, MDNode *B) {
- // Given two ranges, we want to compute the union of the ranges. This
- // is slightly complicated by having to combine the intervals and merge
- // the ones that overlap.
-
- if (!A || !B)
- return NULL;
-
- if (A == B)
- return A;
-
- // First, walk both lists in order of the lower boundary of each interval.
- // At each step, try to merge the new interval into the last one we added.
- SmallVector<Value*, 4> EndPoints;
- int AI = 0;
- int BI = 0;
- int AN = A->getNumOperands() / 2;
- int BN = B->getNumOperands() / 2;
- while (AI < AN && BI < BN) {
- ConstantInt *ALow = cast<ConstantInt>(A->getOperand(2 * AI));
- ConstantInt *BLow = cast<ConstantInt>(B->getOperand(2 * BI));
-
- if (ALow->getValue().slt(BLow->getValue())) {
- addRange(EndPoints, ALow, cast<ConstantInt>(A->getOperand(2 * AI + 1)));
- ++AI;
- } else {
- addRange(EndPoints, BLow, cast<ConstantInt>(B->getOperand(2 * BI + 1)));
- ++BI;
- }
- }
- while (AI < AN) {
- addRange(EndPoints, cast<ConstantInt>(A->getOperand(2 * AI)),
- cast<ConstantInt>(A->getOperand(2 * AI + 1)));
- ++AI;
- }
- while (BI < BN) {
- addRange(EndPoints, cast<ConstantInt>(B->getOperand(2 * BI)),
- cast<ConstantInt>(B->getOperand(2 * BI + 1)));
- ++BI;
- }
-
- // If we have more than 2 ranges (4 endpoints) we have to try to merge
- // the last and first ones.
- unsigned Size = EndPoints.size();
- if (Size > 4) {
- ConstantInt *FB = cast<ConstantInt>(EndPoints[0]);
- ConstantInt *FE = cast<ConstantInt>(EndPoints[1]);
- if (tryMergeRange(EndPoints, FB, FE)) {
- for (unsigned i = 0; i < Size - 2; ++i) {
- EndPoints[i] = EndPoints[i + 2];
- }
- EndPoints.resize(Size - 2);
- }
- }
-
- // If in the end we have a single range, it is possible that it is now the
- // full range. Just drop the metadata in that case.
- if (EndPoints.size() == 2) {
- ConstantRange Range(cast<ConstantInt>(EndPoints[0])->getValue(),
- cast<ConstantInt>(EndPoints[1])->getValue());
- if (Range.isFullSet())
- return NULL;
- }
-
- return MDNode::get(A->getContext(), EndPoints);
-}
-
static void patchReplacementInstruction(Value *Repl, Instruction *I) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
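
The deleted helpers above now live behind MDNode::getMostGenericTBAA, getMostGenericFPMath and getMostGenericRange, used in the hunk below. For reference, a stand-alone sketch of the core of the range logic, assuming plain non-wrapping intervals sorted by lower bound (the real implementation also copes with wrapped ranges):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Range { int64_t Lo, Hi; }; // half-open [Lo, Hi)

    // Append R, merging it into the previous interval when they overlap or
    // touch; inputs arrive in order of Lo, so checking only the last
    // interval is enough.
    static void addRange(std::vector<Range> &Out, Range R) {
      if (!Out.empty() && R.Lo <= Out.back().Hi) {
        Out.back().Hi = std::max(Out.back().Hi, R.Hi);
        return;
      }
      Out.push_back(R);
    }

    std::vector<Range> unionRanges(const std::vector<Range> &A,
                                   const std::vector<Range> &B) {
      std::vector<Range> Merged;
      size_t AI = 0, BI = 0;
      while (AI < A.size() && BI < B.size())
        addRange(Merged, A[AI].Lo < B[BI].Lo ? A[AI++] : B[BI++]);
      while (AI < A.size()) addRange(Merged, A[AI++]);
      while (BI < B.size()) addRange(Merged, B[BI++]);
      return Merged;
    }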
@@ -1911,16 +1761,16 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) {
case LLVMContext::MD_dbg:
llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
case LLVMContext::MD_tbaa:
- ReplInst->setMetadata(Kind, getMostGenericTBAA(IMD, ReplMD));
+ ReplInst->setMetadata(Kind, MDNode::getMostGenericTBAA(IMD, ReplMD));
break;
case LLVMContext::MD_range:
- ReplInst->setMetadata(Kind, getMostGenericRange(IMD, ReplMD));
+ ReplInst->setMetadata(Kind, MDNode::getMostGenericRange(IMD, ReplMD));
break;
case LLVMContext::MD_prof:
llvm_unreachable("MD_prof in a non terminator instruction");
break;
case LLVMContext::MD_fpmath:
- ReplInst->setMetadata(Kind, getMostGenericFPMath(IMD, ReplMD));
+ ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD));
break;
}
}
@@ -2101,7 +1951,7 @@ bool GVN::processLoad(LoadInst *L) {
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading for a fresh allocation with no
// intervening stores, for example.
- if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
markInstructionForDeletion(L);
++NumGVNLoad;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ad15cbb9b4..5fe9462159 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -43,20 +43,20 @@
#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/IRBuilder.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 94c229a8e2..4ba969e675 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1308,8 +1308,8 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
case LSRUse::Special:
- // Only handle -1 scales, or no scale.
- return AM.Scale == 0 || AM.Scale == -1;
+ // Special case Basic to handle -1 scales.
+ return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
@@ -4268,13 +4268,6 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
- // Flush the operand list to suppress SCEVExpander hoisting.
- if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
- }
-
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
if (F.AM.Scale != 0) {
@@ -4296,23 +4289,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
+
+ // Flush the operand list to suppress SCEVExpander hoisting address modes.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
SE.getConstant(ScaledS->getType(), F.AM.Scale));
Ops.push_back(ScaledS);
-
- // Flush the operand list to suppress SCEVExpander hoisting.
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
}
}
// Expand the GV portion.
if (F.AM.BaseGV) {
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+ }
- // Flush the operand list to suppress SCEVExpander hoisting.
+ // Flush the operand list to suppress SCEVExpander hoisting of both folded and
+ // unfolded offsets. LSR assumes they both live next to their uses.
+ if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 689bbe9b03..221911866c 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -15,9 +15,9 @@
#define DEBUG_TYPE "loweratomic"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
-#include "llvm/Support/IRBuilder.h"
using namespace llvm;
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 4341577f4d..052cc3dac0 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -15,21 +15,21 @@
#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
#include "llvm/GlobalVariable.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index d89996a1ff..87e17c7f90 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -4064,8 +4064,22 @@ bool ObjCARCContract::runOnFunction(Function &F) {
if (!RetainRVMarker)
break;
BasicBlock::iterator BBI = Inst;
- --BBI;
- while (isNoopInstruction(BBI)) --BBI;
+ BasicBlock *InstParent = Inst->getParent();
+
+ // Step up to see if the call immediately precedes the RetainRV call.
+ // If it's an invoke, we have to cross a block boundary. And we have
+ // to carefully dodge no-op instructions.
+ do {
+ if (&*BBI == InstParent->begin()) {
+ BasicBlock *Pred = InstParent->getSinglePredecessor();
+ if (!Pred)
+ goto decline_rv_optimization;
+ BBI = Pred->getTerminator();
+ break;
+ }
+ --BBI;
+ } while (isNoopInstruction(BBI));
+
if (&*BBI == GetObjCArg(Inst)) {
Changed = true;
InlineAsm *IA =
@@ -4075,6 +4089,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
/*Constraints=*/"", /*hasSideEffects=*/true);
CallInst::Create(IA, "", Inst);
}
+ decline_rv_optimization:
break;
}
case IC_InitWeak: {
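
A toy model of the backwards scan the hunk above introduces (plain C++, not LLVM's iterators): step back over no-op instructions, and on reaching the start of the block, inspect the terminator of a single predecessor, which is where an invoke would sit; with no unique predecessor, decline the optimization.

    #include <vector>

    struct Block { std::vector<int> Insts; Block *SinglePred = nullptr; };

    static bool isNoop(int I) { return I == 0; } // illustrative encoding

    const int *scanBackToCall(const Block *B, size_t Pos) {
      do {
        if (Pos == 0) {                  // hit the start of the block
          const Block *P = B->SinglePred;
          if (!P || P->Insts.empty())
            return nullptr;              // decline_rv_optimization
          return &P->Insts.back();       // predecessor's terminator (invoke)
        }
        --Pos;
      } while (isNoop(B->Insts[Pos]));
      return &B->Insts[Pos];
    }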
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 66fa0744b8..bcf34b5256 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -26,20 +26,20 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
#include <algorithm>
using namespace llvm;
@@ -132,7 +132,7 @@ namespace {
private:
void BuildRankMap(Function &F);
unsigned getRank(Value *V);
- Value *ReassociateExpression(BinaryOperator *I);
+ void ReassociateExpression(BinaryOperator *I);
void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
Value *OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops);
@@ -667,23 +667,13 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
/// the new expression into.
SmallVector<BinaryOperator*, 8> NodesToRewrite;
unsigned Opcode = I->getOpcode();
- NodesToRewrite.push_back(I);
+ BinaryOperator *Op = I;
// ExpressionChanged - Non-null if the rewritten expression differs from the
// original in some non-trivial way, requiring the clearing of optional flags.
// Flags are cleared from the operator in ExpressionChanged up to I inclusive.
BinaryOperator *ExpressionChanged = 0;
- BinaryOperator *Previous;
- BinaryOperator *Op = 0;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- assert(!NodesToRewrite.empty() &&
- "Optimized expressions has more nodes than original!");
- Previous = Op; Op = NodesToRewrite.pop_back_val();
- if (ExpressionChanged)
- // Compactify the tree instructions together with each other to guarantee
- // that the expression tree is dominated by all of Ops.
- Op->moveBefore(Previous);
-
+ for (unsigned i = 0; ; ++i) {
// The last operation (which comes earliest in the IR) is special as both
// operands will come from Ops, rather than just one with the other being
// a subexpression.
@@ -754,32 +744,47 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// from the original expression then just rewrite the rest of the expression
// into it.
if (BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode)) {
- NodesToRewrite.push_back(BO);
+ Op = BO;
continue;
}
// Otherwise, grab a spare node from the original expression and use that as
- // the left-hand side.
- assert(!NodesToRewrite.empty() &&
- "Optimized expressions has more nodes than original!");
+ // the left-hand side. If there are no nodes left then the optimizers made
+ // an expression with more nodes than the original! This usually means that
+ // they did something stupid but it might mean that the problem was just too
+ // hard (finding the minimal number of multiplications needed to realize a
+ // multiplication expression is NP-complete). Whatever the reason, smart or
+ // stupid, create a new node if there are none left.
+ BinaryOperator *NewOp;
+ if (NodesToRewrite.empty()) {
+ Constant *Undef = UndefValue::get(I->getType());
+ NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
+ Undef, Undef, "", I);
+ } else {
+ NewOp = NodesToRewrite.pop_back_val();
+ }
+
DEBUG(dbgs() << "RA: " << *Op << '\n');
- Op->setOperand(0, NodesToRewrite.back());
+ Op->setOperand(0, NewOp);
DEBUG(dbgs() << "TO: " << *Op << '\n');
ExpressionChanged = Op;
MadeChange = true;
++NumChanged;
+ Op = NewOp;
}
// If the expression changed non-trivially then clear out all subclass data
- // starting from the operator specified in ExpressionChanged.
- if (ExpressionChanged) {
+ // starting from the operator specified in ExpressionChanged, and compactify
+ // the operators to just before the expression root to guarantee that the
+ // expression tree is dominated by all of Ops.
+ if (ExpressionChanged)
do {
ExpressionChanged->clearSubclassOptionalData();
if (ExpressionChanged == I)
break;
+ ExpressionChanged->moveBefore(I);
ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->use_begin());
} while (1);
- }
// Throw away any left over nodes from the original expression.
for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
@@ -1478,14 +1483,17 @@ void Reassociate::EraseInst(Instruction *I) {
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
// Erase the dead instruction.
ValueRankMap.erase(I);
+ RedoInsts.remove(I);
I->eraseFromParent();
// Optimize its operands.
+ SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
// If this is a node in an expression tree, climb to the expression root
// and add that since that's where optimization actually happens.
unsigned Opcode = Op->getOpcode();
- while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode)
+ while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode &&
+ Visited.insert(Op))
Op = Op->use_back();
RedoInsts.insert(Op);
}
@@ -1585,7 +1593,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
ReassociateExpression(BO);
}
-Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
+void Reassociate::ReassociateExpression(BinaryOperator *I) {
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
@@ -1612,6 +1620,9 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
// OptimizeExpression - Now that we have the expression tree in a convenient
// sorted form, optimize it globally if possible.
if (Value *V = OptimizeExpression(I, Ops)) {
+ if (V == I)
+ // Self-referential expression in unreachable code.
+ return;
// This expression tree simplified to something that isn't a tree,
// eliminate it.
DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
@@ -1620,7 +1631,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
VI->setDebugLoc(I->getDebugLoc());
RedoInsts.insert(I);
++NumAnnihil;
- return V;
+ return;
}
// We want to sink immediates as deeply as possible except in the case where
@@ -1638,19 +1649,22 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
if (Ops.size() == 1) {
+ if (Ops[0].Op == I)
+ // Self-referential expression in unreachable code.
+ return;
+
// This expression tree simplified to something that isn't a tree,
// eliminate it.
I->replaceAllUsesWith(Ops[0].Op);
if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
OI->setDebugLoc(I->getDebugLoc());
RedoInsts.insert(I);
- return Ops[0].Op;
+ return;
}
// Now that we ordered and optimized the expressions, splat them back into
// the expression tree, removing any unneeded nodes.
RewriteExprTree(I, Ops);
- return I;
}
bool Reassociate::runOnFunction(Function &F) {
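
Several of the changes above defend against self-referential instructions, which are legal in unreachable code. A minimal sketch of the cycle guard added to EraseInst when climbing from an operand to its expression root:

    #include <unordered_set>

    struct Node { Node *SoleUser = nullptr; unsigned Opcode = 0; };

    // Follow single-use chains of the same opcode upwards, but stop if we
    // revisit a node: a self-referential chain would otherwise loop forever.
    Node *climbToRoot(Node *Op, unsigned Opcode) {
      std::unordered_set<Node *> Visited;
      while (Op->SoleUser && Op->SoleUser->Opcode == Opcode &&
             Visited.insert(Op).second)
        Op = Op->SoleUser;
      return Op;
    }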
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 113397fc11..e3e3c9eb17 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -22,34 +22,34 @@
#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumReplaced, "Number of allocas broken up");
@@ -60,12 +60,25 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
- SROA(int T, bool hasDT, char &ID)
+ SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
: FunctionPass(ID), HasDomTree(hasDT) {
if (T == -1)
SRThreshold = 128;
else
SRThreshold = T;
+ if (ST == -1)
+ StructMemberThreshold = 32;
+ else
+ StructMemberThreshold = ST;
+ if (AT == -1)
+ ArrayElementThreshold = 8;
+ else
+ ArrayElementThreshold = AT;
+ if (SLT == -1)
+ // Do not limit the scalar integer load size if no threshold is given.
+ ScalarLoadThreshold = -1;
+ else
+ ScalarLoadThreshold = SLT;
}
bool runOnFunction(Function &F);
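
The constructor change above threads three new knobs through, with -1 meaning "use the built-in default". The pattern, restated:

    // -1 selects the default; any other value overrides it. Mirrors how the
    // SROA constructor treats its ST, AT and SLT arguments.
    unsigned thresholdOrDefault(int Requested, unsigned Default) {
      return Requested == -1 ? Default : (unsigned)Requested;
    }
    // e.g. StructMemberThreshold = thresholdOrDefault(ST, 32);
    //      ArrayElementThreshold = thresholdOrDefault(AT, 8);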
@@ -87,7 +100,7 @@ namespace {
struct AllocaInfo {
/// The alloca to promote.
AllocaInst *AI;
-
+
/// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
/// looping and avoid redundant work.
SmallPtrSet<PHINode*, 8> CheckedPHIs;
@@ -116,8 +129,21 @@ namespace {
hasSubelementAccess(false), hasALoadOrStore(false) {}
};
+ /// SRThreshold - The maximum alloca size to be considered for SROA.
unsigned SRThreshold;
+ /// StructMemberThreshold - The maximum number of members a struct can
+ /// contain to be considered for SROA.
+ unsigned StructMemberThreshold;
+
+ /// ArrayElementThreshold - The maximum number of elements an array can
+ /// have to be considered for SROA.
+ unsigned ArrayElementThreshold;
+
+ /// ScalarLoadThreshold - The maximum size in bits of scalars to load when
+ /// converting to scalar.
+ unsigned ScalarLoadThreshold;
+
void MarkUnsafe(AllocaInfo &I, Instruction *User) {
I.isUnsafe = true;
DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
@@ -156,6 +182,7 @@ namespace {
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
+ bool ShouldAttemptScalarRepl(AllocaInst *AI);
static MemTransferInst *isOnlyCopiedFromConstantGlobal(
AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
@@ -165,7 +192,8 @@ namespace {
struct SROA_DT : public SROA {
static char ID;
public:
- SROA_DT(int T = -1) : SROA(T, true, ID) {
+ SROA_DT(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
+ SROA(T, true, ID, ST, AT, SLT) {
initializeSROA_DTPass(*PassRegistry::getPassRegistry());
}
@@ -181,7 +209,8 @@ namespace {
struct SROA_SSAUp : public SROA {
static char ID;
public:
- SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
+ SROA_SSAUp(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
+ SROA(T, false, ID, ST, AT, SLT) {
initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
}
@@ -210,10 +239,15 @@ INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
- bool UseDomTree) {
+ bool UseDomTree,
+ int StructMemberThreshold,
+ int ArrayElementThreshold,
+ int ScalarLoadThreshold) {
if (UseDomTree)
- return new SROA_DT(Threshold);
- return new SROA_SSAUp(Threshold);
+ return new SROA_DT(Threshold, StructMemberThreshold, ArrayElementThreshold,
+ ScalarLoadThreshold);
+ return new SROA_SSAUp(Threshold, StructMemberThreshold,
+ ArrayElementThreshold, ScalarLoadThreshold);
}
@@ -229,6 +263,7 @@ class ConvertToScalarInfo {
/// AllocaSize - The size of the alloca being considered in bytes.
unsigned AllocaSize;
const TargetData &TD;
+ unsigned ScalarLoadThreshold;
/// IsNotTrivial - This is set to true if there is some access to the object
/// which means that mem2reg can't promote it.
@@ -264,23 +299,33 @@ class ConvertToScalarInfo {
/// large integers unless there is some potential for optimization.
bool HadNonMemTransferAccess;
+ /// HadDynamicAccess - True if some element of this alloca was dynamic.
+ /// We don't yet have support for turning a dynamic access into a large
+ /// integer.
+ bool HadDynamicAccess;
+
public:
- explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
- : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
- VectorTy(0), HadNonMemTransferAccess(false) { }
+ explicit ConvertToScalarInfo(unsigned Size, const TargetData &td,
+ unsigned SLT)
+ : AllocaSize(Size), TD(td), ScalarLoadThreshold(SLT), IsNotTrivial(false),
+ ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
+ HadDynamicAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
private:
- bool CanConvertToScalar(Value *V, uint64_t Offset);
+ bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
- void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+ void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
+ Value *NonConstantIdx);
Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
- uint64_t Offset, IRBuilder<> &Builder);
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
- uint64_t Offset, IRBuilder<> &Builder);
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder);
};
} // end anonymous namespace.
@@ -291,7 +336,7 @@ private:
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// If we can't convert this scalar, or if mem2reg can trivially do it, bail
// out.
- if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
+ if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
return 0;
// If an alloca has only memset / memcpy uses, it may still have an Unknown
@@ -316,16 +361,27 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
NewTy = VectorTy; // Use the vector type.
} else {
unsigned BitWidth = AllocaSize * 8;
+
+ // Do not convert to scalar integer if the alloca size exceeds the
+ // scalar load threshold.
+ if (BitWidth > ScalarLoadThreshold)
+ return 0;
+
if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
!HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
return 0;
+ // Dynamic accesses on integers aren't yet supported. They need us to shift
+ // by a dynamic amount which could be difficult to work out as we might not
+ // know whether to use a left or right shift.
+ if (ScalarKind == Integer && HadDynamicAccess)
+ return 0;
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
- ConvertUsesToScalar(AI, NewAI, 0);
+ ConvertUsesToScalar(AI, NewAI, 0, 0);
return NewAI;
}
@@ -412,7 +468,8 @@ bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
///
/// If we see at least one access to the value that is as a vector type, set the
/// SawVec flag.
-bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
+ Value* NonConstantIdx) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
@@ -442,24 +499,35 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
if (!onlyUsedByLifetimeMarkers(BCI))
IsNotTrivial = true; // Can't be mem2reg'd.
- if (!CanConvertToScalar(BCI, Offset))
+ if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
return false;
continue;
}
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// If this is a GEP with a variable indices, we can't handle it.
- if (!GEP->hasAllConstantIndices())
+ PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
+ if (!PtrTy)
return false;
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- if (!GEP->getPointerOperandType()->isPointerTy())
- return false;
- uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ Value *GEPNonConstantIdx = 0;
+ if (!GEP->hasAllConstantIndices()) {
+ if (!isa<VectorType>(PtrTy->getElementType()))
+ return false;
+ if (NonConstantIdx)
+ return false;
+ GEPNonConstantIdx = Indices.pop_back_val();
+ if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
+ return false;
+ HadDynamicAccess = true;
+ } else
+ GEPNonConstantIdx = NonConstantIdx;
+ uint64_t GEPOffset = TD.getIndexedOffset(PtrTy,
Indices);
// See if all uses can be converted.
- if (!CanConvertToScalar(GEP, Offset+GEPOffset))
+ if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
HadNonMemTransferAccess = true;
@@ -469,6 +537,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a constant sized memset of a constant value (e.g. 0) we can
// handle it.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ // Store to dynamic index.
+ if (NonConstantIdx)
+ return false;
// Store of constant value.
if (!isa<ConstantInt>(MSI->getValue()))
return false;
@@ -493,6 +564,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a memcpy or memmove into or out of the whole allocation, we
// can handle it like a load or store of the scalar type.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ // Store to dynamic index.
+ if (NonConstantIdx)
+ return false;
ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
return false;
@@ -524,12 +598,13 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right. By the end of this, there should be no uses of Ptr.
void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
- uint64_t Offset) {
+ uint64_t Offset,
+ Value* NonConstantIdx) {
while (!Ptr->use_empty()) {
Instruction *User = cast<Instruction>(Ptr->use_back());
if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
- ConvertUsesToScalar(CI, NewAI, Offset);
+ ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
CI->eraseFromParent();
continue;
}
@@ -537,9 +612,11 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ if (!GEP->hasAllConstantIndices())
+ NonConstantIdx = Indices.pop_back_val();
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
- ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx);
GEP->eraseFromParent();
continue;
}
@@ -550,7 +627,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// The load is a bit extract from NewAI shifted right by Offset bits.
Value *LoadedVal = Builder.CreateLoad(NewAI);
Value *NewLoadVal
- = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
+ = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset,
+ NonConstantIdx, Builder);
LI->replaceAllUsesWith(NewLoadVal);
LI->eraseFromParent();
continue;
@@ -560,7 +638,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
assert(SI->getOperand(0) != Ptr && "Consistency error!");
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
- Builder);
+ NonConstantIdx, Builder);
Builder.CreateStore(New, NewAI);
SI->eraseFromParent();
@@ -575,6 +653,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// transform it into a store of the expanded constant value.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
assert(MSI->getRawDest() == Ptr && "Consistency error!");
+ assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
unsigned NumBytes = static_cast<unsigned>(SNumBytes);
@@ -591,7 +670,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
- Old, Offset, Builder);
+ Old, Offset, 0, Builder);
Builder.CreateStore(New, NewAI);
// If the load we just inserted is now dead, then the memset overwrote
@@ -607,6 +686,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// can handle it like a load or store of the scalar type.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
assert(Offset == 0 && "must be store to start of alloca");
+ assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
@@ -679,7 +759,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
- uint64_t Offset, IRBuilder<> &Builder) {
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder) {
// If the load is of the whole new alloca, no conversion is needed.
Type *FromType = FromVal->getType();
if (FromType == ToType && Offset == 0)
@@ -701,7 +782,17 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
}
// Return the element extracted out of it.
- Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
+ Value *Idx;
+ if (NonConstantIdx) {
+ if (Elt)
+ Idx = Builder.CreateAdd(NonConstantIdx,
+ Builder.getInt32(Elt),
+ "dyn.offset");
+ else
+ Idx = NonConstantIdx;
+ } else
+ Idx = Builder.getInt32(Elt);
+ Value *V = Builder.CreateExtractElement(FromVal, Idx);
if (V->getType() != ToType)
V = Builder.CreateBitCast(V, ToType);
return V;
@@ -710,23 +801,27 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
// If ToType is a first class aggregate, extract out each of the pieces and
// use insertvalue's to form the FCA.
if (StructType *ST = dyn_cast<StructType>(ToType)) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into struct types not supported");
const StructLayout &Layout = *TD.getStructLayout(ST);
Value *Res = UndefValue::get(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
- Builder);
+ 0, Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into array types not supported");
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
- Offset+i*EltSize, Builder);
+ Offset+i*EltSize, 0, Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -792,9 +887,14 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
+///
+/// NonConstantIdx is an index value if there was a GEP with a non-constant
+/// index value. If this is 0 then all GEPs used to find this insert address
+/// are constant.
Value *ConvertToScalarInfo::
ConvertScalar_InsertValue(Value *SV, Value *Old,
- uint64_t Offset, IRBuilder<> &Builder) {
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder) {
// Convert the stored type to the actual type, shift it left to insert
// then 'or' into place.
Type *AllocaType = Old->getType();
@@ -815,26 +915,40 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
SV = Builder.CreateBitCast(SV, EltTy);
uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
unsigned Elt = Offset/EltSize;
- return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
+ Value *Idx;
+ if (NonConstantIdx) {
+ if (Elt)
+ Idx = Builder.CreateAdd(NonConstantIdx,
+ Builder.getInt32(Elt),
+ "dyn.offset");
+ else
+ Idx = NonConstantIdx;
+ } else
+ Idx = Builder.getInt32(Elt);
+ return Builder.CreateInsertElement(Old, SV, Idx);
}
// If SV is a first-class aggregate value, insert each value recursively.
if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into struct types not supported");
const StructLayout &Layout = *TD.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
- Builder);
+ 0, Builder);
}
return Old;
}
if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into array types not supported");
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
- Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
}
return Old;
}
@@ -1335,15 +1449,14 @@ bool SROA::performPromotion(Function &F) {
/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
/// SROA. It must be a struct or array type with a small number of elements.
-static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
+bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
Type *T = AI->getAllocatedType();
- // Do not promote any struct into more than 32 separate vars.
+ // Do not promote any struct that has too many members.
if (StructType *ST = dyn_cast<StructType>(T))
- return ST->getNumElements() <= 32;
- // Arrays are much less likely to be safe for SROA; only consider
- // them if they are very small.
+ return ST->getNumElements() <= StructMemberThreshold;
+ // Do not promote any array that has too many elements.
if (ArrayType *AT = dyn_cast<ArrayType>(T))
- return AT->getNumElements() <= 8;
+ return AT->getNumElements() <= ArrayElementThreshold;
return false;
}
@@ -1448,8 +1561,8 @@ bool SROA::performScalarRepl(Function &F) {
// promoted itself. If so, we don't want to transform it needlessly. Note
// that we can't just check based on the type: the alloca may be of an i32
// but that has pointer arithmetic to set byte 3 of it or something.
- if (AllocaInst *NewAI =
- ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) {
+ if (AllocaInst *NewAI = ConvertToScalarInfo(
+ (unsigned)AllocaSize, *TD, ScalarLoadThreshold).TryConvert(AI)) {
NewAI->takeName(AI);
AI->eraseFromParent();
++NumConverted;
@@ -1642,6 +1755,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
if (GEPIt == E)
return;
+ bool NonConstant = false;
+ unsigned NonConstantIdxSize = 0;
// Walk through the GEP type indices, checking the types that this indexes
// into.
@@ -1651,15 +1766,30 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
continue;
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
- if (!IdxVal)
- return MarkUnsafe(Info, GEPI);
+ if (!IdxVal) {
+ // Non-constant GEPs are only a problem on arrays, structs, and pointers.
+ // Vectors can be dynamically indexed.
+ // FIXME: Add support for dynamic indexing on arrays. This should be
+ // ok on any subarrays of the alloca array, e.g., a[0][i] is ok, but a[i][0]
+ // isn't.
+ if (!(*GEPIt)->isVectorTy())
+ return MarkUnsafe(Info, GEPI);
+ NonConstant = true;
+ NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
+ }
}
// Compute the offset due to this GEP and check if the alloca has a
// component element at that offset.
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ // If this GEP is non-constant then the last operand must have been a
+ // dynamic index into a vector. Pop this now as it has no impact on the
+ // constant part of the offset.
+ if (NonConstant)
+ Indices.pop_back();
Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
- if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset,
+ NonConstantIdxSize))
MarkUnsafe(Info, GEPI);
}
@@ -1764,6 +1894,12 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
if (Offset >= AT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
+ } else if (VectorType *VT = dyn_cast<VectorType>(T)) {
+ EltTy = VT->getElementType();
+ EltSize = TD->getTypeAllocSize(EltTy);
+ if (Offset >= VT->getNumElements() * EltSize)
+ return false;
+ Offset %= EltSize;
} else {
return false;
}
@@ -1931,9 +2067,16 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
Offset -= Layout->getElementOffset(Idx);
IdxTy = Type::getInt32Ty(T->getContext());
return Idx;
+ } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ T = AT->getElementType();
+ uint64_t EltSize = TD->getTypeAllocSize(T);
+ Idx = Offset / EltSize;
+ Offset -= Idx * EltSize;
+ IdxTy = Type::getInt64Ty(T->getContext());
+ return Idx;
}
- ArrayType *AT = cast<ArrayType>(T);
- T = AT->getElementType();
+ VectorType *VT = cast<VectorType>(T);
+ T = VT->getElementType();
uint64_t EltSize = TD->getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
@@ -1948,6 +2091,13 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts) {
uint64_t OldOffset = Offset;
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ // If the GEP was dynamic then it must have been a dynamic vector lookup.
+ // In this case, it must be the last GEP operand which is dynamic, so keep that
+ // aside until we've found the constant GEP offset, then add it back in at the
+ // end.
+ Value* NonConstantIdx = 0;
+ if (!GEPI->hasAllConstantIndices())
+ NonConstantIdx = Indices.pop_back_val();
Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
@@ -1974,6 +2124,17 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
}
+ if (NonConstantIdx) {
+ Type* GepTy = T;
+ // This GEP has a dynamic index. We need to add "i32 0" to index through
+ // any structs or arrays in the original type until we get to the vector
+ // to index.
+ while (!isa<VectorType>(GepTy)) {
+ NewArgs.push_back(Constant::getNullValue(i32Ty));
+ GepTy = cast<CompositeType>(GepTy)->getTypeAtIndex(0U);
+ }
+ NewArgs.push_back(NonConstantIdx);
+ }
Instruction *Val = NewElts[Idx];
if (NewArgs.size() > 1) {
Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
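
The thread running through this file's changes is support for one trailing non-constant GEP index into a vector element. When rewriting, the constant byte offset still selects a base element and the dynamic index is added on top; a sketch of that index computation (the "dyn.offset" add in ConvertScalar_ExtractValue/InsertValue):

    #include <cstdint>

    // Returns the element index for an extract/insertelement given the
    // constant bit offset, the element size, and an optional dynamic index.
    uint64_t elementIndex(uint64_t OffsetBits, uint64_t EltSizeBits,
                          const uint64_t *NonConstantIdx) {
      uint64_t Elt = OffsetBits / EltSizeBits; // constant part of the index
      if (NonConstantIdx)
        return *NonConstantIdx + Elt;          // the "dyn.offset" addition
      return Elt;
    }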
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index a66b3e3825..91158b429e 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -89,7 +89,6 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
/// ChangeToCall - Convert the specified invoke into a normal call.
static void ChangeToCall(InvokeInst *II) {
- BasicBlock *BB = II->getParent();
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
NewCall->takeName(II);
@@ -102,8 +101,8 @@ static void ChangeToCall(InvokeInst *II) {
BranchInst::Create(II->getNormalDest(), II);
// Update PHI nodes in the unwind destination
- II->getUnwindDest()->removePredecessor(BB);
- BB->getInstList().erase(II);
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
}
static bool MarkAliveBlocks(BasicBlock *BB,
@@ -157,11 +156,21 @@ static bool MarkAliveBlocks(BasicBlock *BB,
}
// Turn invokes that call 'nounwind' functions into ordinary calls.
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- if (II->doesNotThrow()) {
- ChangeToCall(II);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Value *Callee = II->getCalledValue();
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ ChangeToUnreachable(II, true);
+ Changed = true;
+ } else if (II->doesNotThrow()) {
+ if (II->use_empty() && II->onlyReadsMemory()) {
+ // jump to the normal destination branch.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->eraseFromParent();
+ } else
+ ChangeToCall(II);
Changed = true;
}
+ }
Changed |= ConstantFoldTerminator(BB, true);
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
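
The rewritten invoke handling above makes three separate decisions. A compact restatement as a decision function (names are stand-ins, not LLVM API):

    enum class InvokeLowering { Unreachable, Branch, Call, Keep };

    InvokeLowering lowerInvoke(bool CalleeIsNullOrUndef, bool NoUnwind,
                               bool ResultUnused, bool OnlyReadsMemory) {
      if (CalleeIsNullOrUndef)
        return InvokeLowering::Unreachable; // calling null/undef can't work
      if (NoUnwind) {
        if (ResultUnused && OnlyReadsMemory)
          return InvokeLowering::Branch;    // drop the call entirely
        return InvokeLowering::Call;        // plain call + branch
      }
      return InvokeLowering::Keep;
    }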
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 99b05389b2..39647c7fc6 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -18,20 +18,20 @@
#define DEBUG_TYPE "simplify-libcalls"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
using namespace llvm;
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 3859a1aec4..5576432149 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -671,12 +671,3 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
return cast<ReturnInst>(NewRet);
}
-/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a
-/// given basic block.
-DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) {
- if (const Instruction *I = BB->getFirstNonPHI())
- return I->getDebugLoc();
- // Scanning entire block may be too expensive, if the first instruction
- // does not have valid location info.
- return DebugLoc();
-}
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 344b860bde..27f7724417 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -12,18 +12,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
-#include "llvm/Support/IRBuilder.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/ADT/SmallString.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 7f5cb5e096..4ff31cae62 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -29,3 +29,5 @@ add_llvm_library(LLVMTransformUtils
Utils.cpp
ValueMapper.cpp
)
+
+add_dependencies(LLVMTransformUtils intrinsics_gen)
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 20052a4122..99237b8390 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -15,6 +15,7 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
@@ -28,7 +29,6 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/SmallVector.h"
#include <map>
using namespace llvm;
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index a0e027b5f1..1dac6b5b8b 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -53,7 +53,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
I->isConstant(), I->getLinkage(),
(Constant*) 0, I->getName(),
(GlobalVariable*) 0,
- I->isThreadLocal(),
+ I->getThreadLocalMode(),
I->getType()->getAddressSpace());
GV->copyAttributesFrom(I);
VMap[I] = GV;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index f10dbbeef2..c545cd68c9 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -664,7 +664,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
TheSwitch->setCondition(call);
TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
// Remove redundant case
- TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
+ SwitchInst::CaseIt ToBeRemoved(TheSwitch, NumExitBlocks-1);
+ TheSwitch->removeCase(ToBeRemoved);
break;
}
}
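One plausible motivation for naming the iterator, assuming removeCase takes its argument by non-const reference in this tree (an assumption, not confirmed by the patch): C++ will not bind a temporary to a non-const lvalue reference. A minimal, LLVM-independent illustration:

// Hypothetical stand-ins for SwitchInst::CaseIt and removeCase.
struct FakeCaseIt { unsigned Index; };
static void fakeRemoveCase(FakeCaseIt &I) { (void) I; }

void demo() {
  FakeCaseIt ToBeRemoved = { 2 };
  fakeRemoveCase(ToBeRemoved);     // OK: a named lvalue binds to FakeCaseIt&
  // fakeRemoveCase(FakeCaseIt()); // ill-formed: a temporary does not
}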
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 9f8043d6fa..89e89e7acf 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -13,22 +13,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Attributes.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Intrinsics.h"
-#include "llvm/Attributes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/IRBuilder.h"
using namespace llvm;
bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index b08f8e21a0..bed7d72fff 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -14,31 +14,31 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
#include "llvm/Metadata.h"
#include "llvm/Operator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -169,11 +169,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
SwitchInst::CaseIt FirstCase = SI->case_begin();
- IntegersSubset CaseRanges = FirstCase.getCaseValueEx();
- if (CaseRanges.getNumItems() == 1 && CaseRanges.isSingleNumber(0)) {
+ IntegersSubset& Case = FirstCase.getCaseValueEx();
+ if (Case.isSingleNumber()) {
// FIXME: Currently work with ConstantInt based numbers.
Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
- CaseRanges.getItem(0).getLow().toConstantInt(),
+ Case.getSingleNumber(0).toConstantInt(),
"cond");
// Insert the new branch.
@@ -183,7 +183,6 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
// Delete the old switch.
SI->eraseFromParent();
return true;
-
}
}
return false;
@@ -266,7 +265,7 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
return isa<UndefValue>(II->getArgOperand(1));
}
- if (extractMallocCall(I) || extractCallocCall(I)) return true;
+ if (isAllocLikeFn(I)) return true;
if (CallInst *CI = isFreeCall(I))
if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 8491c5582d..dbcf3b2fe2 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -14,8 +14,8 @@
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Module.h"
-#include "llvm/Support/IRBuilder.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 2357d81916..dd5e20ed50 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -28,14 +28,14 @@
#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/DIBuilder.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Metadata.h"
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index e60a41b786..b3f5289fcd 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -190,8 +190,11 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
return V;
}
- // Set DebugLoc.
- InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB));
+ // Set the DebugLoc of the inserted PHI, if available.
+ DebugLoc DL;
+ if (const Instruction *I = BB->getFirstNonPHI())
+ DL = I->getDebugLoc();
+ InsertedPHI->setDebugLoc(DL);
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
@@ -230,28 +233,6 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
U.set(V);
}
-/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator
-/// in the SSAUpdaterImpl template.
-namespace {
- class PHIiter {
- private:
- PHINode *PHI;
- unsigned idx;
-
- public:
- explicit PHIiter(PHINode *P) // begin iterator
- : PHI(P), idx(0) {}
- PHIiter(PHINode *P, bool) // end iterator
- : PHI(P), idx(PHI->getNumIncomingValues()) {}
-
- PHIiter &operator++() { ++idx; return *this; }
- bool operator==(const PHIiter& x) const { return idx == x.idx; }
- bool operator!=(const PHIiter& x) const { return !operator==(x); }
- Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
- BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
- };
-}
-
/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template,
/// specialized for SSAUpdater.
namespace llvm {
@@ -266,9 +247,26 @@ public:
static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
- typedef PHIiter PHI_iterator;
- static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
- static inline PHI_iterator PHI_end(PhiT *PHI) {
+ class PHI_iterator {
+ private:
+ PHINode *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(PHINode *P) // begin iterator
+ : PHI(P), idx(0) {}
+ PHI_iterator(PHINode *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumIncomingValues()) {}
+
+ PHI_iterator &operator++() { ++idx; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+ Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
+ BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
+ };
+
+ static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static PHI_iterator PHI_end(PhiT *PHI) {
return PHI_iterator(PHI, true);
}
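A short usage sketch of the now-nested iterator, written as if it appeared inside this file (where the SSAUpdaterTraits<SSAUpdater> specialization is visible); visitIncoming is hypothetical:

// Enumerate a PHI's incoming (value, block) pairs through the traits
// interface, exactly as the SSAUpdaterImpl template does internally.
static void visitIncoming(PHINode *PN) {
  typedef SSAUpdaterTraits<SSAUpdater> Traits;
  for (Traits::PHI_iterator I = Traits::PHI_begin(PN),
                            E = Traits::PHI_end(PN); I != E; ++I) {
    Value *V = I.getIncomingValue();
    BasicBlock *BB = I.getIncomingBlock();
    (void) V; (void) BB;
  }
}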
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 3d4d50a80a..f37ea91397 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -16,30 +16,30 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Operator.h"
#include "llvm/Type.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MDBuilder.h"
#include "llvm/Support/NoFolder.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <set>
#include <map>
@@ -129,7 +129,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
///
static bool isProfitableToFoldUnconditional(BranchInst *SI1,
BranchInst *SI2,
- Instruction* Cond,
+ Instruction *Cond,
SmallVectorImpl<PHINode*> &PhiNodes) {
if (SI1 == SI2) return false; // Can't merge with self!
assert(SI1->isUnconditional() && SI2->isConditional());
@@ -156,7 +156,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
isa<PHINode>(BBI); ++BBI) {
PHINode *PN = cast<PHINode>(BBI);
if (PN->getIncomingValueForBlock(SI1BB) != Cond ||
- !isa<Constant>(PN->getIncomingValueForBlock(SI2BB)))
+ !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB)))
return false;
PhiNodes.push_back(PN);
}
@@ -1782,7 +1782,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
} else {
// Update PHI nodes in the common successors.
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
- ConstantInt *PBI_C = dyn_cast<ConstantInt>(
+ ConstantInt *PBI_C = cast<ConstantInt>(
PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
assert(PBI_C->getType()->isIntegerTy(1));
Instruction *MergedCond = 0;
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 1d08df59b3..62d23cb948 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -23,6 +23,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
@@ -41,6 +42,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <map>
@@ -66,6 +68,10 @@ static cl::opt<unsigned>
MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden,
cl::desc("The maximum number of pairing iterations"));
+static cl::opt<bool>
+Pow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to form non-2^n-length vectors"));
+
static cl::opt<unsigned>
MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden,
cl::desc("The maximum number of pairable instructions per group"));
@@ -76,6 +82,10 @@ MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
" a full cycle check"));
static cl::opt<bool>
+NoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize boolean (i1) values"));
+
+static cl::opt<bool>
NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden,
cl::desc("Don't try to vectorize integer values"));
@@ -104,6 +114,10 @@ NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden,
cl::desc("Don't try to vectorize select instructions"));
static cl::opt<bool>
+NoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize comparison instructions"));
+
+static cl::opt<bool>
NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden,
cl::desc("Don't try to vectorize getelementptr instructions"));
@@ -182,12 +196,12 @@ namespace {
// FIXME: const correct?
- bool vectorizePairs(BasicBlock &BB);
+ bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false);
bool getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start,
std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts);
+ std::vector<Value *> &PairableInsts, bool NonPow2Len);
void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
std::vector<Value *> &PairableInsts,
@@ -211,7 +225,7 @@ namespace {
bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
bool areInstsCompatible(Instruction *I, Instruction *J,
- bool IsSimpleLoadStore);
+ bool IsSimpleLoadStore, bool NonPow2Len);
bool trackUsesOfI(DenseSet<Value *> &Users,
AliasSetTracker &WriteSet, Instruction *I,
@@ -263,26 +277,32 @@ namespace {
bool UseCycleCheck);
Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, bool &FlipMemInputs);
+ Instruction *J, unsigned o, bool FlipMemInputs);
void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
- unsigned NumElem, unsigned MaskOffset, unsigned NumInElem,
- unsigned IdxOffset, std::vector<Constant*> &Mask);
+ unsigned MaskOffset, unsigned NumInElem,
+ unsigned NumInElem1, unsigned IdxOffset,
+ std::vector<Constant*> &Mask);
Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I,
Instruction *J);
+ bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J,
+ unsigned o, Value *&LOp, unsigned numElemL,
+ Type *ArgTypeL, Type *ArgTypeH,
+ unsigned IdxOff = 0);
+
Value *getReplacementInput(LLVMContext& Context, Instruction *I,
Instruction *J, unsigned o, bool FlipMemInputs);
void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
- bool &FlipMemInputs);
+ bool FlipMemInputs);
void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
Instruction *J, Instruction *K,
Instruction *&InsertionPt, Instruction *&K1,
- Instruction *&K2, bool &FlipMemInputs);
+ Instruction *&K2, bool FlipMemInputs);
void collectPairLoadMoveSet(BasicBlock &BB,
DenseMap<Value *, Value *> &ChosenPairs,
@@ -294,6 +314,10 @@ namespace {
DenseMap<Value *, Value *> &ChosenPairs,
std::multimap<Value *, Value *> &LoadMoveSet);
+ void collectPtrInfo(std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<Value *> &LowPtrInsts);
+
bool canMoveUsesOfIAfterJ(BasicBlock &BB,
std::multimap<Value *, Value *> &LoadMoveSet,
Instruction *I, Instruction *J);
@@ -303,12 +327,15 @@ namespace {
Instruction *&InsertionPt,
Instruction *I, Instruction *J);
+ void combineMetadata(Instruction *K, const Instruction *J);
+
bool vectorizeBB(BasicBlock &BB) {
bool changed = false;
// Iterate a sufficient number of times to merge types of size 1 bit,
// then 2 bits, then 4, etc. up to half of the width of the target
// vector register.
- for (unsigned v = 2, n = 1;
+ unsigned n = 1;
+ for (unsigned v = 2;
v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
v *= 2, ++n) {
DEBUG(dbgs() << "BBV: fusing loop #" << n <<
@@ -320,6 +347,16 @@ namespace {
break;
}
+ if (changed && !Pow2LenOnly) {
+ ++n;
+ for (; !Config.MaxIter || n <= Config.MaxIter; ++n) {
+ DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " <<
+ n << " for " << BB.getName() << " in " <<
+ BB.getParent()->getName() << "...\n");
+ if (!vectorizePairs(BB, true)) break;
+ }
+ }
+
DEBUG(dbgs() << "BBV: done!\n");
return changed;
}
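As a worked example of the schedule above: if VectorBits were 128, the first loop would run with v = 2, 4, 8, 16, 32, 64, 128 — seven power-of-2 pairing passes — and only when some pass changed the block and -bb-vectorize-pow2-len-only is unset does the second loop keep calling vectorizePairs(BB, true) to form the remaining non-power-of-2 lengths, stopping once a pass makes no change or MaxIter is exhausted.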
@@ -341,15 +378,43 @@ namespace {
AU.setPreservesCFG();
}
- // This returns the vector type that holds a pair of the provided type.
- // If the provided type is already a vector, then its length is doubled.
- static inline VectorType *getVecTypeForPair(Type *ElemTy) {
+ static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) {
+ assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() &&
+ "Cannot form vector from incompatible scalar types");
+ Type *STy = ElemTy->getScalarType();
+
+ unsigned numElem;
if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
- unsigned numElem = VTy->getNumElements();
- return VectorType::get(ElemTy->getScalarType(), numElem*2);
+ numElem = VTy->getNumElements();
+ } else {
+ numElem = 1;
}
- return VectorType::get(ElemTy, 2);
+ if (VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) {
+ numElem += VTy->getNumElements();
+ } else {
+ numElem += 1;
+ }
+
+ return VectorType::get(STy, numElem);
+ }
+
+ static inline void getInstructionTypes(Instruction *I,
+ Type *&T1, Type *&T2) {
+ if (isa<StoreInst>(I)) {
+ // For stores, it is the value type, not the pointer type that matters
+ // because the value is what will come from a vector register.
+
+ Value *IVal = cast<StoreInst>(I)->getValueOperand();
+ T1 = IVal->getType();
+ } else {
+ T1 = I->getType();
+ }
+
+ if (I->isCast())
+ T2 = cast<CastInst>(I)->getSrcTy();
+ else
+ T2 = T1;
}
// Returns the weight associated with the provided value. A chain of
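Illustrative values for the generalized helper above, written as if getVecTypeForPair were free-standing (it is a private static member); with element counts now adding rather than doubling, mixed-length pairs fuse naturally:

Type *FloatTy = Type::getFloatTy(Context);   // Context assumed in scope
Type *V2F = VectorType::get(FloatTy, 2);
VectorType *P1 = getVecTypeForPair(FloatTy, FloatTy); // <2 x float>
VectorType *P2 = getVecTypeForPair(V2F, FloatTy);     // <3 x float>
VectorType *P3 = getVecTypeForPair(V2F, V2F);         // <4 x float>

getInstructionTypes complements this by reporting the value type for stores and the source type for casts, since those are the types that actually occupy vector registers.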
@@ -385,8 +450,7 @@ namespace {
// true if the offset could be determined to be some constant value.
// For example, if OffsetInElmts == 1, then J accesses the memory directly
// after I; if OffsetInElmts == -1 then I accesses the memory
- // directly after J. This function assumes that both instructions
- // have the same type.
+ // directly after J.
bool getPairPtrInfo(Instruction *I, Instruction *J,
Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
int64_t &OffsetInElmts) {
@@ -418,7 +482,12 @@ namespace {
Type *VTy = cast<PointerType>(IPtr->getType())->getElementType();
int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
- assert(VTy == cast<PointerType>(JPtr->getType())->getElementType());
+ Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType();
+ if (VTy != VTy2 && Offset < 0) {
+ int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2);
+ OffsetInElmts = Offset/VTy2TSS;
+ return (abs64(Offset) % VTy2TSS) == 0;
+ }
OffsetInElmts = Offset/VTyTSS;
return (abs64(Offset) % VTyTSS) == 0;
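A concrete reading of the result, with illustrative values: if I loads a <2 x float> at address A and J loads a float at A + 8, then Offset = 8 and VTyTSS = 8 (the store size of <2 x float>), so OffsetInElmts = 1 with zero remainder, meaning J's access begins exactly one I-element past I. When the element types differ and Offset < 0 (J is the lower access), the new code divides by J's element store size VTy2TSS instead, so the "element" unit always belongs to the lower of the two accesses.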
@@ -471,7 +540,7 @@ namespace {
// This function implements one vectorization iteration on the provided
// basic block. It returns true if the block is changed.
- bool BBVectorize::vectorizePairs(BasicBlock &BB) {
+ bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) {
bool ShouldContinue;
BasicBlock::iterator Start = BB.getFirstInsertionPt();
@@ -482,7 +551,7 @@ namespace {
std::vector<Value *> PairableInsts;
std::multimap<Value *, Value *> CandidatePairs;
ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
- PairableInsts);
+ PairableInsts, NonPow2Len);
if (PairableInsts.empty()) continue;
// Now we have a map of all of the pairable instructions and we need to
@@ -529,6 +598,10 @@ namespace {
// passes should coalesce the build/extract combinations.
fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs);
+
+ // It is important to clean up here so that future iterations of this
+ // function have less work to do.
+ (void) SimplifyInstructionsInBlock(&BB, TD);
return true;
}
@@ -567,6 +640,9 @@ namespace {
} else if (isa<SelectInst>(I)) {
if (!Config.VectorizeSelect)
return false;
+ } else if (isa<CmpInst>(I)) {
+ if (!Config.VectorizeCmp)
+ return false;
} else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) {
if (!Config.VectorizeGEP)
return false;
@@ -584,30 +660,22 @@ namespace {
return false;
Type *T1, *T2;
- if (isa<StoreInst>(I)) {
- // For stores, it is the value type, not the pointer type that matters
- // because the value is what will come from a vector register.
-
- Value *IVal = cast<StoreInst>(I)->getValueOperand();
- T1 = IVal->getType();
- } else {
- T1 = I->getType();
- }
-
- if (I->isCast())
- T2 = cast<CastInst>(I)->getSrcTy();
- else
- T2 = T1;
+ getInstructionTypes(I, T1, T2);
// Not every type can be vectorized...
if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) ||
!(VectorType::isValidElementType(T2) || T2->isVectorTy()))
return false;
- if (!Config.VectorizeInts
- && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
- return false;
-
+ if (T1->getScalarSizeInBits() == 1 && T2->getScalarSizeInBits() == 1) {
+ if (!Config.VectorizeBools)
+ return false;
+ } else {
+ if (!Config.VectorizeInts
+ && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
+ return false;
+ }
+
if (!Config.VectorizeFloats
&& (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
return false;
@@ -623,8 +691,8 @@ namespace {
T2->getScalarType()->isPointerTy()))
return false;
- if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
- T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
+ if (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
+ T2->getPrimitiveSizeInBits() >= Config.VectorBits)
return false;
return true;
@@ -635,36 +703,25 @@ namespace {
// that I has already been determined to be vectorizable and that J is not
// in the use tree of I.
bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
- bool IsSimpleLoadStore) {
+ bool IsSimpleLoadStore, bool NonPow2Len) {
DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
" <-> " << *J << "\n");
// Loads and stores can be merged if they have different alignments,
// but are otherwise the same.
- LoadInst *LI, *LJ;
- StoreInst *SI, *SJ;
- if ((LI = dyn_cast<LoadInst>(I)) && (LJ = dyn_cast<LoadInst>(J))) {
- if (I->getType() != J->getType())
- return false;
+ if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment |
+ (NonPow2Len ? Instruction::CompareUsingScalarTypes : 0)))
+ return false;
- if (LI->getPointerOperand()->getType() !=
- LJ->getPointerOperand()->getType() ||
- LI->isVolatile() != LJ->isVolatile() ||
- LI->getOrdering() != LJ->getOrdering() ||
- LI->getSynchScope() != LJ->getSynchScope())
- return false;
- } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) {
- if (SI->getValueOperand()->getType() !=
- SJ->getValueOperand()->getType() ||
- SI->getPointerOperand()->getType() !=
- SJ->getPointerOperand()->getType() ||
- SI->isVolatile() != SJ->isVolatile() ||
- SI->getOrdering() != SJ->getOrdering() ||
- SI->getSynchScope() != SJ->getSynchScope())
- return false;
- } else if (!J->isSameOperationAs(I)) {
+ Type *IT1, *IT2, *JT1, *JT2;
+ getInstructionTypes(I, IT1, IT2);
+ getInstructionTypes(J, JT1, JT2);
+ unsigned MaxTypeBits = std::max(
+ IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(),
+ IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits());
+ if (MaxTypeBits > Config.VectorBits)
return false;
- }
+
// FIXME: handle addsub-type operations!
if (IsSimpleLoadStore) {
@@ -674,8 +731,11 @@ namespace {
if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
OffsetInElmts) && abs64(OffsetInElmts) == 1) {
if (Config.AlignedOnly) {
- Type *aType = isa<StoreInst>(I) ?
+ Type *aTypeI = isa<StoreInst>(I) ?
cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
+ Type *aTypeJ = isa<StoreInst>(J) ?
+ cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
+
// An aligned load or store is possible only if the instruction
// with the lower offset has an alignment suitable for the
// vector type.
@@ -683,7 +743,7 @@ namespace {
unsigned BottomAlignment = IAlignment;
if (OffsetInElmts < 0) BottomAlignment = JAlignment;
- Type *VType = getVecTypeForPair(aType);
+ Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
unsigned VecAlignment = TD->getPrefTypeAlignment(VType);
if (BottomAlignment < VecAlignment)
return false;
@@ -691,11 +751,6 @@ namespace {
} else {
return false;
}
- } else if (isa<ShuffleVectorInst>(I)) {
- // Only merge two shuffles if they're both constant
- return isa<Constant>(I->getOperand(2)) &&
- isa<Constant>(J->getOperand(2));
- // FIXME: We may want to vectorize non-constant shuffles also.
}
// The powi intrinsic is special because only the first argument is
@@ -778,7 +833,7 @@ namespace {
bool BBVectorize::getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start,
std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts) {
+ std::vector<Value *> &PairableInsts, bool NonPow2Len) {
BasicBlock::iterator E = BB.end();
if (Start == E) return false;
@@ -814,7 +869,7 @@ namespace {
// J does not use I, and comes before the first use of I, so it can be
// merged with I if the instructions are compatible.
- if (!areInstsCompatible(I, J, IsSimpleLoadStore)) continue;
+ if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len)) continue;
// J is a candidate for merging with I.
if (!PairableInsts.size() ||
@@ -1436,24 +1491,27 @@ namespace {
// instruction that fuses I with J.
Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
Instruction *I, Instruction *J, unsigned o,
- bool &FlipMemInputs) {
+ bool FlipMemInputs) {
Value *IPtr, *JPtr;
unsigned IAlignment, JAlignment;
int64_t OffsetInElmts;
+
+ // Note: the analysis might fail here; that is why FlipMemInputs has
+ // been precomputed (OffsetInElmts is not used here).
(void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
OffsetInElmts);
// The pointer value is taken to be the one with the lowest offset.
Value *VPtr;
- if (OffsetInElmts > 0) {
+ if (!FlipMemInputs) {
VPtr = IPtr;
} else {
- FlipMemInputs = true;
VPtr = JPtr;
}
- Type *ArgType = cast<PointerType>(IPtr->getType())->getElementType();
- Type *VArgType = getVecTypeForPair(ArgType);
+ Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType();
+ Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType();
+ Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
Type *VArgPtrType = PointerType::get(VArgType,
cast<PointerType>(IPtr->getType())->getAddressSpace());
return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
@@ -1461,15 +1519,17 @@ namespace {
}
void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J,
- unsigned NumElem, unsigned MaskOffset, unsigned NumInElem,
- unsigned IdxOffset, std::vector<Constant*> &Mask) {
- for (unsigned v = 0; v < NumElem/2; ++v) {
+ unsigned MaskOffset, unsigned NumInElem,
+ unsigned NumInElem1, unsigned IdxOffset,
+ std::vector<Constant*> &Mask) {
+ unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements();
+ for (unsigned v = 0; v < NumElem1; ++v) {
int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
if (m < 0) {
Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context));
} else {
unsigned mm = m + (int) IdxOffset;
- if (m >= (int) NumInElem)
+ if (m >= (int) NumInElem1)
mm += (int) NumInElem;
Mask[v+MaskOffset] =
@@ -1485,8 +1545,11 @@ namespace {
// This is the shuffle mask. We need to append the second
// mask to the first, and the numbers need to be adjusted.
- Type *ArgType = I->getType();
- Type *VArgType = getVecTypeForPair(ArgType);
+ Type *ArgTypeI = I->getType();
+ Type *ArgTypeJ = J->getType();
+ Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
+
+ unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements();
// Get the total number of elements in the fused vector type.
// By definition, this must equal the number of elements in
@@ -1494,19 +1557,81 @@ namespace {
unsigned NumElem = cast<VectorType>(VArgType)->getNumElements();
std::vector<Constant*> Mask(NumElem);
- Type *OpType = I->getOperand(0)->getType();
- unsigned NumInElem = cast<VectorType>(OpType)->getNumElements();
+ Type *OpTypeI = I->getOperand(0)->getType();
+ unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements();
+ Type *OpTypeJ = J->getOperand(0)->getType();
+ unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements();
+
+ // The fused vector will be:
+ // -----------------------------------------------------
+ // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ |
+ // -----------------------------------------------------
+ // from which we'll extract NumElem total elements (where the first NumElemI
+ // of them come from the mask in I and the remainder come from the mask
+ // in J).
// For the mask from the first pair...
- fillNewShuffleMask(Context, I, NumElem, 0, NumInElem, 0, Mask);
+ fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI,
+ 0, Mask);
// For the mask from the second pair...
- fillNewShuffleMask(Context, J, NumElem, NumElem/2, NumInElem, NumInElem,
- Mask);
+ fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ,
+ NumInElemI, Mask);
return ConstantVector::get(Mask);
}
+ bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, Value *&LOp,
+ unsigned numElemL,
+ Type *ArgTypeL, Type *ArgTypeH,
+ unsigned IdxOff) {
+ bool ExpandedIEChain = false;
+ if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) {
+ // If we have a pure insertelement chain, then this can be rewritten
+ // into a chain that directly builds the larger type.
+ bool PureChain = true;
+ InsertElementInst *LIENext = LIE;
+ do {
+ if (!isa<UndefValue>(LIENext->getOperand(0)) &&
+ !isa<InsertElementInst>(LIENext->getOperand(0))) {
+ PureChain = false;
+ break;
+ }
+ } while ((LIENext =
+ dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
+
+ if (PureChain) {
+ SmallVector<Value *, 8> VectElemts(numElemL,
+ UndefValue::get(ArgTypeL->getScalarType()));
+ InsertElementInst *LIENext = LIE;
+ do {
+ unsigned Idx =
+ cast<ConstantInt>(LIENext->getOperand(2))->getSExtValue();
+ VectElemts[Idx] = LIENext->getOperand(1);
+ } while ((LIENext =
+ dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
+
+ LIENext = 0;
+ Value *LIEPrev = UndefValue::get(ArgTypeH);
+ for (unsigned i = 0; i < numElemL; ++i) {
+ if (isa<UndefValue>(VectElemts[i])) continue;
+ LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i],
+ ConstantInt::get(Type::getInt32Ty(Context),
+ i + IdxOff),
+ getReplacementName(I, true, o, i+1));
+ LIENext->insertBefore(J);
+ LIEPrev = LIENext;
+ }
+
+ LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH);
+ ExpandedIEChain = true;
+ }
+ }
+
+ return ExpandedIEChain;
+ }
+
// Returns the value to be used as the specified operand of the vector
// instruction that fuses I with J.
Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
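The index arithmetic in fillNewShuffleMask is easy to misread, so here is a self-contained sketch of the remapping applied to J's half of the fused mask; remapJMaskEntry is hypothetical plain C++, but the arithmetic mirrors the function above:

#include <cstdio>

// The fused shuffle inputs are laid out | I-lo | J-lo | and | I-hi | J-hi |,
// so an entry m of J's original mask shifts by IdxOffset (the element count
// of I's low input), plus NumInElem (also NumInElemI at J's call site)
// whenever it referred to J's second input vector.
static int remapJMaskEntry(int m, int NumInElem, int NumInElem1,
                           int IdxOffset) {
  if (m < 0) return -1;            // undef lanes stay undef
  int mm = m + IdxOffset;
  if (m >= NumInElem1) mm += NumInElem;
  return mm;
}

int main() {
  // I's and J's inputs each have 2 elements, so NumInElem = NumInElemI = 2,
  // NumInElem1 = NumInElemJ = 2, IdxOffset = NumInElemI = 2.
  for (int m = 0; m < 4; ++m)
    std::printf("J mask %d -> fused mask %d\n", m,
                remapJMaskEntry(m, 2, 2, 2));
  return 0; // prints 0->2, 1->3, 2->6, 3->7
}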
@@ -1514,84 +1639,333 @@ namespace {
Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
- // Compute the fused vector type for this operand
- Type *ArgType = I->getOperand(o)->getType();
- VectorType *VArgType = getVecTypeForPair(ArgType);
+ // Compute the fused vector type for this operand
+ Type *ArgTypeI = I->getOperand(o)->getType();
+ Type *ArgTypeJ = J->getOperand(o)->getType();
+ VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
Instruction *L = I, *H = J;
+ Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
if (FlipMemInputs) {
L = J;
H = I;
+ ArgTypeL = ArgTypeJ;
+ ArgTypeH = ArgTypeI;
}
- if (ArgType->isVectorTy()) {
- unsigned numElem = cast<VectorType>(VArgType)->getNumElements();
- std::vector<Constant*> Mask(numElem);
- for (unsigned v = 0; v < numElem; ++v)
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ unsigned numElemL;
+ if (ArgTypeL->isVectorTy())
+ numElemL = cast<VectorType>(ArgTypeL)->getNumElements();
+ else
+ numElemL = 1;
- Instruction *BV = new ShuffleVectorInst(L->getOperand(o),
- H->getOperand(o),
- ConstantVector::get(Mask),
- getReplacementName(I, true, o));
- BV->insertBefore(J);
- return BV;
+ unsigned numElemH;
+ if (ArgTypeH->isVectorTy())
+ numElemH = cast<VectorType>(ArgTypeH)->getNumElements();
+ else
+ numElemH = 1;
+
+ Value *LOp = L->getOperand(o);
+ Value *HOp = H->getOperand(o);
+ unsigned numElem = VArgType->getNumElements();
+
+ // First, we check if we can reuse the "original" vector outputs (if these
+ // exist). We might need a shuffle.
+ ExtractElementInst *LEE = dyn_cast<ExtractElementInst>(LOp);
+ ExtractElementInst *HEE = dyn_cast<ExtractElementInst>(HOp);
+ ShuffleVectorInst *LSV = dyn_cast<ShuffleVectorInst>(LOp);
+ ShuffleVectorInst *HSV = dyn_cast<ShuffleVectorInst>(HOp);
+
+ // FIXME: If we're fusing shuffle instructions, then we can't apply this
+ // optimization. The input vectors to the shuffle might be a different
+ // length from the shuffle outputs. Unfortunately, the replacement
+ // shuffle mask has already been formed, and the mask entries are sensitive
+ // to the sizes of the inputs.
+ bool IsSizeChangeShuffle =
+ isa<ShuffleVectorInst>(L) &&
+ (LOp->getType() != L->getType() || HOp->getType() != H->getType());
+
+ if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) {
+ // We can have at most two unique vector inputs.
+ bool CanUseInputs = true;
+ Value *I1, *I2 = 0;
+ if (LEE) {
+ I1 = LEE->getOperand(0);
+ } else {
+ I1 = LSV->getOperand(0);
+ I2 = LSV->getOperand(1);
+ if (I2 == I1 || isa<UndefValue>(I2))
+ I2 = 0;
+ }
+
+ if (HEE) {
+ Value *I3 = HEE->getOperand(0);
+ if (!I2 && I3 != I1)
+ I2 = I3;
+ else if (I3 != I1 && I3 != I2)
+ CanUseInputs = false;
+ } else {
+ Value *I3 = HSV->getOperand(0);
+ if (!I2 && I3 != I1)
+ I2 = I3;
+ else if (I3 != I1 && I3 != I2)
+ CanUseInputs = false;
+
+ if (CanUseInputs) {
+ Value *I4 = HSV->getOperand(1);
+ if (!isa<UndefValue>(I4)) {
+ if (!I2 && I4 != I1)
+ I2 = I4;
+ else if (I4 != I1 && I4 != I2)
+ CanUseInputs = false;
+ }
+ }
+ }
+
+ if (CanUseInputs) {
+ unsigned LOpElem =
+ cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType())
+ ->getNumElements();
+ unsigned HOpElem =
+ cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType())
+ ->getNumElements();
+
+ // We have one or two input vectors. We need to map each index of the
+ // operands to the index of the original vector.
+ SmallVector<std::pair<int, int>, 8> II(numElem);
+ for (unsigned i = 0; i < numElemL; ++i) {
+ int Idx, INum;
+ if (LEE) {
+ Idx =
+ cast<ConstantInt>(LEE->getOperand(1))->getSExtValue();
+ INum = LEE->getOperand(0) == I1 ? 0 : 1;
+ } else {
+ Idx = LSV->getMaskValue(i);
+ if (Idx < (int) LOpElem) {
+ INum = LSV->getOperand(0) == I1 ? 0 : 1;
+ } else {
+ Idx -= LOpElem;
+ INum = LSV->getOperand(1) == I1 ? 0 : 1;
+ }
+ }
+
+ II[i] = std::pair<int, int>(Idx, INum);
+ }
+ for (unsigned i = 0; i < numElemH; ++i) {
+ int Idx, INum;
+ if (HEE) {
+ Idx =
+ cast<ConstantInt>(HEE->getOperand(1))->getSExtValue();
+ INum = HEE->getOperand(0) == I1 ? 0 : 1;
+ } else {
+ Idx = HSV->getMaskValue(i);
+ if (Idx < (int) HOpElem) {
+ INum = HSV->getOperand(0) == I1 ? 0 : 1;
+ } else {
+ Idx -= HOpElem;
+ INum = HSV->getOperand(1) == I1 ? 0 : 1;
+ }
+ }
+
+ II[i + numElemL] = std::pair<int, int>(Idx, INum);
+ }
+
+ // We now have an array which tells us from which index of which
+ // input vector each element of the operand comes.
+ VectorType *I1T = cast<VectorType>(I1->getType());
+ unsigned I1Elem = I1T->getNumElements();
+
+ if (!I2) {
+ // In this case there is only one underlying vector input. Check for
+ // the trivial case where we can use the input directly.
+ if (I1Elem == numElem) {
+ bool ElemInOrder = true;
+ for (unsigned i = 0; i < numElem; ++i) {
+ if (II[i].first != (int) i && II[i].first != -1) {
+ ElemInOrder = false;
+ break;
+ }
+ }
+
+ if (ElemInOrder)
+ return I1;
+ }
+
+ // A shuffle is needed.
+ std::vector<Constant *> Mask(numElem);
+ for (unsigned i = 0; i < numElem; ++i) {
+ int Idx = II[i].first;
+ if (Idx == -1)
+ Mask[i] = UndefValue::get(Type::getInt32Ty(Context));
+ else
+ Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
+ }
+
+ Instruction *S =
+ new ShuffleVectorInst(I1, UndefValue::get(I1T),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o));
+ S->insertBefore(J);
+ return S;
+ }
+
+ VectorType *I2T = cast<VectorType>(I2->getType());
+ unsigned I2Elem = I2T->getNumElements();
+
+ // This input comes from two distinct vectors. The first step is to
+ // make sure that both vectors are the same length. If not, the
+ // smaller one will need to grow before they can be shuffled together.
+ if (I1Elem < I2Elem) {
+ std::vector<Constant *> Mask(I2Elem);
+ unsigned v = 0;
+ for (; v < I1Elem; ++v)
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ for (; v < I2Elem; ++v)
+ Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
+
+ Instruction *NewI1 =
+ new ShuffleVectorInst(I1, UndefValue::get(I1T),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o, 1));
+ NewI1->insertBefore(J);
+ I1 = NewI1;
+ I1T = I2T;
+ I1Elem = I2Elem;
+ } else if (I1Elem > I2Elem) {
+ std::vector<Constant *> Mask(I1Elem);
+ unsigned v = 0;
+ for (; v < I2Elem; ++v)
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ for (; v < I1Elem; ++v)
+ Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
+
+ Instruction *NewI2 =
+ new ShuffleVectorInst(I2, UndefValue::get(I2T),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o, 1));
+ NewI2->insertBefore(J);
+ I2 = NewI2;
+ I2T = I1T;
+ I2Elem = I1Elem;
+ }
+
+ // Now that both I1 and I2 are the same length we can shuffle them
+ // together (and use the result).
+ std::vector<Constant *> Mask(numElem);
+ for (unsigned v = 0; v < numElem; ++v) {
+ if (II[v].first == -1) {
+ Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
+ } else {
+ int Idx = II[v].first + II[v].second * I1Elem;
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
+ }
+ }
+
+ Instruction *NewOp =
+ new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask),
+ getReplacementName(I, true, o));
+ NewOp->insertBefore(J);
+ return NewOp;
+ }
}
- // If these two inputs are the output of another vector instruction,
- // then we should use that output directly. It might be necessary to
- // permute it first. [When pairings are fused recursively, you can
- // end up with cases where a large vector is decomposed into scalars
- // using extractelement instructions, then built into size-2
- // vectors using insertelement and the into larger vectors using
- // shuffles. InstCombine does not simplify all of these cases well,
- // and so we make sure that shuffles are generated here when possible.
- ExtractElementInst *LEE
- = dyn_cast<ExtractElementInst>(L->getOperand(o));
- ExtractElementInst *HEE
- = dyn_cast<ExtractElementInst>(H->getOperand(o));
-
- if (LEE && HEE &&
- LEE->getOperand(0)->getType() == HEE->getOperand(0)->getType()) {
- VectorType *EEType = cast<VectorType>(LEE->getOperand(0)->getType());
- unsigned LowIndx = cast<ConstantInt>(LEE->getOperand(1))->getZExtValue();
- unsigned HighIndx = cast<ConstantInt>(HEE->getOperand(1))->getZExtValue();
- if (LEE->getOperand(0) == HEE->getOperand(0)) {
- if (LowIndx == 0 && HighIndx == 1)
- return LEE->getOperand(0);
-
- std::vector<Constant*> Mask(2);
- Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx);
- Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx);
-
- Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0),
- UndefValue::get(EEType),
- ConstantVector::get(Mask),
- getReplacementName(I, true, o));
- BV->insertBefore(J);
- return BV;
+ Type *ArgType = ArgTypeL;
+ if (numElemL < numElemH) {
+ if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH,
+ ArgTypeL, VArgType, 1)) {
+ // This is another short-circuit case: we're combining a scalar into
+ // a vector that is formed by an IE chain. We've just expanded the IE
+ // chain; now insert the scalar and we're done.
+
+ Instruction *S = InsertElementInst::Create(HOp, LOp, CV0,
+ getReplacementName(I, true, o));
+ S->insertBefore(J);
+ return S;
+ } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL,
+ ArgTypeH)) {
+ // The two vector inputs to the shuffle must be the same length,
+ // so extend the smaller vector to be the same length as the larger one.
+ Instruction *NLOp;
+ if (numElemL > 1) {
+
+ std::vector<Constant *> Mask(numElemH);
+ unsigned v = 0;
+ for (; v < numElemL; ++v)
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ for (; v < numElemH; ++v)
+ Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
+
+ NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o, 1));
+ } else {
+ NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0,
+ getReplacementName(I, true, o, 1));
+ }
+
+ NLOp->insertBefore(J);
+ LOp = NLOp;
}
- std::vector<Constant*> Mask(2);
- HighIndx += EEType->getNumElements();
- Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx);
- Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx);
+ ArgType = ArgTypeH;
+ } else if (numElemL > numElemH) {
+ if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
+ ArgTypeH, VArgType)) {
+ Instruction *S =
+ InsertElementInst::Create(LOp, HOp,
+ ConstantInt::get(Type::getInt32Ty(Context),
+ numElemL),
+ getReplacementName(I, true, o));
+ S->insertBefore(J);
+ return S;
+ } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH,
+ ArgTypeL)) {
+ Instruction *NHOp;
+ if (numElemH > 1) {
+ std::vector<Constant *> Mask(numElemL);
+ unsigned v = 0;
+ for (; v < numElemH; ++v)
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ for (; v < numElemL; ++v)
+ Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
+
+ NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o, 1));
+ } else {
+ NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0,
+ getReplacementName(I, true, o, 1));
+ }
+
+ NHOp->insertBefore(J);
+ HOp = NHOp;
+ }
+ }
- Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0),
- HEE->getOperand(0),
- ConstantVector::get(Mask),
- getReplacementName(I, true, o));
+ if (ArgType->isVectorTy()) {
+ unsigned numElem = cast<VectorType>(VArgType)->getNumElements();
+ std::vector<Constant*> Mask(numElem);
+ for (unsigned v = 0; v < numElem; ++v) {
+ unsigned Idx = v;
+ // If the low vector was expanded, we need to skip the extra
+ // undefined entries.
+ if (v >= numElemL && numElemH > numElemL)
+ Idx += (numElemH - numElemL);
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
+ }
+
+ Instruction *BV = new ShuffleVectorInst(LOp, HOp,
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o));
BV->insertBefore(J);
return BV;
}
Instruction *BV1 = InsertElementInst::Create(
- UndefValue::get(VArgType),
- L->getOperand(o), CV0,
+ UndefValue::get(VArgType), LOp, CV0,
getReplacementName(I, true, o, 1));
BV1->insertBefore(I);
- Instruction *BV2 = InsertElementInst::Create(BV1, H->getOperand(o),
- CV1,
+ Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1,
getReplacementName(I, true, o, 2));
BV2->insertBefore(J);
return BV2;
@@ -1602,8 +1976,7 @@ namespace {
void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
Instruction *I, Instruction *J,
SmallVector<Value *, 3> &ReplacedOperands,
- bool &FlipMemInputs) {
- FlipMemInputs = false;
+ bool FlipMemInputs) {
unsigned NumOperands = I->getNumOperands();
for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
@@ -1622,10 +1995,10 @@ namespace {
BasicBlock &BB = *I->getParent();
Module *M = BB.getParent()->getParent();
- Type *ArgType = I->getType();
- Type *VArgType = getVecTypeForPair(ArgType);
+ Type *ArgTypeI = I->getType();
+ Type *ArgTypeJ = J->getType();
+ Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- // FIXME: is it safe to do this here?
ReplacedOperands[o] = Intrinsic::getDeclaration(M,
(Intrinsic::ID) IID, VArgType);
continue;
@@ -1654,36 +2027,60 @@ namespace {
Instruction *J, Instruction *K,
Instruction *&InsertionPt,
Instruction *&K1, Instruction *&K2,
- bool &FlipMemInputs) {
- Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
- Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
-
+ bool FlipMemInputs) {
if (isa<StoreInst>(I)) {
AA->replaceWithNewValue(I, K);
AA->replaceWithNewValue(J, K);
} else {
Type *IType = I->getType();
- Type *VType = getVecTypeForPair(IType);
+ Type *JType = J->getType();
+
+ VectorType *VType = getVecTypeForPair(IType, JType);
+ unsigned numElem = VType->getNumElements();
+
+ unsigned numElemI, numElemJ;
+ if (IType->isVectorTy())
+ numElemI = cast<VectorType>(IType)->getNumElements();
+ else
+ numElemI = 1;
+
+ if (JType->isVectorTy())
+ numElemJ = cast<VectorType>(JType)->getNumElements();
+ else
+ numElemJ = 1;
if (IType->isVectorTy()) {
- unsigned numElem = cast<VectorType>(IType)->getNumElements();
- std::vector<Constant*> Mask1(numElem), Mask2(numElem);
- for (unsigned v = 0; v < numElem; ++v) {
- Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElem+v);
- }
+ std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
+ for (unsigned v = 0; v < numElemI; ++v) {
+ Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v);
+ }
- K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get(
- FlipMemInputs ? Mask2 : Mask1),
- getReplacementName(K, false, 1));
- K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get(
- FlipMemInputs ? Mask1 : Mask2),
- getReplacementName(K, false, 2));
+ K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get(
+ FlipMemInputs ? Mask2 : Mask1),
+ getReplacementName(K, false, 1));
} else {
+ Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1);
K1 = ExtractElementInst::Create(K, FlipMemInputs ? CV1 : CV0,
getReplacementName(K, false, 1));
+ }
+
+ if (JType->isVectorTy()) {
+ std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ);
+ for (unsigned v = 0; v < numElemJ; ++v) {
+ Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v);
+ }
+
+ K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get(
+ FlipMemInputs ? Mask1 : Mask2),
+ getReplacementName(K, false, 2));
+ } else {
+ Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1);
K2 = ExtractElementInst::Create(K, FlipMemInputs ? CV0 : CV1,
getReplacementName(K, false, 2));
}
@@ -1784,6 +2181,61 @@ namespace {
}
}
+ // As with the aliasing information, SCEV can also change because of
+ // vectorization. Because SCEV is used to compute the relative offsets
+ // between pairs of pointers, the results needed for fusion are cached
+ // here before any pairs are fused.
+ void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<Value *> &LowPtrInsts) {
+ for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
+ PIE = PairableInsts.end(); PI != PIE; ++PI) {
+ DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
+ if (P == ChosenPairs.end()) continue;
+
+ Instruction *I = cast<Instruction>(P->first);
+ Instruction *J = cast<Instruction>(P->second);
+
+ if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+ continue;
+
+ Value *IPtr, *JPtr;
+ unsigned IAlignment, JAlignment;
+ int64_t OffsetInElmts;
+ if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+ OffsetInElmts) || abs64(OffsetInElmts) != 1)
+ llvm_unreachable("Pre-fusion pointer analysis failed");
+
+ Value *LowPI = (OffsetInElmts > 0) ? I : J;
+ LowPtrInsts.insert(LowPI);
+ }
+ }
+
+ // When the first instruction in each pair is cloned, the clone inherits
+ // the metadata of the instruction it was cloned from. That metadata must
+ // be combined with the second instruction's metadata in a safe way.
+ void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) {
+ SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata;
+ K->getAllMetadataOtherThanDebugLoc(Metadata);
+ for (unsigned i = 0, n = Metadata.size(); i < n; ++i) {
+ unsigned Kind = Metadata[i].first;
+ MDNode *JMD = J->getMetadata(Kind);
+ MDNode *KMD = Metadata[i].second;
+
+ switch (Kind) {
+ default:
+ K->setMetadata(Kind, 0); // Remove unknown metadata
+ break;
+ case LLVMContext::MD_tbaa:
+ K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
+ break;
+ case LLVMContext::MD_fpmath:
+ K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
+ break;
+ }
+ }
+ }
+
// This function fuses the chosen instruction pairs into vector instructions,
// taking care to preserve any needed scalar outputs; it then reorders the
// remaining instructions as needed (users of the first member of the pair
@@ -1810,6 +2262,9 @@ namespace {
std::multimap<Value *, Value *> LoadMoveSet;
collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
+ DenseSet<Value *> LowPtrInsts;
+ collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts);
+
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
@@ -1849,7 +2304,10 @@ namespace {
continue;
}
- bool FlipMemInputs;
+ bool FlipMemInputs = false;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end());
+
unsigned NumOperands = I->getNumOperands();
SmallVector<Value *, 3> ReplacedOperands(NumOperands);
getReplacementInputsForPair(Context, I, J, ReplacedOperands,
@@ -1861,7 +2319,9 @@ namespace {
if (I->hasName()) K->takeName(I);
if (!isa<StoreInst>(K))
- K->mutateType(getVecTypeForPair(I->getType()));
+ K->mutateType(getVecTypeForPair(I->getType(), J->getType()));
+
+ combineMetadata(K, J);
for (unsigned o = 0; o < NumOperands; ++o)
K->setOperand(o, ReplacedOperands[o]);
@@ -1953,6 +2413,7 @@ llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
//===----------------------------------------------------------------------===//
VectorizeConfig::VectorizeConfig() {
VectorBits = ::VectorBits;
+ VectorizeBools = !::NoBools;
VectorizeInts = !::NoInts;
VectorizeFloats = !::NoFloats;
VectorizePointers = !::NoPointers;
@@ -1960,6 +2421,7 @@ VectorizeConfig::VectorizeConfig() {
VectorizeMath = !::NoMath;
VectorizeFMA = !::NoFMA;
VectorizeSelect = !::NoSelect;
+ VectorizeCmp = !::NoCmp;
VectorizeGEP = !::NoGEP;
VectorizeMemOps = !::NoMemOps;
AlignedOnly = ::AlignedOnly;
@@ -1969,6 +2431,7 @@ VectorizeConfig::VectorizeConfig() {
SplatBreaksChain = ::SplatBreaksChain;
MaxInsts = ::MaxInsts;
MaxIter = ::MaxIter;
+ Pow2LenOnly = ::Pow2LenOnly;
NoMemOpBoost = ::NoMemOpBoost;
FastDep = ::FastDep;
}
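A worked example of the combineMetadata policy added above (values illustrative): if I carries !fpmath !{float 2.5} and J carries !fpmath !{float 5.0}, the fused instruction keeps the 5.0 node, because MDNode::getMostGenericFPMath may only promise the weaker accuracy bound, which holds for both halves; two differing !tbaa tags are merged by getMostGenericTBAA toward a common ancestor in the TBAA tree (or dropped when none exists); and any kind the switch does not recognize, say !nontemporal, is removed outright, since its meaning for the fused instruction is unknown.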
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
index 4b6693015c..06cf1e4e53 100644
--- a/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -2,3 +2,5 @@ add_llvm_library(LLVMVectorize
BBVectorize.cpp
Vectorize.cpp
)
+
+add_dependencies(LLVMVectorize intrinsics_gen)