path: root/lib/Transforms
authorDerek Schuff <dschuff@chromium.org>2012-10-25 11:26:14 -0700
committerDerek Schuff <dschuff@chromium.org>2012-10-25 11:26:14 -0700
commit5c897cf45a7b9df227e0c562c27454f56ba86c20 (patch)
treeae3a9ea4d11bfb20379639dd9ca0951ce411e73a /lib/Transforms
parent89758b0545198f9a3876f0deb747146cbd84ce61 (diff)
parenta8a0a155de16830b8fcab539ba2ec21de3145532 (diff)
Merge commit 'a8a0a155de16830b8fcab539ba2ec21de3145532'
Conflicts:
    lib/Target/X86/X86FrameLowering.cpp
    lib/Target/X86/X86ISelLowering.cpp

The Intel folks switched some of the FrameLowering code to use X86RegisterInfo::getSlotSize instead of pointer size, thus reducing our localmods in that file.
Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 4
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 5
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 10
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 290
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 11
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 79
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 12
-rw-r--r--  lib/Transforms/Instrumentation/BoundsChecking.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 51
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 129
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 26
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp | 62
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 3
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 14
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 34
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 388
21 files changed, 781 insertions, 363 deletions
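
Note on the recurring change merged in below: upstream now queries DataLayout::getIntPtrType with a pointer Type (so the result reflects that pointer's address space) rather than only an LLVMContext, and helpers such as InstCombiner::FindElementAtOffset gain an explicit IntPtrTy parameter. The following snippet is a minimal, self-contained sketch of why the type-based query matters; the DataLayout and Type classes here are simplified stand-ins written for illustration, not the real LLVM API.

    #include <iostream>
    #include <map>

    // Simplified stand-ins for llvm::Type / llvm::DataLayout, used only to
    // illustrate the address-space-aware getIntPtrType() pattern in this diff.
    struct Type {
      bool isPointer;
      unsigned addressSpace;  // only meaningful when isPointer is true
    };

    struct DataLayout {
      // Pointer width in bits, per address space (e.g. AS0 = 64-bit, AS1 = 32-bit).
      std::map<unsigned, unsigned> pointerSizeInBits{{0, 64}, {1, 32}};

      // Old-style query: context only, always answers for address space 0.
      unsigned getIntPtrWidth() const { return pointerSizeInBits.at(0); }

      // New-style query: derives the address space from the pointer type itself.
      unsigned getIntPtrWidth(const Type &PtrTy) const {
        unsigned AS = PtrTy.isPointer ? PtrTy.addressSpace : 0;
        return pointerSizeInBits.at(AS);
      }
    };

    int main() {
      DataLayout DL;
      Type GlobalPtr{true, 1};  // a pointer living in address space 1

      // The context-only form silently assumes AS0 and picks the wrong width.
      std::cout << "old-style: i" << DL.getIntPtrWidth() << "\n";          // i64
      // The type-based form matches the pointer's own address space.
      std::cout << "new-style: i" << DL.getIntPtrWidth(GlobalPtr) << "\n"; // i32
      return 0;
    }
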
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 678189b3d6..3d5657fe6a 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1500,7 +1500,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(GV->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, 0,
@@ -1730,7 +1730,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
- Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(GV->getType());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 44283ddce7..1c6477c022 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -206,9 +206,8 @@ bool FunctionComparator::isEquivalentType(Type *Ty1,
return true;
if (Ty1->getTypeID() != Ty2->getTypeID()) {
if (TD) {
- LLVMContext &Ctx = Ty1->getContext();
- if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
- if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
+ if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ty1)) return true;
+ if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ty2)) return true;
}
return false;
}
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 7467eca7ab..0e765f7aaa 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -208,7 +208,7 @@ private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V) const;
- Type *FindElementAtOffset(Type *Ty, int64_t Offset,
+ Type *FindElementAtOffset(Type *Ty, int64_t Offset, Type *IntPtrTy,
SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4f4c388a92..0958842d08 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -996,9 +996,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Conversion is ok if changing from one pointer type to another or from
// a pointer to an integer of the same size.
!((OldRetTy->isPointerTy() || !TD ||
- OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
+ OldRetTy == TD->getIntPtrType(NewRetTy)) &&
(NewRetTy->isPointerTy() || !TD ||
- NewRetTy == TD->getIntPtrType(Caller->getContext()))))
+ NewRetTy == TD->getIntPtrType(OldRetTy))))
return false; // Cannot transform this return value.
if (!Caller->use_empty() &&
@@ -1057,11 +1057,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Converting from one pointer type to another or between a pointer and an
// integer of the same size is safe even if we do not have a body.
+ // FIXME: Not sure what to do here, so setting AS to 0.
+ // How can the AS for a function call be outside the default?
bool isConvertible = ActTy == ParamTy ||
(TD && ((ParamTy->isPointerTy() ||
- ParamTy == TD->getIntPtrType(Caller->getContext())) &&
+ ParamTy == TD->getIntPtrType(ActTy)) &&
(ActTy->isPointerTy() ||
- ActTy == TD->getIntPtrType(Caller->getContext()))));
+ ActTy == TD->getIntPtrType(ParamTy))));
if (Callee->isDeclaration() && !isConvertible) return false;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f3f3f8f585..119d2f5c99 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -30,7 +30,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Scale = 0;
return ConstantInt::get(Val->getType(), 0);
}
-
+
if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
// Cannot look past anything that might overflow.
OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
@@ -47,19 +47,19 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Offset = 0;
return I->getOperand(0);
}
-
+
if (I->getOpcode() == Instruction::Mul) {
// This value is scaled by 'RHS'.
Scale = RHS->getZExtValue();
Offset = 0;
return I->getOperand(0);
}
-
+
if (I->getOpcode() == Instruction::Add) {
- // We have X+C. Check to see if we really have (X*C2)+C1,
+ // We have X+C. Check to see if we really have (X*C2)+C1,
// where C1 is divisible by C2.
unsigned SubScale;
- Value *SubVal =
+ Value *SubVal =
DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
Offset += RHS->getZExtValue();
Scale = SubScale;
@@ -82,7 +82,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
if (!TD) return 0;
PointerType *PTy = cast<PointerType>(CI.getType());
-
+
BuilderTy AllocaBuilder(*Builder);
AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
@@ -110,7 +110,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
uint64_t ArrayOffset;
Value *NumElements = // See if the array size is a decomposable linear expr.
DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
-
+
// If we can now satisfy the modulus, by using a non-1 scale, we really can
// do the xform.
if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
@@ -125,17 +125,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// Insert before the alloca, not before the cast.
Amt = AllocaBuilder.CreateMul(Amt, NumElements);
}
-
+
if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
Offset, true);
Amt = AllocaBuilder.CreateAdd(Amt, Off);
}
-
+
AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
New->setAlignment(AI.getAlignment());
New->takeName(&AI);
-
+
// If the allocation has multiple real uses, insert a cast and change all
// things that used it to use the new cast. This will also hack on CI, but it
// will die soon.
@@ -148,10 +148,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-/// EvaluateInDifferentType - Given an expression that
+/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
/// insert the code to evaluate the expression.
-Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
+Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V)) {
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
@@ -181,7 +181,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
break;
- }
+ }
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -190,7 +190,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
// new.
if (I->getOperand(0)->getType() == Ty)
return I->getOperand(0);
-
+
// Otherwise, must be the same type of cast, so just reinsert a new one.
// This also handles the case of zext(trunc(x)) -> zext(x).
Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
@@ -212,11 +212,11 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
Res = NPN;
break;
}
- default:
+ default:
// TODO: Can handle more cases here.
llvm_unreachable("Unreachable!");
}
-
+
Res->takeName(I);
return InsertNewInstWith(Res, *I);
}
@@ -224,7 +224,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
/// This function is a wrapper around CastInst::isEliminableCastPair. It
/// simply extracts arguments and returns what that function returns.
-static Instruction::CastOps
+static Instruction::CastOps
isEliminableCastPair(
const CastInst *CI, ///< The first cast instruction
unsigned opcode, ///< The opcode of the second cast instruction
@@ -238,19 +238,18 @@ isEliminableCastPair(
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
-
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy,
- TD ? TD->getIntPtrType(CI->getContext()) : 0);
-
+ TD ? TD->getIntPtrType(DstTy) : 0);
+
// We don't want to form an inttoptr or ptrtoint that converts to an integer
// type that differs from the pointer size.
if ((Res == Instruction::IntToPtr &&
- (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
+ (!TD || SrcTy != TD->getIntPtrType(DstTy))) ||
(Res == Instruction::PtrToInt &&
- (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
+ (!TD || DstTy != TD->getIntPtrType(SrcTy))))
Res = 0;
-
+
return Instruction::CastOps(Res);
}
@@ -262,18 +261,18 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
Type *Ty) {
// Noop casts and casts of constants should be eliminated trivially.
if (V->getType() == Ty || isa<Constant>(V)) return false;
-
+
// If this is another cast that can be eliminated, we prefer to have it
// eliminated.
if (const CastInst *CI = dyn_cast<CastInst>(V))
if (isEliminableCastPair(CI, opc, Ty, TD))
return false;
-
+
// If this is a vector sext from a compare, then we don't want to break the
// idiom where each element of the extended vector is either zero or all ones.
if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
return false;
-
+
return true;
}
@@ -285,7 +284,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
// Many cases of "cast of a cast" are eliminable. If it's eliminable we just
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
- if (Instruction::CastOps opc =
+ if (Instruction::CastOps opc =
isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
@@ -308,7 +307,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
}
-
+
return 0;
}
@@ -327,15 +326,15 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
// We can always evaluate constants in another type.
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
Type *OrigTy = V->getType();
-
+
// If this is an extension from the dest type, we can eliminate it, even if it
// has multiple uses.
- if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
I->getOperand(0)->getType() == Ty)
return true;
@@ -420,29 +419,29 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
// TODO: Can handle more cases here.
break;
}
-
+
return false;
}
Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
-
- // See if we can simplify any instructions used by the input whose sole
+
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType(), *SrcTy = Src->getType();
-
+
// Attempt to truncate the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateTruncated(Src, DestTy)) {
-
+
// If this cast is a truncate, evaluting in a different type always
// eliminates the cast, so it is always a win.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -459,7 +458,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
-
+
// Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
Value *A = 0; ConstantInt *Cst = 0;
if (Src->hasOneUse() &&
@@ -469,7 +468,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// ASize < MidSize and MidSize > ResultSize, but don't know the relation
// between ASize and ResultSize.
unsigned ASize = A->getType()->getPrimitiveSizeInBits();
-
+
// If the shift amount is larger than the size of A, then the result is
// known to be zero because all the input bits got shifted out.
if (Cst->getZExtValue() >= ASize)
@@ -482,7 +481,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Shift->takeName(Src);
return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
}
-
+
// Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
// type isn't non-native.
if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
@@ -505,7 +504,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// cast to integer to avoid the comparison.
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
const APInt &Op1CV = Op1C->getValue();
-
+
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
@@ -535,14 +534,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
// zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
// zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
// This only works for EQ and NE
ICI->isEquality()) {
// If Op1C some other power of two, convert:
uint32_t BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
-
+
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
if (!DoXform) return ICI;
@@ -556,7 +555,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
Res = ConstantExpr::getZExt(Res, CI.getType());
return ReplaceInstUsesWith(CI, Res);
}
-
+
uint32_t ShiftAmt = KnownZeroMask.logBase2();
Value *In = ICI->getOperand(0);
if (ShiftAmt) {
@@ -565,12 +564,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
In->getName()+".lobit");
}
-
+
if ((Op1CV != 0) == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1);
In = Builder->CreateXor(In, One);
}
-
+
if (CI.getType() == In->getType())
return ReplaceInstUsesWith(CI, In);
return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
@@ -643,19 +642,19 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
BitsToClear = 0;
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// If the input is a truncate from the destination type, we can trivially
// eliminate it.
if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
-
+
// We can't extend or shrink something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
-
+
unsigned Opc = I->getOpcode(), Tmp;
switch (Opc) {
case Instruction::ZExt: // zext(zext(x)) -> zext(x).
@@ -675,7 +674,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// These can all be promoted if neither operand has 'bits to clear'.
if (BitsToClear == 0 && Tmp == 0)
return true;
-
+
// If the operation is an AND/OR/XOR and the bits to clear are zero in the
// other side, BitsToClear is ok.
if (Tmp == 0 &&
@@ -688,10 +687,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
APInt::getHighBitsSet(VSize, BitsToClear)))
return true;
}
-
+
// Otherwise, we don't know how to analyze this BitsToClear case yet.
return false;
-
+
case Instruction::LShr:
// We can promote lshr(x, cst) if we can promote x. This requires the
// ultimate 'and' to clear out the high zero bits we're clearing out though.
@@ -713,7 +712,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
Tmp != BitsToClear)
return false;
return true;
-
+
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
@@ -740,44 +739,44 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// eliminated before we try to optimize this zext.
if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
return 0;
-
+
// If one of the common conversion will work, do it.
if (Instruction *Result = commonCastTransforms(CI))
return Result;
- // See if we can simplify any instructions used by the input whose sole
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
-
+
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
unsigned BitsToClear;
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
"Unreasonable BitsToClear");
-
+
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
" to avoid zero extend: " << CI);
Value *Res = EvaluateInDifferentType(Src, DestTy, false);
assert(Res->getType() == DestTy);
-
+
uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
+
// If the high bits are already filled with zeros, just replace this
// cast with the result.
if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
DestBitSize-SrcBitsKept)))
return ReplaceInstUsesWith(CI, Res);
-
+
// We need to emit an AND to clear the high bits.
Constant *C = ConstantInt::get(Res->getType(),
APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
@@ -789,7 +788,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// 'and' which will be much cheaper than the pair of casts.
if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
// TODO: Subsume this into EvaluateInDifferentType.
-
+
// Get the sizes of the types involved. We know that the intermediate type
// will be smaller than A or C, but don't know the relation between A and C.
Value *A = CSrc->getOperand(0);
@@ -806,7 +805,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
return new ZExtInst(And, CI.getType());
}
-
+
if (SrcSize == DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
@@ -815,7 +814,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (SrcSize > DstSize) {
Value *Trunc = Builder->CreateTrunc(A, CI.getType());
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
- return BinaryOperator::CreateAnd(Trunc,
+ return BinaryOperator::CreateAnd(Trunc,
ConstantInt::get(Trunc->getType(),
AndValue));
}
@@ -873,7 +872,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *New = Builder->CreateZExt(X, CI.getType());
return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
}
-
+
return 0;
}
@@ -986,14 +985,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// If this is a constant, it can be trivially promoted.
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// If this is a truncate from the dest type, we can trivially eliminate it.
if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
-
+
// We can't extend or shrink something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
@@ -1012,14 +1011,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// These operators can all arbitrarily be extended if their inputs can.
return CanEvaluateSExtd(I->getOperand(0), Ty) &&
CanEvaluateSExtd(I->getOperand(1), Ty);
-
+
//case Instruction::Shl: TODO
//case Instruction::LShr: TODO
-
+
case Instruction::Select:
return CanEvaluateSExtd(I->getOperand(1), Ty) &&
CanEvaluateSExtd(I->getOperand(2), Ty);
-
+
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
@@ -1033,7 +1032,7 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// TODO: Can handle more cases here.
break;
}
-
+
return false;
}
@@ -1042,15 +1041,15 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// eliminated before we try to optimize this zext.
if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
return 0;
-
+
if (Instruction *I = commonCastTransforms(CI))
return I;
-
- // See if we can simplify any instructions used by the input whose sole
+
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
@@ -1073,7 +1072,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// cast with the result.
if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
return ReplaceInstUsesWith(CI, Res);
-
+
// We need to emit a shl + ashr to do the sign extend.
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
@@ -1086,7 +1085,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
+
// We need to emit a shl + ashr to do the sign extend.
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
@@ -1122,7 +1121,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
A = Builder->CreateShl(A, ShAmtV, CI.getName());
return BinaryOperator::CreateAShr(A, ShAmtV);
}
-
+
return 0;
}
@@ -1144,7 +1143,7 @@ static Value *LookThroughFPExtensions(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::FPExt)
return LookThroughFPExtensions(I->getOperand(0));
-
+
// If this value is a constant, return the constant in the smallest FP type
// that can accurately represent it. This allows us to turn
// (float)((double)X+2.0) into x+2.0f.
@@ -1163,14 +1162,14 @@ static Value *LookThroughFPExtensions(Value *V) {
return V;
// Don't try to shrink to various long double types.
}
-
+
return V;
}
Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
-
+
// If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
// smaller than the destination type, we can eliminate the truncate by doing
// the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
@@ -1187,7 +1186,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Type *SrcTy = OpI->getType();
Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
- if (LHSTrunc->getType() != SrcTy &&
+ if (LHSTrunc->getType() != SrcTy &&
RHSTrunc->getType() != SrcTy) {
unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If the source types were both smaller than the destination type of
@@ -1199,10 +1198,10 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
}
}
- break;
+ break;
}
}
-
+
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
@@ -1217,7 +1216,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Arg->getOperand(0)->getType()->isFloatTy()) {
Function *Callee = Call->getCalledFunction();
Module *M = CI.getParent()->getParent()->getParent();
- Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
Callee->getAttributes(),
Builder->getFloatTy(),
Builder->getFloatTy(),
@@ -1225,15 +1224,15 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
"sqrtfcall");
ret->setAttributes(Callee->getAttributes());
-
-
+
+
// Remove the old Call. With -fmath-errno, it won't get marked readnone.
ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
EraseInstFromFunction(*Call);
return ret;
}
}
-
+
return 0;
}
@@ -1251,7 +1250,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
// This is safe if the intermediate type has enough bits in its mantissa to
// accurately represent all values of X. For example, do not do this with
// i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
+ // 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
(int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
@@ -1265,19 +1264,19 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
if (OpI == 0)
return commonCastTransforms(FI);
-
+
// fptosi(sitofp(X)) --> X
// fptosi(uitofp(X)) --> X
// This is safe if the intermediate type has enough bits in its mantissa to
// accurately represent all values of X. For example, do not do this with
// i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
+ // 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
(int)FI.getType()->getScalarSizeInBits() <=
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
-
+
return commonCastTransforms(FI);
}
@@ -1298,17 +1297,17 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreateTrunc(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
+ TD->getIntPtrType(CI.getType()));
return new IntToPtrInst(P, CI.getType());
}
if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreateZExt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
+ TD->getIntPtrType(CI.getType()));
return new IntToPtrInst(P, CI.getType());
}
}
-
+
if (Instruction *I = commonCastTransforms(CI))
return I;
@@ -1318,19 +1317,19 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
-
+
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
// If casting the result of a getelementptr instruction with no offset, turn
// this into a cast of the original pointer!
if (GEP->hasAllZeroIndices()) {
// Changing the cast operand is usually not a good idea but it is safe
- // here because the pointer operand is being replaced with another
+ // here because the pointer operand is being replaced with another
// pointer operand so the opcode doesn't need to change.
Worklist.Add(GEP);
CI.setOperand(0, GEP->getOperand(0));
return &CI;
}
-
+
// If the GEP has a single use, and the base pointer is a bitcast, and the
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
@@ -1345,7 +1344,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
Type *GEPIdxTy =
cast<PointerType>(OrigBase->getType())->getElementType();
SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
+ Type *IntPtrTy = TD->getIntPtrType(OrigBase->getType());
+ if (FindElementAtOffset(GEPIdxTy, Offset, IntPtrTy, NewIndices)) {
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
@@ -1353,15 +1353,15 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
Builder->CreateGEP(OrigBase, NewIndices);
NGEP->takeName(GEP);
-
+
if (isa<BitCastInst>(CI))
return new BitCastInst(NGEP, CI.getType());
assert(isa<PtrToIntInst>(CI));
return new PtrToIntInst(NGEP, CI.getType());
- }
+ }
}
}
-
+
return commonCastTransforms(CI);
}
@@ -1373,16 +1373,16 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
if (TD) {
if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
+ TD->getIntPtrType(CI.getContext(), AS));
return new TruncInst(P, CI.getType());
}
if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
+ TD->getIntPtrType(CI.getContext(), AS));
return new ZExtInst(P, CI.getType());
}
}
-
+
return commonPointerCastTransforms(CI);
}
@@ -1397,33 +1397,33 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
// element size, or the input is a multiple of the output element size.
// Convert the input type to have the same element type as the output.
VectorType *SrcTy = cast<VectorType>(InVal->getType());
-
+
if (SrcTy->getElementType() != DestTy->getElementType()) {
// The input types don't need to be identical, but for now they must be the
// same size. There is no specific reason we couldn't handle things like
// <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
- // there yet.
+ // there yet.
if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
DestTy->getElementType()->getPrimitiveSizeInBits())
return 0;
-
+
SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
}
-
+
// Now that the element types match, get the shuffle mask and RHS of the
// shuffle to use, which depends on whether we're increasing or decreasing the
// size of the input.
SmallVector<uint32_t, 16> ShuffleMask;
Value *V2;
-
+
if (SrcTy->getNumElements() > DestTy->getNumElements()) {
// If we're shrinking the number of elements, just shuffle in the low
// elements from the input and use undef as the second shuffle input.
V2 = UndefValue::get(SrcTy);
for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
ShuffleMask.push_back(i);
-
+
} else {
// If we're increasing the number of elements, shuffle in all of the
// elements from InVal and fill the rest of the result elements with zeros
@@ -1437,7 +1437,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
ShuffleMask.push_back(SrcElts);
}
-
+
return new ShuffleVectorInst(InVal, V2,
ConstantDataVector::get(V2->getContext(),
ShuffleMask));
@@ -1464,7 +1464,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
Type *VecEltTy) {
// Undef values never contribute useful bits to the result.
if (isa<UndefValue>(V)) return true;
-
+
// If we got down to a value of the right type, we win, try inserting into the
// right element.
if (V->getType() == VecEltTy) {
@@ -1472,15 +1472,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (Constant *C = dyn_cast<Constant>(V))
if (C->isNullValue())
return true;
-
+
// Fail if multiple elements are inserted into this slot.
if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
return false;
-
+
Elements[ElementIndex] = V;
return true;
}
-
+
if (Constant *C = dyn_cast<Constant>(V)) {
// Figure out the # elements this provides, and bitcast it or slice it up
// as required.
@@ -1491,7 +1491,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
ElementIndex, Elements, VecEltTy);
-
+
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
if (!isa<IntegerType>(C->getType()))
@@ -1499,7 +1499,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
C->getType()->getPrimitiveSizeInBits()));
unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
-
+
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
i*ElementSize));
@@ -1509,23 +1509,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
}
return true;
}
-
+
if (!V->hasOneUse()) return false;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return false;
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ Elements, VecEltTy);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ Elements, VecEltTy);
case Instruction::Or:
return CollectInsertionElements(I->getOperand(0), ElementIndex,
Elements, VecEltTy) &&
@@ -1537,11 +1537,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (CI == 0) return false;
if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-
+
return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
Elements, VecEltTy);
}
-
+
}
}
@@ -1576,11 +1576,11 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
Value *Result = Constant::getNullValue(CI.getType());
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
if (Elements[i] == 0) continue; // Unset element.
-
+
Result = IC.Builder->CreateInsertElement(Result, Elements[i],
IC.Builder->getInt32(i));
}
-
+
return Result;
}
@@ -1608,11 +1608,11 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecTy->getPrimitiveSizeInBits() / DestWidth);
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
-
+
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
}
}
-
+
// bitcast(trunc(lshr(bitcast(somevector), cst))
ConstantInt *ShAmt = 0;
if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
@@ -1629,7 +1629,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecTy->getPrimitiveSizeInBits() / DestWidth);
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
-
+
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1653,12 +1653,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
PointerType *SrcPTy = cast<PointerType>(SrcTy);
Type *DstElTy = DstPTy->getElementType();
Type *SrcElTy = SrcPTy->getElementType();
-
+
// If the address spaces don't match, don't eliminate the bitcast, which is
// required for changing types.
if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
return 0;
-
+
// If we are casting a alloca to a pointer to a type of the same
// size, rewrite the allocation instruction to allocate the "right" type.
// There is no need to modify malloc calls because it is their bitcast that
@@ -1666,14 +1666,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
return V;
-
+
// If the source and destination are pointers, and this cast is equivalent
// to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
// This can enhance SROA and other transforms that want type-safe pointers.
Constant *ZeroUInt =
Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
- while (SrcElTy != DstElTy &&
+ while (SrcElTy != DstElTy &&
isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
@@ -1686,7 +1686,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
-
+
// Try to optimize int -> float bitcasts.
if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
@@ -1699,7 +1699,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
}
-
+
if (isa<IntegerType>(SrcTy)) {
// If this is a cast from an integer to vector, check to see if the input
// is a trunc or zext of a bitcast from vector. If so, we can replace all
@@ -1712,7 +1712,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
cast<VectorType>(DestTy), *this))
return I;
}
-
+
// If the input is an 'or' instruction, we may be doing shifts and ors to
// assemble the elements of the vector manually. Try to rip the code out
// and replace it with insertelements.
@@ -1723,7 +1723,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
- Value *Elem =
+ Value *Elem =
Builder->CreateExtractElement(Src,
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
return CastInst::Create(Instruction::BitCast, Elem, DestTy);
@@ -1733,7 +1733,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitcast to a vector with the same # elts.
- if (SVI->hasOneUse() && DestTy->isVectorTy() &&
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
cast<VectorType>(DestTy)->getNumElements() ==
SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
@@ -1742,9 +1742,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
// us to eliminate at least one cast.
- if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
Tmp->getOperand(0)->getType() == DestTy) ||
- ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
Tmp->getOperand(0)->getType() == DestTy)) {
Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
@@ -1754,7 +1754,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
}
-
+
if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e3e5ddae80..055c3b1514 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -371,7 +371,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds() &&
Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits(AS))
- Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+ Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext(), AS));
// If the comparison is only true for one or two elements, emit direct
// comparisons.
@@ -539,7 +539,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext(), AS);
VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
}
return VariableIdx;
@@ -561,7 +561,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
return 0;
// Okay, we can do this evaluation. Start by converting the index to intptr.
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext(), AS);
if (VariableIdx->getType() != IntPtrTy)
VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
true /*Signed*/);
@@ -1554,8 +1554,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerSizeInBits(
- cast<PtrToIntInst>(LHSCI)->getPointerAddressSpace()) ==
+ TD->getTypeSizeInBits(DestTy) ==
cast<IntegerType>(DestTy)->getBitWidth()) {
Value *RHSOp = 0;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
@@ -2251,7 +2250,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
if (RHSC->isNullValue() && TD &&
- TD->getIntPtrType(RHSC->getContext()) ==
+ TD->getIntPtrType(LHSI->getType()) ==
LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4ab5b6e4a0..633ad93ad9 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -173,7 +173,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
if (TD) {
- Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(AI.getType());
if (AI.getArraySize()->getType() != IntPtrTy) {
Value *V = Builder->CreateIntCast(AI.getArraySize(),
IntPtrTy, false);
@@ -185,7 +185,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
+ Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
New->setAlignment(AI.getAlignment());
@@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
Type *SrcPTy = SrcTy->getElementType();
- if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
DestPTy->isVectorTy()) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
@@ -328,7 +328,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
}
if (IC.getDataLayout() &&
- (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
@@ -339,7 +339,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before the load, cast
// the result of the loaded value.
- LoadInst *NewLoad =
+ LoadInst *NewLoad =
IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
NewLoad->setAlignment(LI.getAlignment());
NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
@@ -376,7 +376,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return 0;
-
+
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
@@ -397,7 +397,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Constant::getNullValue(Op->getType()), &LI);
return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
- }
+ }
// load null/undef -> unreachable
// TODO: Consider a target hook for valid address spaces for this xform.
@@ -416,7 +416,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (CE->isCast())
if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
return Res;
-
+
if (Op->hasOneUse()) {
// Change select and PHI nodes to select values instead of addresses: this
// helps alias analysis out a lot, allows many others simplifications, and
@@ -470,18 +470,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
if (SrcTy == 0) return 0;
-
+
Type *SrcPTy = SrcTy->getElementType();
if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
return 0;
-
+
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
/// to its first element. This allows us to handle things like:
/// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
/// on 32-bit hosts.
SmallVector<Value*, 4> NewGEPIndices;
-
+
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
@@ -489,7 +489,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// Index through pointer.
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
NewGEPIndices.push_back(Zero);
-
+
while (1) {
if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
if (!STy->getNumElements()) /* Struct can be empty {} */
@@ -503,24 +503,23 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
break;
}
}
-
+
SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
}
if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
return 0;
-
+
// If the pointers point into different address spaces or if they point to
// values with different sizes, we can't do the transformation.
if (!IC.getDataLayout() ||
- SrcTy->getAddressSpace() !=
- cast<PointerType>(CI->getType())->getAddressSpace() ||
+ SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace() ||
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
IC.getDataLayout()->getTypeSizeInBits(DestPTy))
return 0;
// Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before
+ // the same size. Instead of casting the pointer before
// the store, cast the value to be stored.
Value *NewCast;
Value *SIOp0 = SI.getOperand(0);
@@ -534,12 +533,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
if (SIOp0->getType()->isPointerTy())
opcode = Instruction::PtrToInt;
}
-
+
// SIOp0 is a pointer to aggregate and this is a store to the first field,
// emit a GEP to index into its first field.
if (!NewGEPIndices.empty())
CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
-
+
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
SI.setOperand(0, NewCast);
@@ -558,7 +557,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
static bool equivalentAddressValues(Value *A, Value *B) {
// Test if the values are trivially equivalent.
if (A == B) return true;
-
+
// Test if the values come form identical arithmetic instructions.
// This uses isIdenticalToWhenDefined instead of isIdenticalTo because
// its only used to compare two uses within the same basic block, which
@@ -571,7 +570,7 @@ static bool equivalentAddressValues(Value *A, Value *B) {
if (Instruction *BI = dyn_cast<Instruction>(B))
if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
-
+
// Otherwise they may not be equivalent.
return false;
}
@@ -602,7 +601,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
if (Ptr->hasOneUse()) {
- if (isa<AllocaInst>(Ptr))
+ if (isa<AllocaInst>(Ptr))
return EraseInstFromFunction(SI);
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
if (isa<AllocaInst>(GEP->getOperand(0))) {
@@ -625,8 +624,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
ScanInsts++;
continue;
- }
-
+ }
+
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store isn't volatile, and stores to the same location?
if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
@@ -638,7 +637,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
}
break;
}
-
+
// If this is a load, we have to stop. However, if the loaded value is from
// the pointer we're loading and is producing the pointer we're storing,
// then *this* store is dead (X = load P; store X -> P).
@@ -646,12 +645,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
LI->isSimple())
return EraseInstFromFunction(SI);
-
+
// Otherwise, this is a load from some other location. Stores before it
// may not be dead.
break;
}
-
+
// Don't skip over loads or things that can modify memory.
if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
break;
@@ -681,11 +680,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (Instruction *Res = InstCombineStoreToCast(*this, SI))
return Res;
-
+
// If this store is the last instruction in the basic block (possibly
// excepting debug info instructions), and if the block ends with an
// unconditional branch, try to move it to the successor block.
- BBI = &SI;
+ BBI = &SI;
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
@@ -694,7 +693,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (BI->isUnconditional())
if (SimplifyStoreAtEndOfBlock(SI))
return 0; // xform done!
-
+
return 0;
}
@@ -708,12 +707,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BasicBlock *StoreBB = SI.getParent();
-
+
// Check to see if the successor block has exactly two incoming edges. If
// so, see if the other predecessor contains a store to the same location.
// if so, insert a PHI node (if needed) and move the stores down.
BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-
+
// Determine whether Dest has exactly two predecessors and, if so, compute
// the other predecessor.
pred_iterator PI = pred_begin(DestBB);
@@ -725,7 +724,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
if (++PI == pred_end(DestBB))
return false;
-
+
P = *PI;
if (P != StoreBB) {
if (OtherBB)
@@ -745,7 +744,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
if (!OtherBr || BBI == OtherBB->begin())
return false;
-
+
// If the other block ends in an unconditional branch, check for the 'if then
// else' case. there is an instruction before the branch.
StoreInst *OtherStore = 0;
@@ -767,10 +766,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
// destinations is StoreBB, then we have the if/then case.
- if (OtherBr->getSuccessor(0) != StoreBB &&
+ if (OtherBr->getSuccessor(0) != StoreBB &&
OtherBr->getSuccessor(1) != StoreBB)
return false;
-
+
// Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
// if/then triangle. See if there is a store to the same ptr as SI that
// lives in OtherBB.
@@ -788,7 +787,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BBI == OtherBB->begin())
return false;
}
-
+
// In order to eliminate the store in OtherBr, we have to
// make sure nothing reads or overwrites the stored value in
// StoreBB.
@@ -798,7 +797,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
}
}
-
+
// Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0);
if (MergedVal != SI.getOperand(0)) {
@@ -807,7 +806,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
}
-
+
// Advance to a place where it is safe to insert the new store and
// insert it.
BBI = DestBB->getFirstInsertionPt();
@@ -817,7 +816,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
SI.getOrdering(),
SI.getSynchScope());
InsertNewInstBefore(NewSI, *BBI);
- NewSI->setDebugLoc(OtherStore->getDebugLoc());
+ NewSI->setDebugLoc(OtherStore->getDebugLoc());
// Nuke the old stores.
EraseInstFromFunction(SI);
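The store-sinking transform above (SimplifyStoreAtEndOfBlock) merges the stores from an if/then or if/then/else diamond into a single store in the join block, with a PHI supplying the stored value. A minimal source-level sketch of the pattern it targets, in plain C++ rather than LLVM IR (all names are illustrative only):

    int g;                            // some location both arms store to

    void before(bool c, int a, int b) {
      if (c)
        g = a;                        // store in the 'then' predecessor
      else
        g = b;                        // store in the 'else' predecessor
    }

    void after(bool c, int a, int b) {
      int merged = c ? a : b;         // the PHI of the two stored values
      g = merged;                     // one store, sunk into the common successor
    }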
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7f8c3ae558..00b7fca681 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -738,7 +738,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
/// or not there is a sequence of GEP indices into the type that will land us at
/// the specified offset. If so, fill them into NewIndices and return the
/// resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
+Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, Type *IntPtrTy,
SmallVectorImpl<Value*> &NewIndices) {
if (!TD) return 0;
if (!Ty->isSized()) return 0;
@@ -746,7 +746,6 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
int64_t FirstIdx = 0;
if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
@@ -1055,7 +1054,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// by multiples of a zero size type with zero.
if (TD) {
bool MadeChange = false;
- Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(PtrOp->getType());
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
@@ -1240,7 +1239,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1275,7 +1274,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1337,7 +1336,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> NewIndices;
Type *InTy =
cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
- if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+ Type *IntPtrTy = TD->getIntPtrType(BCI->getOperand(0)->getType());
+ if (FindElementAtOffset(InTy, Offset, IntPtrTy, NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
Builder->CreateGEP(BCI->getOperand(0), NewIndices);
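The recurring change in this file is passing the pointer's own type to getIntPtrType instead of only the context, so the offset/index integer width can follow the pointer's address space. A standalone sketch of that idea, with hypothetical names and a made-up per-address-space size table (this is not the LLVM API):

    #include <cstdint>
    #include <map>

    struct ToyPointerType { unsigned AddressSpace; };

    // Hypothetical per-address-space pointer widths, as a datalayout might specify.
    static const std::map<unsigned, unsigned> PointerBits = {{0, 64}, {1, 32}};

    // Pick the integer width for pointer arithmetic from the pointer itself,
    // not from a single module-wide default.
    unsigned intPtrWidthFor(const ToyPointerType &PT) {
      std::map<unsigned, unsigned>::const_iterator It = PointerBits.find(PT.AddressSpace);
      return It != PointerBits.end() ? It->second : 64;  // fallback width
    }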
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 976b963046..dd36a00070 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -143,7 +143,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
Value *Offset = SizeOffset.second;
ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
- IntegerType *IntTy = TD->getIntPtrType(Inst->getContext());
+ IntegerType *IntTy = TD->getIntPtrType(Ptr->getType());
Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
// three checks are required to ensure safety:
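Those checks boil down to verifying that the access fits inside the object at the computed offset. A standalone sketch of the arithmetic, assuming Size, Offset and NeededSize are already expressed in the pointer-width integer type used above:

    #include <cstdint>

    bool accessIsSafe(uint64_t Size, int64_t Offset, uint64_t NeededSize) {
      if (Offset < 0) return false;                               // offset is non-negative
      if (static_cast<uint64_t>(Offset) > Size) return false;     // offset is within the object
      return Size - static_cast<uint64_t>(Offset) >= NeededSize;  // room left for the access
    }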
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 2b42c75d14..74e310f7e7 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -935,7 +935,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
Type *IntPtrTy =
- TLI->getDataLayout()->getIntPtrType(AccessTy->getContext());
+ TLI->getDataLayout()->getIntPtrType(Addr->getType());
Value *Result = 0;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index eb0da20abb..b6e15540e7 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -746,6 +746,15 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
return true;
}
+/// Wrap TD.getIntPtrType, but return a vector type for vector inputs.
+static Type *getIntPtrType(Type *Ty, const DataLayout &TD) {
+ Type *ITy = TD.getIntPtrType(Ty);
+ if (Ty->isVectorTy()) {
+ ITy = VectorType::get(ITy, Ty->getVectorNumElements());
+ }
+
+ return ITy;
+}
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
@@ -769,24 +778,25 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
// Pointer to Pointer -> use bitcast.
- if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
+ if (StoredValTy->getScalarType()->isPointerTy() &&
+ LoadedTy->getScalarType()->isPointerTy())
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
// Convert source pointers to integers, which can be bitcast.
- if (StoredValTy->isPointerTy()) {
- StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = getIntPtrType(StoredValTy, TD);
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
Type *TypeToCastTo = LoadedTy;
- if (TypeToCastTo->isPointerTy())
- TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
+ if (TypeToCastTo->getScalarType()->isPointerTy())
+ TypeToCastTo = getIntPtrType(StoredValTy, TD);
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
// Cast to pointer if the load needs a pointer type.
- if (LoadedTy->isPointerTy())
+ if (LoadedTy->getScalarType()->isPointerTy())
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
return StoredVal;
@@ -798,8 +808,8 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
// Convert source pointers to integers, which can be manipulated.
- if (StoredValTy->isPointerTy()) {
- StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = getIntPtrType(StoredValTy, TD);
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
@@ -824,7 +834,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
return StoredVal;
// If the result is a pointer, inttoptr.
- if (LoadedTy->isPointerTy())
+ if (LoadedTy->getScalarType()->isPointerTy())
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
// Otherwise, bitcast.
@@ -1019,8 +1029,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
- if (SrcVal->getType()->isPointerTy())
- SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
+ if (SrcVal->getType()->getScalarType()->isPointerTy())
+ SrcVal = Builder.CreatePtrToInt(SrcVal,
+ getIntPtrType(SrcVal->getType(), TD));
if (!SrcVal->getType()->isIntegerTy())
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
@@ -1301,7 +1312,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
// If new PHI nodes were created, notify alias analysis.
- if (V->getType()->isPointerTy()) {
+ if (V->getType()->getScalarType()->isPointerTy()) {
AliasAnalysis *AA = gvn.getAliasAnalysis();
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
@@ -1498,7 +1509,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (isa<PHINode>(V))
V->takeName(LI);
- if (V->getType()->isPointerTy())
+ if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
++NumGVNLoad;
@@ -1730,7 +1741,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
- if (V->getType()->isPointerTy())
+ if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
++NumPRELoad;
@@ -1857,7 +1868,7 @@ bool GVN::processLoad(LoadInst *L) {
// Replace the load!
L->replaceAllUsesWith(AvailVal);
- if (AvailVal->getType()->isPointerTy())
+ if (AvailVal->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(AvailVal);
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -1914,7 +1925,7 @@ bool GVN::processLoad(LoadInst *L) {
// Remove it!
L->replaceAllUsesWith(StoredVal);
- if (StoredVal->getType()->isPointerTy())
+ if (StoredVal->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(StoredVal);
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -1943,7 +1954,7 @@ bool GVN::processLoad(LoadInst *L) {
// Remove it!
patchAndReplaceAllUsesWith(AvailableVal, L);
- if (DepLI->getType()->isPointerTy())
+ if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -2184,7 +2195,7 @@ bool GVN::processInstruction(Instruction *I) {
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
I->replaceAllUsesWith(V);
- if (MD && V->getType()->isPointerTy())
+ if (MD && V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(I);
++NumGVNSimpl;
@@ -2284,7 +2295,7 @@ bool GVN::processInstruction(Instruction *I) {
// Remove it!
patchAndReplaceAllUsesWith(repl, I);
- if (MD && repl->getType()->isPointerTy())
+ if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
return true;
@@ -2532,7 +2543,7 @@ bool GVN::performPRE(Function &F) {
addToLeaderTable(ValNo, Phi, CurrentBlock);
Phi->setDebugLoc(CurInst->getDebugLoc());
CurInst->replaceAllUsesWith(Phi);
- if (Phi->getType()->isPointerTy()) {
+ if (Phi->getType()->getScalarType()->isPointerTy()) {
// Because we have added a PHI-use of the pointer value, it has now
// "escaped" from alias analysis' perspective. We need to inform
// AA of this.
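The GVN changes replace plain isPointerTy() checks with getScalarType()->isPointerTy() and wrap getIntPtrType so that a vector of pointers coerces to a vector of pointer-sized integers rather than to a single scalar. A toy model of that wrapper, using a hand-rolled type descriptor instead of llvm::Type (names are illustrative only):

    // Minimal stand-in for a first-class type: a scalar or a vector of scalars.
    struct ToyType {
      bool IsVector;
      unsigned NumElements;     // meaningful only when IsVector
      bool ScalarIsPointer;
      unsigned ScalarBits;      // meaningful only for non-pointer scalars
    };

    // Build the integer type a stored pointer value is converted to before
    // bitcasting, preserving the vector shape of the input.
    ToyType intPtrTypeFor(const ToyType &Ty, unsigned PointerBits) {
      ToyType ITy;
      ITy.ScalarIsPointer = false;
      ITy.ScalarBits = PointerBits;   // scalar part is always an integer of pointer width
      ITy.IsVector = Ty.IsVector;     // keep the <N x ...> shape for vector inputs
      ITy.NumElements = Ty.IsVector ? Ty.NumElements : 0;
      return ITy;
    }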
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 82eb746467..8a2f093629 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1430,7 +1430,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
/// holds the RHS of the new loop test.
static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
- SCEVExpander &Rewriter, ScalarEvolution *SE) {
+ SCEVExpander &Rewriter, ScalarEvolution *SE,
+ Type *IntPtrTy) {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
const SCEV *IVInit = AR->getStart();
@@ -1456,7 +1457,8 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// We could handle pointer IVs other than i8*, but we need to compensate for
// gep index scaling. See canExpandBackedgeTakenCount comments.
assert(SE->getSizeOfExpr(
- cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
+ cast<PointerType>(GEPBase->getType())->getElementType(),
+ IntPtrTy)->isOne()
&& "unit stride pointer IV must be i8*");
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
@@ -1555,7 +1557,9 @@ LinearFunctionTestReplace(Loop *L,
CmpIndVar = IndVar;
}
- Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
+ Type *IntPtrTy = TD ? TD->getIntPtrType(IndVar->getType()) :
+ IntegerType::getInt64Ty(IndVar->getContext());
+ Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE, IntPtrTy);
assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
&& "genLoopLimit missed a cast");
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a44e798f12..e4b40f3d3a 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -486,7 +486,9 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// would be unsafe to do if there is anything else in the loop that may read
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
- unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
+ assert(DestPtr->getType()->isPointerTy()
+ && "Must be a pointer type.");
+ unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
Value *BasePtr =
Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
Preheader->getTerminator());
@@ -505,7 +507,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ Type *IntPtr = TD->getIntPtrType(DestPtr->getType());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
@@ -611,7 +613,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = TD->getIntPtrType(SI->getContext());
+  Type *IntPtr = TD->getIntPtrType(SI->getPointerOperand()->getType());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
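The loop-idiom rewrite computes the memset/memcpy length as (backedge-taken count + 1) * store size, widened to the pointer-sized integer type first. The arithmetic itself, as a standalone sketch (assuming the product fits in 64 bits):

    #include <cstdint>

    // The body executes BackedgeTakenCount + 1 times, each storing StoreSize bytes.
    uint64_t bytesStored(uint64_t BackedgeTakenCount, uint64_t StoreSize) {
      return (BackedgeTakenCount + 1) * StoreSize;
    }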
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 71c62257e7..af3a880cb9 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -582,7 +582,8 @@ private:
P.Partitions.push_back(New);
}
- bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) {
+ bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset,
+ bool IsVolatile) {
uint64_t Size = TD.getTypeStoreSize(Ty);
// If this memory access can be shown to *statically* extend outside the
@@ -603,7 +604,14 @@ private:
return true;
}
- insertUse(I, Offset, Size);
+ // We allow splitting of loads and stores where the type is an integer type
+ // and which cover the entire alloca. Such integer loads and stores
+ // often require decomposition into fine grained loads and stores.
+ bool IsSplittable = false;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+
+ insertUse(I, Offset, Size, IsSplittable);
return true;
}
@@ -624,7 +632,7 @@ private:
bool visitLoadInst(LoadInst &LI) {
assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
"All simple FCA loads should have been pre-split");
- return handleLoadOrStore(LI.getType(), LI, Offset);
+ return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
}
bool visitStoreInst(StoreInst &SI) {
@@ -634,7 +642,7 @@ private:
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- return handleLoadOrStore(ValOp->getType(), SI, Offset);
+ return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
}
@@ -1173,6 +1181,21 @@ Type *AllocaPartitioning::getCommonType(iterator I) const {
UserTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
UserTy = SI->getValueOperand()->getType();
+ } else {
+ return 0; // Bail if we have weird uses.
+ }
+
+ if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+ // If the type is larger than the partition, skip it. We only encounter
+ // this for split integer operations where we want to use the type of the
+ // entity causing the split.
+ if (ITy->getBitWidth() > (I->EndOffset - I->BeginOffset)*8)
+ continue;
+
+ // If we have found an integer type use covering the alloca, use that
+ // regardless of the other types, as integers are often used for a "bucket
+ // of bits" type.
+ return ITy;
}
if (Ty && Ty != UserTy)
@@ -2138,6 +2161,14 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
return false;
+ // We need to ensure that an integer type with the appropriate bitwidth can
+ // be converted to the alloca type, whatever that is. We don't want to force
+ // the alloca itself to have an integer type if there is a more suitable one.
+ Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
+ if (!canConvertValue(TD, AllocaTy, IntTy) ||
+ !canConvertValue(TD, IntTy, AllocaTy))
+ return false;
+
uint64_t Size = TD.getTypeStoreSize(AllocaTy);
// Check the uses to ensure the uses are (likely) promoteable integer uses.
@@ -2364,8 +2395,9 @@ private:
Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
assert(BeginOffset >= NewAllocaBeginOffset);
- unsigned AS = cast<PointerType>(PointerTy)->getAddressSpace();
- APInt Offset(TD.getPointerSizeInBits(AS), BeginOffset - NewAllocaBeginOffset);
+ assert(PointerTy->isPointerTy() &&
+ "Type must be pointer type!");
+ APInt Offset(TD.getTypeSizeInBits(PointerTy), BeginOffset - NewAllocaBeginOffset);
return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
}
@@ -2460,6 +2492,50 @@ private:
if (VecTy)
return rewriteVectorizedLoadInst(IRB, LI, OldOp);
+
+ uint64_t Size = EndOffset - BeginOffset;
+ if (Size < TD.getTypeStoreSize(LI.getType())) {
+ assert(!LI.isVolatile());
+ assert(LI.getType()->isIntegerTy() &&
+ "Only integer type loads and stores are split");
+ assert(LI.getType()->getIntegerBitWidth() ==
+ TD.getTypeStoreSizeInBits(LI.getType()) &&
+ "Non-byte-multiple bit width");
+ assert(LI.getType()->getIntegerBitWidth() ==
+ TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+ "Only alloca-wide loads can be split and recomposed");
+ IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8);
+ bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
+ canConvertValue(TD, NewAllocaTy, NarrowTy);
+ Value *V;
+ // Move the insertion point just past the load so that we can refer to it.
+ IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
+ if (IsConvertable)
+ V = convertValue(TD, IRB,
+ IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load")),
+ NarrowTy);
+ else
+ V = IRB.CreateAlignedLoad(
+ getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
+ getPartitionTypeAlign(NarrowTy), getName(".load"));
+ // Create a placeholder value with the same type as LI to use as the
+ // basis for the new value. This allows us to replace the uses of LI with
+ // the computed value, and then replace the placeholder with LI, leaving
+ // LI only used for this computation.
+ Value *Placeholder
+ = IRB.CreateLoad(UndefValue::get(LI.getType()->getPointerTo()));
+ V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
+ getName(".insert"));
+ LI.replaceAllUsesWith(V);
+ Placeholder->replaceAllUsesWith(&LI);
+ cast<Instruction>(Placeholder)->eraseFromParent();
+ if (Pass.DeadSplitInsts.insert(&LI))
+ Pass.DeadInsts.push_back(&LI);
+ DEBUG(dbgs() << " to: " << *V << "\n");
+ return IsConvertable;
+ }
+
if (IntTy && LI.getType()->isIntegerTy())
return rewriteIntegerLoad(IRB, LI);
@@ -2539,6 +2615,39 @@ private:
if (VecTy)
return rewriteVectorizedStoreInst(IRB, SI, OldOp);
Type *ValueTy = SI.getValueOperand()->getType();
+
+ uint64_t Size = EndOffset - BeginOffset;
+ if (Size < TD.getTypeStoreSize(ValueTy)) {
+ assert(!SI.isVolatile());
+ assert(ValueTy->isIntegerTy() &&
+ "Only integer type loads and stores are split");
+ assert(ValueTy->getIntegerBitWidth() ==
+ TD.getTypeStoreSizeInBits(ValueTy) &&
+ "Non-byte-multiple bit width");
+ assert(ValueTy->getIntegerBitWidth() ==
+ TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+ "Only alloca-wide stores can be split and recomposed");
+ IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+ Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
+ BeginOffset, getName(".extract"));
+ StoreInst *NewSI;
+ bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
+ canConvertValue(TD, NarrowTy, NewAllocaTy);
+ if (IsConvertable)
+ NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
+ &NewAI, NewAI.getAlignment());
+ else
+ NewSI = IRB.CreateAlignedStore(
+ V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
+ getPartitionTypeAlign(NarrowTy));
+ (void)NewSI;
+ if (Pass.DeadSplitInsts.insert(&SI))
+ Pass.DeadInsts.push_back(&SI);
+
+ DEBUG(dbgs() << " to: " << *NewSI << "\n");
+ return IsConvertable;
+ }
+
if (IntTy && ValueTy->isIntegerTy())
return rewriteIntegerStore(IRB, SI);
@@ -2687,9 +2796,8 @@ private:
= P.getMemTransferOffsets(II);
assert(OldPtr->getType()->isPointerTy() && "Must be a pointer type!");
- unsigned AS = cast<PointerType>(OldPtr->getType())->getAddressSpace();
// Compute the relative offset within the transfer.
- unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
+ unsigned IntPtrWidth = TD.getTypeSizeInBits(OldPtr->getType());
APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
: MTO.SourceBegin));
@@ -3173,6 +3281,9 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
uint64_t Offset, uint64_t Size) {
if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
return stripAggregateTypeWrapping(TD, Ty);
+ if (Offset > TD.getTypeAllocSize(Ty) ||
+ (TD.getTypeAllocSize(Ty) - Offset) < Size)
+ return 0;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
// We can't partition pointers...
@@ -3464,6 +3575,8 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
Instruction *I = DeadInsts.pop_back_val();
DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
// Zero out the operand and see if it becomes trivially dead.
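The new SROA code above splits alloca-wide integer loads and stores into narrower pieces, extracting or inserting the relevant bytes of the wide value (extractInteger/insertInteger). A shift-and-mask sketch of what such a split access computes, assuming little-endian layout, byte-multiple widths, and values that fit in 64 bits (this models the idea, not the pass's helpers):

    #include <cstdint>

    uint64_t extractNarrow(uint64_t Wide, unsigned ByteOffset, unsigned NarrowBytes) {
      uint64_t Shifted = Wide >> (8 * ByteOffset);
      uint64_t Mask = NarrowBytes >= 8 ? ~0ULL : ((1ULL << (8 * NarrowBytes)) - 1);
      return Shifted & Mask;                  // the narrow load's value
    }

    uint64_t insertNarrow(uint64_t Wide, uint64_t Narrow,
                          unsigned ByteOffset, unsigned NarrowBytes) {
      uint64_t Mask = NarrowBytes >= 8 ? ~0ULL : ((1ULL << (8 * NarrowBytes)) - 1);
      Wide &= ~(Mask << (8 * ByteOffset));                   // clear the target bytes
      return Wide | ((Narrow & Mask) << (8 * ByteOffset));   // splice in the narrow store
    }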
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index a46d09c320..a5446294e3 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -963,7 +963,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getType()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index c82a00fc2c..f3448bcd87 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -165,9 +165,10 @@ struct StpCpyOpt: public LibCallOptimization {
uint64_t Len = GetStringLength(Src);
if (Len == 0) return 0;
- Value *LenV = ConstantInt::get(TD->getIntPtrType(*Context), Len);
+ Type *PT = FT->getParamType(0);
+ Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
Value *DstEnd = B.CreateGEP(Dst,
- ConstantInt::get(TD->getIntPtrType(*Context),
+ ConstantInt::get(TD->getIntPtrType(PT),
Len - 1));
// We have enough information to now generate the memcpy call to do the
@@ -220,9 +221,10 @@ struct StrNCpyOpt : public LibCallOptimization {
// Let strncpy handle the zero padding
if (Len > SrcLen+1) return 0;
+ Type *PT = FT->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ ConstantInt::get(TD->getIntPtrType(PT), Len), 1);
return Dst;
}
@@ -508,10 +510,11 @@ struct MemCpyOpt : public LibCallOptimization {
if (!TD) return 0;
FunctionType *FT = Callee->getFunctionType();
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT))
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -530,10 +533,11 @@ struct MemMoveOpt : public LibCallOptimization {
if (!TD) return 0;
FunctionType *FT = Callee->getFunctionType();
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT))
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
@@ -552,10 +556,11 @@ struct MemSetOpt : public LibCallOptimization {
if (!TD) return 0;
FunctionType *FT = Callee->getFunctionType();
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT))
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -980,8 +985,9 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ Type *AT = CI->getArgOperand(0)->getType();
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ ConstantInt::get(TD->getIntPtrType(AT), // Copy the
FormatStr.size() + 1), 1); // nul byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1108,8 +1114,9 @@ struct FPutsOpt : public LibCallOptimization {
uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len) return 0;
// Known to have no uses (see above).
+ Type *PT = FT->getParamType(0);
return EmitFWrite(CI->getArgOperand(0),
- ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ ConstantInt::get(TD->getIntPtrType(PT), Len-1),
CI->getArgOperand(1), B, TD, TLI);
}
};
@@ -1134,8 +1141,9 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require DataLayout.
if (!TD) return 0;
+ Type *AT = CI->getArgOperand(1)->getType();
Value *NewCI = EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
+ ConstantInt::get(TD->getIntPtrType(AT),
FormatStr.size()),
CI->getArgOperand(0), B, TD, TLI);
return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index fa2faa2dad..bd28f10654 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -46,9 +46,8 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(Ptr->getType()),
B.getInt8PtrTy(),
NULL);
CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
@@ -73,11 +72,10 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(Ptr->getType()),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(Ptr->getType()),
NULL);
CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
@@ -126,12 +124,12 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context), NULL);
+ TD->getIntPtrType(Ptr1->getType()),
+ NULL);
CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
CastToCStr(Ptr2, B), Len, "strncmp");
@@ -201,14 +199,14 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
AttributeWithIndex AWI;
AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
Attributes::NoUnwind);
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
AttrListPtr::get(AWI),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context), NULL);
+ TD->getIntPtrType(Dst->getType()),
+ TD->getIntPtrType(Src->getType()),
+ NULL);
Dst = CastToCStr(Dst, B);
Src = CastToCStr(Src, B);
CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
@@ -230,12 +228,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt32Ty(),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(Ptr->getType()),
NULL);
CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
@@ -260,12 +257,12 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context), NULL);
+ TD->getIntPtrType(Ptr1->getType()),
+ NULL);
CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
Len, "memcmp");
@@ -425,24 +422,24 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture);
AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
Attributes::NoUnwind);
- LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
+ Type *PtrTy = Ptr->getType();
if (File->getType()->isPointerTy())
F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(PtrTy),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(PtrTy),
+ TD->getIntPtrType(PtrTy),
File->getType(), NULL);
else
- F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context),
+ F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(PtrTy),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(PtrTy),
+ TD->getIntPtrType(PtrTy),
File->getType(), NULL);
CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(Context), 1), File);
+ ConstantInt::get(TD->getIntPtrType(PtrTy), 1), File);
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
@@ -464,12 +461,13 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
IRBuilder<> B(CI);
if (Name == "__memcpy_chk") {
+ Type *PT = FT->getParamType(0);
// Check if this has the right signature.
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT) ||
+ FT->getParamType(3) != TD->getIntPtrType(PT))
return false;
if (isFoldable(3, 2, false)) {
@@ -488,11 +486,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
if (Name == "__memmove_chk") {
// Check if this has the right signature.
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT) ||
+ FT->getParamType(3) != TD->getIntPtrType(PT))
return false;
if (isFoldable(3, 2, false)) {
@@ -506,11 +505,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
if (Name == "__memset_chk") {
// Check if this has the right signature.
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT) ||
+ FT->getParamType(3) != TD->getIntPtrType(PT))
return false;
if (isFoldable(3, 2, false)) {
@@ -525,11 +525,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") {
// Check if this has the right signature.
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 3 ||
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT))
return 0;
@@ -551,11 +552,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") {
// Check if this has the right signature.
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
!FT->getParamType(2)->isIntegerTy() ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(3) != TD->getIntPtrType(PT))
return false;
if (isFoldable(3, 2, false)) {
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 9729687a83..c09d982d65 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -806,8 +806,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
const DataLayout *TD) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
- unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
- unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 64;
+ unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : 64;
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(V, KnownZero, KnownOne, TD);
unsigned TrailZ = KnownZero.countTrailingOnes();
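getOrEnforceKnownAlignment derives the provable alignment from the number of low address bits known to be zero (TrailZ above). The conversion from trailing zero bits to an alignment, as a small sketch (the real code also caps the result; the cap used here is an assumption):

    // Three known-zero low bits mean the address is a multiple of 8, and so on.
    unsigned alignFromKnownZeroLowBits(unsigned TrailZ) {
      if (TrailZ > 29) TrailZ = 29;   // assumed cap to keep the shift and alignment sane
      return 1u << TrailZ;
    }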
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index a008da67e9..870e2b2ade 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -392,7 +392,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
+ IntegerType *PtrTy = TD->getIntPtrType(V->getType());
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
@@ -532,9 +532,13 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
- if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext()))
- if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV))
+ if (TD && CV) {
+ PtrToIntInst *PTII = NULL;
+ if ((PTII = dyn_cast<PtrToIntInst>(CV)) &&
+ CV->getType() == TD->getIntPtrType(CV->getContext(),
+ PTII->getPointerAddressSpace()))
CV = PTII->getOperand(0);
+ }
return CV;
}
@@ -981,7 +985,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without DataLayout");
- CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()),
"magicptr");
}
@@ -2709,7 +2713,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
if (CompVal->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without DataLayout");
CompVal = Builder.CreatePtrToInt(CompVal,
- TD->getIntPtrType(CompVal->getContext()),
+ TD->getIntPtrType(CompVal->getType()),
"magicptr");
}
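Both hunks above convert a pointer comparison value to an integer of the pointer's width before forming a switch, and a ptrtoint feeding the comparison is now stripped only when it is lossless, i.e. its result type is exactly the pointer-width integer for the pointer's address space. A toy version of that check (hypothetical types, not the LLVM classes):

    struct ToyPtrToInt {
      unsigned ResultBits;          // width of the integer the cast produces
      unsigned PointerAddrSpace;    // address space of the source pointer
    };

    bool isLosslessPtrToInt(const ToyPtrToInt &Cast,
                            unsigned (*pointerBitsForAS)(unsigned)) {
      return Cast.ResultBits == pointerBitsForAS(Cast.PointerAddrSpace);
    }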
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index b15acdff63..162b29e829 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -102,14 +102,13 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
// Check if this has the right signature.
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) ||
+ FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(1)))
return 0;
if (isFoldable(3, 2, false)) {
@@ -125,14 +124,13 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
// Check if this has the right signature.
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) ||
+ FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(1)))
return 0;
if (isFoldable(3, 2, false)) {
@@ -148,14 +146,13 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
// Check if this has the right signature.
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) ||
+ FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(0)))
return 0;
if (isFoldable(3, 2, false)) {
@@ -180,7 +177,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
@@ -205,8 +202,8 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
Value *Ret =
EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(Context), Len),
- CI->getArgOperand(2), B, TD, TLI);
+ ConstantInt::get(TD->getIntPtrType(Dst->getType()),
+ Len), CI->getArgOperand(2), B, TD, TLI);
return Ret;
}
return 0;
@@ -225,7 +222,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
!FT->getParamType(2)->isIntegerTy() ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(0)))
return 0;
if (isFoldable(3, 2, false)) {
@@ -287,7 +284,8 @@ struct StrCatOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ ConstantInt::get(TD->getIntPtrType(Src->getType()),
+ Len + 1), 1);
return Dst;
}
};
@@ -359,8 +357,9 @@ struct StrChrOpt : public LibCallOptimization {
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
return 0;
+ Type *PT = FT->getParamType(0);
return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ ConstantInt::get(TD->getIntPtrType(PT), Len),
B, TD, TLI);
}
@@ -454,8 +453,9 @@ struct StrCmpOpt : public LibCallOptimization {
// These optimizations require DataLayout.
if (!TD) return 0;
+ Type *PT = FT->getParamType(0);
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(*Context),
+ ConstantInt::get(TD->getIntPtrType(PT),
std::min(Len1, Len2)), B, TD, TLI);
}
@@ -537,7 +537,7 @@ struct StrCpyOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ ConstantInt::get(TD->getIntPtrType(Dst->getType()), Len), 1);
return Dst;
}
};
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index f944d9b4fc..423c7a4911 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -18,10 +18,13 @@
//
// This pass has three parts:
// 1. The main loop pass that drives the different parts.
-// 2. LoopVectorizationLegality - A helper class that checks for the legality
+// 2. LoopVectorizationLegality - A unit that checks for the legality
// of the vectorization.
-// 3. SingleBlockLoopVectorizer - A helper class that performs the actual
+// 3. SingleBlockLoopVectorizer - A unit that performs the actual
// widening of instructions.
+// 4. LoopVectorizationCostModel - A unit that checks for the profitability
+// of vectorization. It decides on the optimal vector width, which
+// can be one, if vectorization is not profitable.
//===----------------------------------------------------------------------===//
//
// The reduction-variable vectorization is based on the paper:
@@ -51,13 +54,14 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -67,13 +71,14 @@
using namespace llvm;
static cl::opt<unsigned>
-DefaultVectorizationFactor("default-loop-vectorize-width",
- cl::init(4), cl::Hidden,
- cl::desc("Set the default loop vectorization width"));
+VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
+ cl::desc("Set the default vectorization width. Zero is autoselect."));
+
namespace {
-// Forward declaration.
+// Forward declarations.
class LoopVectorizationLegality;
+class LoopVectorizationCostModel;
/// SingleBlockLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
@@ -203,7 +208,10 @@ public:
enum ReductionKind {
NoReduction = -1, /// Not a reduction.
IntegerAdd = 0, /// Sum of numbers.
- IntegerMult = 1 /// Product of numbers.
+ IntegerMult = 1, /// Product of numbers.
+ IntegerOr = 2, /// Bitwise or logical OR of numbers.
+ IntegerAnd = 3, /// Bitwise or logical AND of numbers.
+ IntegerXor = 4 /// Bitwise or logical XOR of numbers.
};
/// This POD struct holds information about reduction variables.
@@ -229,11 +237,10 @@ public:
/// of the reductions that were found in the loop.
typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
- /// Returns the maximum vectorization factor that we *can* use to vectorize
- /// this loop. This does not mean that it is profitable to vectorize this
- /// loop, only that it is legal to do so. This may be a large number. We
- /// can vectorize to any SIMD width below this number.
- unsigned getLoopMaxVF();
+ /// Returns true if it is legal to vectorize this loop.
+ /// This does not mean that it is profitable to vectorize this
+ /// loop, only that it is legal to do so.
+ bool canVectorize();
/// Returns the Induction variable.
PHINode *getInduction() {return Induction;}
@@ -259,10 +266,6 @@ private:
/// Returns true if BB is vectorizable
bool canVectorizeMemory(BasicBlock &BB);
- // Check if a pointer value is known to be disjoint.
- // Example: Alloca, Global, NoAlias.
- bool isIdentifiedSafeObject(Value* Val);
-
/// Returns True, if 'Phi' is the kind of reduction variable for type
/// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
@@ -290,6 +293,48 @@ private:
SmallPtrSet<Value*, 4> AllowedExit;
};
+/// LoopVectorizationCostModel - estimates the expected speedups due to
+/// vectorization.
+/// In many cases vectorization is not profitable. This can happen because
+/// of a number of reasons. In this class we mainly attempt to predict
+/// the expected speedup/slowdowns due to the supported instruction set.
+/// We use the VectorTargetTransformInfo to query the different backends
+/// for the cost of different operations.
+class LoopVectorizationCostModel {
+public:
+ /// C'tor.
+ LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
+ LoopVectorizationLegality *Leg,
+ const VectorTargetTransformInfo *Vtti):
+ TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
+
+ /// Returns the most profitable vectorization factor for the loop that is
+ /// smaller or equal to the VF argument. This method checks every power
+ /// of two up to VF.
+ unsigned findBestVectorizationFactor(unsigned VF = 4);
+
+private:
+ /// Returns the expected execution cost. The unit of the cost does
+ /// not matter because we use the 'cost' units to compare different
+ /// vector widths. The cost that is returned is *not* normalized by
+ /// the factor width.
+ unsigned expectedCost(unsigned VF);
+
+ /// Returns the execution time cost of an instruction for a given vector
+ /// width. Vector width of one means scalar.
+ unsigned getInstructionCost(Instruction *I, unsigned VF);
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+ /// Scev analysis.
+ ScalarEvolution *SE;
+
+ /// Vectorization legality.
+ LoopVectorizationLegality *Legal;
+ /// Vector target information.
+ const VectorTargetTransformInfo *VTTI;
+};
+
struct LoopVectorize : public LoopPass {
static char ID; // Pass identification, replacement for typeid
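The cost model declared above picks the vectorization factor by comparing the estimated cost of each candidate width, normalized per original iteration, against the scalar loop. A standalone sketch of that selection loop, with expectedCost standing in for the target cost query (the default maximum of 4 mirrors the patch; everything else is illustrative):

    #include <functional>

    unsigned pickVectorWidth(const std::function<unsigned(unsigned)> &expectedCost,
                             unsigned MaxVF = 4) {
      float Best = static_cast<float>(expectedCost(1));   // scalar baseline
      unsigned Width = 1;
      for (unsigned VF = 2; VF <= MaxVF; VF *= 2) {
        // A VF-wide loop runs 1/VF as many iterations, so divide the cost by VF.
        float PerIteration = expectedCost(VF) / static_cast<float>(VF);
        if (PerIteration < Best) {
          Best = PerIteration;
          Width = VF;
        }
      }
      return Width;   // 1 means vectorization is legal but not beneficial
    }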
@@ -300,6 +345,7 @@ struct LoopVectorize : public LoopPass {
ScalarEvolution *SE;
DataLayout *DL;
LoopInfo *LI;
+ TargetTransformInfo *TTI;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -309,25 +355,42 @@ struct LoopVectorize : public LoopPass {
SE = &getAnalysis<ScalarEvolution>();
DL = getAnalysisIfAvailable<DataLayout>();
LI = &getAnalysis<LoopInfo>();
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
// Check if it is legal to vectorize the loop.
LoopVectorizationLegality LVL(L, SE, DL);
- unsigned MaxVF = LVL.getLoopMaxVF();
-
- // Check that we can vectorize this loop using the chosen vectorization
- // width.
- if (MaxVF < DefaultVectorizationFactor) {
- DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF << ").\n");
+ if (!LVL.canVectorize()) {
+ DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
- DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF << ").\n");
+    // Select the preferred vectorization factor.
+ unsigned VF = 1;
+ if (VectorizationFactor == 0) {
+ const VectorTargetTransformInfo *VTTI = 0;
+ if (TTI)
+ VTTI = TTI->getVectorTargetTransformInfo();
+ // Use the cost model.
+ LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
+ VF = CM.findBestVectorizationFactor();
+
+ if (VF == 1) {
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+ return false;
+ }
+
+ } else {
+ // Use the user command flag.
+ VF = VectorizationFactor;
+ }
+
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ").\n");
// If we decided that it is *legal* to vectorizer the loop then do it.
- SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, DefaultVectorizationFactor);
+ SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, VF);
LB.vectorize(&LVL);
DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -660,6 +723,13 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal
void
SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
+ //===------------------------------------------------===//
+ //
+  // Notice: any optimization or new instruction that goes
+  // into the code below should also be implemented in
+ // the cost-model.
+ //
+ //===------------------------------------------------===//
typedef SmallVector<PHINode*, 4> PhiVector;
BasicBlock &BB = *OrigLoop->getHeader();
Constant *Zero = ConstantInt::get(
@@ -914,14 +984,28 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Extract the first scalar.
Value *Scalar0 =
Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
- // Extract and sum the remaining vector elements.
+ // Extract and reduce the remaining vector elements.
for (unsigned i=1; i < VF; ++i) {
Value *Scalar1 =
Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
- if (RdxDesc.Kind == LoopVectorizationLegality::IntegerAdd) {
- Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
- } else {
- Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+ switch (RdxDesc.Kind) {
+ case LoopVectorizationLegality::IntegerAdd:
+ Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerMult:
+ Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerOr:
+ Scalar0 = Builder.CreateOr(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerAnd:
+ Scalar0 = Builder.CreateAnd(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerXor:
+ Scalar0 = Builder.CreateXor(Scalar0, Scalar1);
+ break;
+ default:
+ llvm_unreachable("Unknown reduction operation");
}
}
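The switch above finishes a reduction by folding the extracted vector lanes together with the reduction's own operation. The same horizontal step over an array of lane values, as a scalar sketch (assumes at least one lane; integer reductions only, matching the supported kinds):

    #include <cstdint>
    #include <vector>

    enum class ReductionKind { Add, Mult, Or, And, Xor };

    uint64_t combineLanes(const std::vector<uint64_t> &Lanes, ReductionKind Kind) {
      uint64_t Acc = Lanes[0];
      for (size_t i = 1; i < Lanes.size(); ++i) {
        switch (Kind) {
        case ReductionKind::Add:  Acc += Lanes[i]; break;
        case ReductionKind::Mult: Acc *= Lanes[i]; break;
        case ReductionKind::Or:   Acc |= Lanes[i]; break;
        case ReductionKind::And:  Acc &= Lanes[i]; break;
        case ReductionKind::Xor:  Acc ^= Lanes[i]; break;
        }
      }
      return Acc;
    }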
@@ -961,18 +1045,18 @@ void SingleBlockLoopVectorizer::cleanup() {
SE->forgetLoop(OrigLoop);
}
-unsigned LoopVectorizationLegality::getLoopMaxVF() {
+bool LoopVectorizationLegality::canVectorize() {
if (!TheLoop->getLoopPreheader()) {
assert(false && "No preheader!!");
DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
- return 1;
+ return false;
}
// We can only vectorize single basic block loops.
unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1) {
DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
- return 1;
+ return false;
}
// We need to have a loop header.
@@ -982,22 +1066,22 @@ unsigned LoopVectorizationLegality::getLoopMaxVF() {
// Go over each instruction and look at memory deps.
if (!canVectorizeBlock(*BB)) {
DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
- return 1;
+ return false;
}
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
if (ExitCount == SE->getCouldNotCompute()) {
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
- return 1;
+ return false;
}
DEBUG(dbgs() << "LV: We can vectorize this loop!\n");
// Okay! We can vectorize. At this point we don't have any other mem analysis
- // which may limit our maximum vectorization factor, so just return the
- // maximum SIMD size.
- return DefaultVectorizationFactor;
+ // which may limit our maximum vectorization factor, so just return true with
+ // no restrictions.
+ return true;
}
bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
@@ -1032,7 +1116,19 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
continue;
}
if (AddReductionVar(Phi, IntegerMult)) {
- DEBUG(dbgs() << "LV: Found an Mult reduction PHI."<< *Phi <<"\n");
+ DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerOr)) {
+ DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerAnd)) {
+ DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerXor)) {
+ DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
continue;
}
@@ -1178,7 +1274,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
GetUnderlyingObjects(*I, TempObjects, DL);
for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
it != e; ++it) {
- if (!isIdentifiedSafeObject(*it)) {
+ if (!isIdentifiedObject(*it)) {
DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
return false;
}
@@ -1196,7 +1292,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
GetUnderlyingObjects(*I, TempObjects, DL);
for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
it != e; ++it) {
- if (!isIdentifiedSafeObject(*it)) {
+ if (!isIdentifiedObject(*it)) {
DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
return false;
}
@@ -1213,19 +1309,6 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
return true;
}
-/// Checks if the value is a Global variable or if it is an Arguments
-/// marked with the NoAlias attribute.
-bool LoopVectorizationLegality::isIdentifiedSafeObject(Value* Val) {
- assert(Val && "Invalid value");
- if (isa<GlobalValue>(Val))
- return true;
- if (isa<AllocaInst>(Val))
- return true;
- if (Argument *A = dyn_cast<Argument>(Val))
- return A->hasNoAliasAttr();
- return false;
-}
-
bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
ReductionKind Kind) {
if (Phi->getNumIncomingValues() != 2)
@@ -1319,6 +1402,12 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
case Instruction::UDiv:
case Instruction::SDiv:
return Kind == IntegerMult;
+ case Instruction::And:
+ return Kind == IntegerAnd;
+ case Instruction::Or:
+ return Kind == IntegerOr;
+ case Instruction::Xor:
+ return Kind == IntegerXor;
}
}
@@ -1340,6 +1429,193 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
return true;
}
+unsigned
+LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
+ if (!VTTI) {
+ DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
+ return 1;
+ }
+
+ float Cost = expectedCost(1);
+ unsigned Width = 1;
+ DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n");
+ for (unsigned i=2; i <= VF; i*=2) {
+  // Notice that the vector loop needs to be executed fewer times, so
+  // we need to divide the cost of the vector loop by the vectorization
+  // factor before comparing it against the scalar loop cost.
+ float VectorCost = expectedCost(i) / (float)i;
+ DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " <<
+ (int)VectorCost << ".\n");
+ if (VectorCost < Cost) {
+ Cost = VectorCost;
+ Width = i;
+ }
+ }
+
+ DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
+ return Width;
+}
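+// A hypothetical walk-through of the selection loop above, with made-up
+// costs (no real target is assumed): if expectedCost(1) == 8,
+// expectedCost(2) == 10 and expectedCost(4) == 16, the per-iteration costs
+// are 8, 10/2 == 5 and 16/4 == 4, so the widest factor, VF == 4, wins.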
+
+unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
+ // We can only estimate the cost of single basic block loops.
+ assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop");
+
+ BasicBlock *BB = TheLoop->getHeader();
+ unsigned Cost = 0;
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ Instruction *Inst = it;
+ unsigned C = getInstructionCost(Inst, VF);
+ Cost += C;
+ DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF <<
+          " for instruction: "<< *Inst << "\n");
+ }
+
+ return Cost;
+}
+
+unsigned
+LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
+ assert(VTTI && "Invalid vector target transformation info");
+ switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:
+ return 0;
+ case Instruction::Br: {
+ return VTTI->getInstrCost(I->getOpcode());
+ }
+ case Instruction::PHI:
+ return 0;
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ Type *VTy = VectorType::get(I->getType(), VF);
+ return VTTI->getInstrCost(I->getOpcode(), VTy);
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ Type *VTy = VectorType::get(I->getType(), VF);
+ const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
+ bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
+ Type *CondTy = SI->getCondition()->getType();
+ if (ScalarCond)
+ CondTy = VectorType::get(CondTy, VF);
+
+ return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy);
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF);
+ return VTTI->getInstrCost(I->getOpcode(), VTy);
+ }
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(I);
+ Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+
+ // Scalarized stores.
+ if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
+ unsigned Cost = 0;
+ if (VF != 1) {
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+ VTy);
+ // The cost of extracting from the value vector and pointer vector.
+ Cost += VF * (ExtCost * 2);
+ }
+ // The cost of the scalar stores.
+ Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+ VTy->getScalarType(),
+ SI->getAlignment(),
+ SI->getPointerAddressSpace());
+ return Cost;
+ }
+
+ // Wide stores.
+ return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(),
+ SI->getPointerAddressSpace());
+ }
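+  // A hypothetical breakdown of the scalarized-store path above, with
+  // made-up unit costs: for VF == 4, ExtCost == 1 and a scalar store
+  // cost of 2, the estimate is 4 * (1 * 2) + 4 * 2 == 16.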
+ case Instruction::Load: {
+ LoadInst *LI = cast<LoadInst>(I);
+ Type *VTy = VectorType::get(I->getType(), VF);
+
+ // Scalarized loads.
+ if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
+ unsigned Cost = 0;
+ if (VF != 1) {
+ unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
+        unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
+
+ // The cost of inserting the loaded value into the result vector, and
+ // extracting from a vector of pointers.
+ Cost += VF * (InCost + ExCost);
+ }
+      // The cost of the scalar loads.
+ Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(),
+ LI->getAlignment(),
+ LI->getPointerAddressSpace());
+ return Cost;
+ }
+
+ // Wide loads.
+ return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(),
+ LI->getPointerAddressSpace());
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF);
+ Type *DstTy = VectorType::get(I->getType(), VF);
+ return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy);
+ }
+ default: {
+ // We are scalarizing the instruction. Return the cost of the scalar
+ // instruction, plus the cost of insert and extract into vector
+ // elements, times the vector width.
+ unsigned Cost = 0;
+ Type *Ty = I->getType();
+
+ if (!Ty->isVoidTy()) {
+ Type *VTy = VectorType::get(Ty, VF);
+ unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
+ Cost += VF * (InsCost + ExtCost);
+ }
+
+    // We don't have any information on the scalar instruction, but maybe
+    // the target does.
+    // TODO: This may be a target-specific intrinsic; we need an API for
+    // querying the cost of such intrinsics.
+ Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty);
+
+ return Cost;
+ }
+ }// end of switch.
+}
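+// A hypothetical example of the scalarization fallback in the default case
+// above, again with made-up unit costs: for a non-void instruction at
+// VF == 4, with insert == extract == 1 and a scalar opcode cost of 3, the
+// estimate is 4 * (1 + 1) + 4 * 3 == 20.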
+
+
} // namespace
char LoopVectorize::ID = 0;