aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2012-12-20 22:31:43 +0000
committerChandler Carruth <chandlerc@gmail.com>2012-12-20 22:31:43 +0000
commit41d2daa9344a4c4e8bb88dba51cd087c0648b695 (patch)
tree7e22b30849a5a15a044a3423d0e8df884992ceea /lib/Transforms
parent571f3e0cc9e767d1e3dcfd3e7e0b3d11ec4f7884 (diff)
parent6c583141bf6b7a6b5f8125c1037ecbc089813288 (diff)
Updating branches/google/testing to 170392
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@170779 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/IPO/Inliner.cpp6
-rw-r--r--lib/Transforms/IPO/Internalize.cpp8
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp102
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp34
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp54
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp6
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp4
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp217
-rw-r--r--lib/Transforms/Scalar/SROA.cpp373
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp7
-rw-r--r--lib/Transforms/Utils/Local.cpp35
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp145
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.h12
-rw-r--r--lib/Transforms/Vectorize/Vectorize.cpp2
14 files changed, 650 insertions, 355 deletions
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index bd8fa66d52..b636414250 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -214,11 +214,13 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
OptSizeThreshold < thres)
thres = OptSizeThreshold;
- // Listen to the inlinehint attribute when it would increase the threshold.
+ // Listen to the inlinehint attribute when it would increase the threshold
+ // and the caller does not need to minimize its size.
Function *Callee = CS.getCalledFunction();
bool InlineHint = Callee && !Callee->isDeclaration() &&
Callee->getFnAttributes().hasAttribute(Attributes::InlineHint);
- if (InlineHint && HintThreshold > thres)
+ if (InlineHint && HintThreshold > thres
+ && !Caller->getFnAttributes().hasAttribute(Attributes::MinSize))
thres = HintThreshold;
return thres;
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index b2cd3a765a..bd94f0a252 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -48,7 +48,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit InternalizePass();
- explicit InternalizePass(const std::vector <const char *>& exportList);
+ explicit InternalizePass(ArrayRef<const char *> exportList);
void LoadFile(const char *Filename);
virtual bool runOnModule(Module &M);
@@ -72,10 +72,10 @@ InternalizePass::InternalizePass()
ExternalNames.insert(APIList.begin(), APIList.end());
}
-InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
+InternalizePass::InternalizePass(ArrayRef<const char *> exportList)
: ModulePass(ID){
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
- for(std::vector<const char *>::const_iterator itr = exportList.begin();
+ for(ArrayRef<const char *>::const_iterator itr = exportList.begin();
itr != exportList.end(); itr++) {
ExternalNames.insert(*itr);
}
@@ -173,6 +173,6 @@ ModulePass *llvm::createInternalizePass() {
return new InternalizePass();
}
-ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) {
+ModulePass *llvm::createInternalizePass(ArrayRef<const char *> el) {
return new InternalizePass(el);
}
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d8257e64d8..47223c3b35 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -37,10 +37,10 @@ static Constant *SubOne(ConstantInt *C) {
static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
if (!V->hasOneUse() || !V->getType()->isIntegerTy())
return 0;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return 0;
-
+
if (I->getOpcode() == Instruction::Mul)
if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
return I->getOperand(0);
@@ -64,22 +64,22 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
// There are different heuristics we can use for this. Here are some simple
// ones.
-
- // Add has the property that adding any two 2's complement numbers can only
+
+ // Add has the property that adding any two 2's complement numbers can only
// have one carry bit which can change a sign. As such, if LHS and RHS each
// have at least two sign bits, we know that the addition of the two values
// will sign extend fine.
if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
return true;
-
-
+
+
// If one of the operands only has one non-zero bit, and if the other operand
// has a known-zero bit in a more significant place than it (not including the
// sign bit) the ripple may go up to and fill the zero, but won't change the
// sign. For example, (X & ~4) + 1.
-
+
// TODO: Implement.
-
+
return false;
}
@@ -100,7 +100,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
const APInt &Val = CI->getValue();
if (Val.isSignBit())
return BinaryOperator::CreateXor(LHS, RHS);
-
+
// See if SimplifyDemandedBits can simplify this. This handles stuff like
// (X & 254)+1 -> (X&254)|1
if (SimplifyDemandedInstructionBits(I))
@@ -110,7 +110,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
if (ZI->getSrcTy()->isIntegerTy(1))
return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
-
+
Value *XorLHS = 0; ConstantInt *XorRHS = 0;
if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
@@ -124,13 +124,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
else if (XorRHS->getValue().isPowerOf2())
ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
}
-
+
if (ExtendAmt) {
APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
if (!MaskedValueIsZero(XorLHS, Mask))
ExtendAmt = 0;
}
-
+
if (ExtendAmt) {
Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
@@ -175,7 +175,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
return BinaryOperator::CreateNeg(NewAdd);
}
-
+
return BinaryOperator::CreateSub(RHS, LHSV);
}
@@ -209,7 +209,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
APInt RHSKnownOne(IT->getBitWidth(), 0);
APInt RHSKnownZero(IT->getBitWidth(), 0);
ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
-
+
// No bits in common -> bitwise or.
if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
return BinaryOperator::CreateOr(LHS, RHS);
@@ -251,7 +251,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// See if all bits from the first bit set in the Add RHS up are included
// in the mask. First, get the rightmost bit.
const APInt &AddRHSV = CRHS->getValue();
-
+
// Form a mask of all bits from the lowest bit added through the top.
APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
@@ -289,7 +289,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the true select value.
return SelectInst::Create(SI->getCondition(), N, A);
-
+
if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the false select value.
return SelectInst::Create(SI->getCondition(), A, N);
@@ -301,18 +301,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
// (add (sext x), cst) --> (sext (add x, cst'))
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
- Constant *CI =
+ Constant *CI =
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
// Insert the new, smaller add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
return new SExtInst(NewAdd, I.getType());
}
}
-
+
// (add (sext x), (sext y)) --> (sext (add int x, y))
if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
// Only do this if x/y have the same type, if at last one of them has a
@@ -323,7 +323,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0))) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
return new SExtInst(NewAdd, I.getType());
}
@@ -351,18 +351,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- // X + 0 --> X
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->isExactlyValue(ConstantFP::getNegativeZero
- (I.getType())->getValueAPF()))
- return ReplaceInstUsesWith(I, LHS);
- }
+ if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD))
+ return ReplaceInstUsesWith(I, V);
- if (isa<PHINode>(LHS))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
+ if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
// -A + B --> B - A
// -A + -B --> -(A + B)
@@ -374,11 +368,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (Value *V = dyn_castFNegVal(RHS))
return BinaryOperator::CreateFSub(LHS, V);
- // Check for X+0.0. Simplify it to X if we know X is not -0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
- if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
- return ReplaceInstUsesWith(I, LHS);
-
// Check for (fadd double (sitofp x), y), see if we can merge this into an
// integer add followed by a promotion.
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
@@ -388,7 +377,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// requires a constant pool load, and generally allows the add to be better
// instcombined.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
- Constant *CI =
+ Constant *CI =
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
@@ -399,7 +388,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
return new SIToFPInst(NewAdd, I.getType());
}
}
-
+
// (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
// Only do this if x/y have the same type, if at last one of them has a
@@ -410,13 +399,13 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0))) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0),"addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
}
-
+
return Changed ? &I : 0;
}
@@ -428,7 +417,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty) {
assert(TD && "Must have target data info for this");
-
+
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
@@ -451,7 +440,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
}
}
}
-
+
if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
// X - (gep X, ...)
if (RHSGEP->getOperand(0) == LHS) {
@@ -467,16 +456,16 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
}
}
}
-
+
// Avoid duplicating the arithmetic if GEP2 has non-constant indices and
// multiple users.
if (GEP1 == 0 ||
(GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
return 0;
-
+
// Emit the offset of the GEP and an intptr_t.
Value *Result = EmitGEPOffset(GEP1);
-
+
// If we had a constant expression GEP on the other side offsetting the
// pointer, subtract it from the offset we have.
if (GEP2) {
@@ -517,7 +506,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Replace (-1 - A) with (~A).
if (match(Op0, m_AllOnes()))
return BinaryOperator::CreateNot(Op1);
-
+
if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
// C - ~X == X + (1+C)
Value *X = 0;
@@ -553,18 +542,18 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return &I;
}
-
+
{ Value *Y;
// X-(X+Y) == -Y X-(Y+X) == -Y
if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
return BinaryOperator::CreateNeg(Y);
-
+
// (X-Y)-X == -Y
if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
return BinaryOperator::CreateNeg(Y);
}
-
+
if (Op1->hasOneUse()) {
Value *X = 0, *Y = 0, *Z = 0;
Constant *C = 0;
@@ -581,7 +570,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
return BinaryOperator::CreateAnd(Op0,
Builder->CreateNot(Y, Y->getName() + ".not"));
-
+
// 0 - (X sdiv C) -> (X sdiv -C)
if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) &&
match(Op0, m_Zero()))
@@ -604,14 +593,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
return BinaryOperator::CreateMul(Op0, C);
}
-
+
// X - A*-B -> X + A*B
// X - -A*B -> X + A*B
Value *A, *B;
if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
-
+
// X - A*CI -> X + A*-CI
// X - CI*A -> X + A*-CI
if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) ||
@@ -630,7 +619,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (X == dyn_castFoldableMul(Op1, C2))
return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
}
-
+
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
if (TD) {
@@ -639,20 +628,23 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
match(Op1, m_PtrToInt(m_Value(RHSOp))))
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
return ReplaceInstUsesWith(I, Res);
-
+
// trunc(p)-trunc(q) -> trunc(p-q)
if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
return ReplaceInstUsesWith(I, Res);
}
-
+
return 0;
}
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD))
+ return ReplaceInstUsesWith(I, V);
+
// If this is a 'B = x-(-A)', change to B = x+A...
if (Value *V = dyn_castFNegVal(Op1))
return BinaryOperator::CreateFAdd(Op0, V);
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 784742f274..ba4a57329d 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -16,9 +16,11 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/DataLayout.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/PatternMatch.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+using namespace PatternMatch;
STATISTIC(NumSimplified, "Number of library calls simplified");
@@ -276,25 +278,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
return 0;
}
- case Intrinsic::bswap:
+ case Intrinsic::bswap: {
+ Value *IIOperand = II->getArgOperand(0);
+ Value *X = 0;
+
// bswap(bswap(x)) -> x
- if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
- if (Operand->getIntrinsicID() == Intrinsic::bswap)
- return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
+ if (match(IIOperand, m_BSwap(m_Value(X))))
+ return ReplaceInstUsesWith(CI, X);
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
- if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
- if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
- if (Operand->getIntrinsicID() == Intrinsic::bswap) {
- unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
- TI->getType()->getPrimitiveSizeInBits();
- Value *CV = ConstantInt::get(Operand->getType(), C);
- Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
- return new TruncInst(V, TI->getType());
- }
+ if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
+ unsigned C = X->getType()->getPrimitiveSizeInBits() -
+ IIOperand->getType()->getPrimitiveSizeInBits();
+ Value *CV = ConstantInt::get(X->getType(), C);
+ Value *V = Builder->CreateLShr(X, CV);
+ return new TruncInst(V, IIOperand->getType());
}
-
break;
+ }
+
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// powi(x, 0) -> 1.0
@@ -693,7 +695,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Splat->isOne()) {
if (Zext)
return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
- // else
+ // else
return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
}
}
@@ -899,7 +901,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
new StoreInst(ConstantInt::getTrue(Callee->getContext()),
UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
OldCall);
- // If OldCall dues not return void then replaceAllUsesWith undef.
+ // If OldCall does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
if (!OldCall->getType()->isVoidTy())
ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 5cd611c420..964297a5ea 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -37,7 +37,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
m_Value(B))) &&
// The "1" can be any value known to be a power of 2.
- isPowerOfTwo(PowerOf2, IC.getDataLayout())) {
+ isKnownToBeAPowerOfTwo(PowerOf2)) {
A = IC.Builder->CreateSub(A, B);
return IC.Builder->CreateShl(PowerOf2, A);
}
@@ -45,8 +45,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
- if (I->isLogicalShift() &&
- isPowerOfTwo(I->getOperand(0), IC.getDataLayout())) {
+ if (I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0))) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) {
@@ -296,20 +295,11 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // Simplify mul instructions with a constant RHS.
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
- // "In IEEE floating point, x*1 is not equivalent to x for nans. However,
- // ANSI says we can drop signals, so we can do this anyway." (from GCC)
- if (Op1F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0'
- } else if (ConstantDataVector *Op1V = dyn_cast<ConstantDataVector>(Op1C)) {
- // As above, vector X*splat(1.0) -> X in all defined cases.
- if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue()))
- if (F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0);
- }
+ if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD))
+ return ReplaceInstUsesWith(I, V);
+ // Simplify mul instructions with a constant RHS.
+ if (isa<Constant>(Op1)) {
// Try to fold constant mul into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -351,6 +341,38 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
+ // X * cond ? 1.0 : 0.0 => cond ? X : 0.0
+ if (I.hasNoNaNs() && I.hasNoSignedZeros()) {
+ Value *V0 = I.getOperand(0);
+ Value *V1 = I.getOperand(1);
+ Value *Cond, *SLHS, *SRHS;
+ bool Match = false;
+
+ if (match(V0, m_Select(m_Value(Cond), m_Value(SLHS), m_Value(SRHS)))) {
+ Match = true;
+ } else if (match(V1, m_Select(m_Value(Cond), m_Value(SLHS),
+ m_Value(SRHS)))) {
+ Match = true;
+ std::swap(V0, V1);
+ }
+
+ if (Match) {
+ ConstantFP *C0 = dyn_cast<ConstantFP>(SLHS);
+ ConstantFP *C1 = dyn_cast<ConstantFP>(SRHS);
+
+ if (C0 && C1 &&
+ ((C0->isZero() && C1->isExactlyValue(1.0)) ||
+ (C1->isZero() && C0->isExactlyValue(1.0)))) {
+ Value *T;
+ if (C0->isZero())
+ T = Builder->CreateSelect(Cond, SLHS, V1);
+ else
+ T = Builder->CreateSelect(Cond, V1, SRHS);
+ return ReplaceInstUsesWith(I, T);
+ }
+ }
+ }
+
return Changed ? &I : 0;
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 08aedb3200..13653183a7 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -858,8 +858,8 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
Value *VarX = Shr->getOperand(0);
Type *Ty = VarX->getType();
- APInt BitMask1(Ty->getIntegerBitWidth(), (uint64_t)-1);
- APInt BitMask2(Ty->getIntegerBitWidth(), (uint64_t)-1);
+ APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
+ APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
bool isLshr = (Shr->getOpcode() == Instruction::LShr);
BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
@@ -891,6 +891,8 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
BinaryOperator::CreateAShr(VarX, Amt);
+ if (cast<BinaryOperator>(Shr)->isExact())
+ New->setIsExact(true);
}
return InsertNewInstWith(New, *Shl);
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index f095cff33c..e0c610ffa4 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DataLayout.h"
+#include "llvm/DIBuilder.h"
#include "llvm/Function.h"
#include "llvm/IRBuilder.h"
#include "llvm/InlineAsm.h"
@@ -38,6 +39,7 @@
#include "llvm/Support/system_error.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Type.h"
#include <algorithm>
@@ -1158,6 +1160,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
SmallVector<Instruction*, 8> RetVec;
uint64_t TotalSize = 0;
bool HavePoisonedAllocas = false;
+ DIBuilder DIB(*F.getParent());
// Filter out Alloca instructions we want (and can) handle.
// Collect Ret instructions.
@@ -1228,6 +1231,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
Value *NewAllocaPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)),
AI->getType());
+ replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB);
AI->replaceAllUsesWith(NewAllocaPtr);
// Analyze lifetime intrinsics only for static allocas we handle.
if (CheckLifetime)
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 947a2e3b12..59902269a0 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -76,6 +76,7 @@ static const uint64_t kShadowMask32 = 1ULL << 31;
static const uint64_t kShadowMask64 = 1ULL << 46;
static const uint64_t kOriginOffset32 = 1ULL << 30;
static const uint64_t kOriginOffset64 = 1ULL << 45;
+static const uint64_t kShadowTLSAlignment = 8;
// This is an important flag that makes the reports much more
// informative at the cost of greater slowdown. Not fully implemented
@@ -132,14 +133,14 @@ namespace {
/// MemorySanitizer: instrument the code in module to find
/// uninitialized reads.
class MemorySanitizer : public FunctionPass {
-public:
+ public:
MemorySanitizer() : FunctionPass(ID), TD(0), WarningFn(0) { }
const char *getPassName() const { return "MemorySanitizer"; }
bool runOnFunction(Function &F);
bool doInitialization(Module &M);
static char ID; // Pass identification, replacement for typeid.
-private:
+ private:
void initializeCallbacks(Module &M);
DataLayout *TD;
@@ -241,8 +242,8 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
MsanPoisonStackFn = M.getOrInsertFunction(
"__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
MemmoveFn = M.getOrInsertFunction(
- "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IntptrTy, NULL);
+ "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, NULL);
MemcpyFn = M.getOrInsertFunction(
"__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
IntptrTy, NULL);
@@ -377,7 +378,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// An unfortunate workaround for asymmetric lowering of va_arg stuff.
// See a comment in visitCallSite for more details.
- static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
+ static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
static const unsigned AMD64FpEndOffset = 176;
struct ShadowOriginAndInsertPoint {
@@ -409,7 +410,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Shadow = getShadow(Val);
Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
- StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
+ StoreInst *NewSI =
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
DEBUG(dbgs() << " STORE: " << *NewSI << "\n");
(void)NewSI;
// If the store is volatile, add a check.
@@ -420,7 +422,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ClTrackOrigins) {
if (ClStoreCleanOrigin || isa<StructType>(Shadow->getType())) {
- IRB.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRB), I.getAlignment());
+ IRB.CreateStore(getOrigin(Val), getOriginPtr(Addr, IRB));
} else {
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
@@ -434,10 +436,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm =
- SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false, MS.OriginStoreWeights);
- IRBuilder<> IRBNewBlock(CheckTerm);
- IRBNewBlock.CreateAlignedStore(getOrigin(Val),
- getOriginPtr(Addr, IRBNewBlock), I.getAlignment());
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false,
+ MS.OriginStoreWeights);
+ IRBuilder<> IRBNew(CheckTerm);
+ IRBNew.CreateStore(getOrigin(Val), getOriginPtr(Addr, IRBNew));
}
}
}
@@ -768,7 +770,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
}
- //------------------- Visitors.
+ // ------------------- Visitors.
/// \brief Instrument LoadInst
///
@@ -786,7 +788,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
insertCheck(I.getPointerOperand(), &I);
if (ClTrackOrigins)
- setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), I.getAlignment()));
+ setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
}
/// \brief Instrument StoreInst
@@ -918,67 +920,133 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
- /// \brief Propagate origin for an instruction.
+ /// \brief Default propagation of shadow and/or origin.
///
- /// This is a general case of origin propagation. For an Nary operation,
- /// is set to the origin of an argument that is not entirely initialized.
- /// If there is more than one such arguments, the rightmost of them is picked.
- /// It does not matter which one is picked if all arguments are initialized.
- void setOriginForNaryOp(Instruction &I) {
- if (!ClTrackOrigins) return;
- IRBuilder<> IRB(&I);
- Value *Origin = getOrigin(&I, 0);
- for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op) {
- Value *S = convertToShadowTyNoVec(getShadow(&I, Op), IRB);
- Origin = IRB.CreateSelect(IRB.CreateICmpNE(S, getCleanShadow(S)),
- getOrigin(&I, Op), Origin);
+ /// This class implements the general case of shadow propagation, used in all
+ /// cases where we don't know and/or don't care about what the operation
+ /// actually does. It converts all input shadow values to a common type
+ /// (extending or truncating as necessary), and bitwise OR's them.
+ ///
+ /// This is much cheaper than inserting checks (i.e. requiring inputs to be
+ /// fully initialized), and less prone to false positives.
+ ///
+ /// This class also implements the general case of origin propagation. For a
+ /// Nary operation, result origin is set to the origin of an argument that is
+ /// not entirely initialized. If there is more than one such arguments, the
+ /// rightmost of them is picked. It does not matter which one is picked if all
+ /// arguments are initialized.
+ template <bool CombineShadow>
+ class Combiner {
+ Value *Shadow;
+ Value *Origin;
+ IRBuilder<> &IRB;
+ MemorySanitizerVisitor *MSV;
+
+ public:
+ Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) :
+ Shadow(0), Origin(0), IRB(IRB), MSV(MSV) {}
+
+ /// \brief Add a pair of shadow and origin values to the mix.
+ Combiner &Add(Value *OpShadow, Value *OpOrigin) {
+ if (CombineShadow) {
+ assert(OpShadow);
+ if (!Shadow)
+ Shadow = OpShadow;
+ else {
+ OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
+ Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
+ }
+ }
+
+ if (ClTrackOrigins) {
+ assert(OpOrigin);
+ if (!Origin) {
+ Origin = OpOrigin;
+ } else {
+ Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
+ Value *Cond = IRB.CreateICmpNE(FlatShadow,
+ MSV->getCleanShadow(FlatShadow));
+ Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
+ }
+ }
+ return *this;
}
- setOrigin(&I, Origin);
- }
- /// \brief Propagate shadow for a binary operation.
- ///
- /// Shadow = Shadow0 | Shadow1, all 3 must have the same type.
- /// Bitwise OR is selected as an operation that will never lose even a bit of
- /// poison.
- void handleShadowOrBinary(Instruction &I) {
+ /// \brief Add an application value to the mix.
+ Combiner &Add(Value *V) {
+ Value *OpShadow = MSV->getShadow(V);
+ Value *OpOrigin = ClTrackOrigins ? MSV->getOrigin(V) : 0;
+ return Add(OpShadow, OpOrigin);
+ }
+
+ /// \brief Set the current combined values as the given instruction's shadow
+ /// and origin.
+ void Done(Instruction *I) {
+ if (CombineShadow) {
+ assert(Shadow);
+ Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
+ MSV->setShadow(I, Shadow);
+ }
+ if (ClTrackOrigins) {
+ assert(Origin);
+ MSV->setOrigin(I, Origin);
+ }
+ }
+ };
+
+ typedef Combiner<true> ShadowAndOriginCombiner;
+ typedef Combiner<false> OriginCombiner;
+
+ /// \brief Propagate origin for arbitrary operation.
+ void setOriginForNaryOp(Instruction &I) {
+ if (!ClTrackOrigins) return;
IRBuilder<> IRB(&I);
- Value *Shadow0 = getShadow(&I, 0);
- Value *Shadow1 = getShadow(&I, 1);
- setShadow(&I, IRB.CreateOr(Shadow0, Shadow1, "_msprop"));
- setOriginForNaryOp(I);
+ OriginCombiner OC(this, IRB);
+ for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
+ OC.Add(OI->get());
+ OC.Done(&I);
+ }
+
+ size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
+ return Ty->isVectorTy() ?
+ Ty->getVectorNumElements() * Ty->getScalarSizeInBits() :
+ Ty->getPrimitiveSizeInBits();
+ }
+
+ /// \brief Cast between two shadow types, extending or truncating as
+ /// necessary.
+ Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy) {
+ Type *srcTy = V->getType();
+ if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
+ return IRB.CreateIntCast(V, dstTy, false);
+ if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
+ dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
+ return IRB.CreateIntCast(V, dstTy, false);
+ size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
+ size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
+ Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
+ Value *V2 =
+ IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), false);
+ return IRB.CreateBitCast(V2, dstTy);
+ // TODO: handle struct types.
}
/// \brief Propagate shadow for arbitrary operation.
- ///
- /// This is a general case of shadow propagation, used in all cases where we
- /// don't know and/or care about what the operation actually does.
- /// It converts all input shadow values to a common type (extending or
- /// truncating as necessary), and bitwise OR's them.
- ///
- /// This is much cheaper than inserting checks (i.e. requiring inputs to be
- /// fully initialized), and less prone to false positives.
- // FIXME: is the casting actually correct?
- // FIXME: merge this with handleShadowOrBinary.
void handleShadowOr(Instruction &I) {
IRBuilder<> IRB(&I);
- Value *Shadow = getShadow(&I, 0);
- for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op)
- Shadow = IRB.CreateOr(
- Shadow, IRB.CreateIntCast(getShadow(&I, Op), Shadow->getType(), false),
- "_msprop");
- Shadow = IRB.CreateIntCast(Shadow, getShadowTy(&I), false);
- setShadow(&I, Shadow);
- setOriginForNaryOp(I);
+ ShadowAndOriginCombiner SC(this, IRB);
+ for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
+ SC.Add(OI->get());
+ SC.Done(&I);
}
- void visitFAdd(BinaryOperator &I) { handleShadowOrBinary(I); }
- void visitFSub(BinaryOperator &I) { handleShadowOrBinary(I); }
- void visitFMul(BinaryOperator &I) { handleShadowOrBinary(I); }
- void visitAdd(BinaryOperator &I) { handleShadowOrBinary(I); }
- void visitSub(BinaryOperator &I) { handleShadowOrBinary(I); }
- void visitXor(BinaryOperator &I) { handleShadowOrBinary(I); }
- void visitMul(BinaryOperator &I) { handleShadowOrBinary(I); }
+ void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
+ void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
+ void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
+ void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
+ void visitSub(BinaryOperator &I) { handleShadowOr(I); }
+ void visitXor(BinaryOperator &I) { handleShadowOr(I); }
+ void visitMul(BinaryOperator &I) { handleShadowOr(I); }
void handleDiv(Instruction &I) {
IRBuilder<> IRB(&I);
@@ -1155,9 +1223,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case llvm::Intrinsic::bswap:
- handleBswap(I); break;
+ handleBswap(I);
+ break;
default:
- visitInstruction(I); break;
+ visitInstruction(I);
+ break;
}
}
@@ -1226,7 +1296,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Size, Alignment);
} else {
Size = MS.TD->getTypeAllocSize(A->getType());
- Store = IRB.CreateStore(ArgShadow, ArgShadowBase);
+ Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
+ kShadowTLSAlignment);
}
if (ClTrackOrigins)
IRB.CreateStore(getOrigin(A),
@@ -1248,7 +1319,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRBBefore(&I);
// Untill we have full dynamic coverage, make sure the retval shadow is 0.
Value *Base = getShadowPtrForRetval(&I, IRBBefore);
- IRBBefore.CreateStore(getCleanShadow(&I), Base);
+ IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
Instruction *NextInsn = 0;
if (CS.isCall()) {
NextInsn = I.getNextNode();
@@ -1267,8 +1338,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
"Could not find insertion point for retval shadow load");
}
IRBuilder<> IRBAfter(NextInsn);
- setShadow(&I, IRBAfter.CreateLoad(getShadowPtrForRetval(&I, IRBAfter),
- "_msret"));
+ Value *RetvalShadow =
+ IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter),
+ kShadowTLSAlignment, "_msret");
+ setShadow(&I, RetvalShadow);
if (ClTrackOrigins)
setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
}
@@ -1280,7 +1353,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Shadow = getShadow(RetVal);
Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n");
- IRB.CreateStore(Shadow, ShadowPtr);
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
if (ClTrackOrigins)
IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
}
@@ -1407,7 +1480,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
struct VarArgAMD64Helper : public VarArgHelper {
// An unfortunate workaround for asymmetric lowering of va_arg stuff.
// See a comment in visitCallSite for more details.
- static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
+ static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
static const unsigned AMD64FpEndOffset = 176;
Function &F;
@@ -1471,7 +1544,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
Base = getShadowPtrForVAArgument(A, IRB, OverflowOffset);
OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8);
}
- IRB.CreateStore(MSV.getShadow(A), Base);
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
Constant *OverflowSize =
ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 1c220ca0f6..a0ee849a03 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2150,7 +2150,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
!canConvertValue(TD, ValueTy, AllocaTy))
return false;
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
- if (MI->isVolatile())
+ if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
@@ -2223,6 +2223,84 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
return V;
}
+static Value *extractVector(IRBuilder<> &IRB, Value *V,
+ unsigned BeginIndex, unsigned EndIndex,
+ const Twine &Name) {
+ VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+
+ if (NumElements == VecTy->getNumElements())
+ return V;
+
+ if (NumElements == 1) {
+ V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
+ Name + ".extract");
+ DEBUG(dbgs() << " extract: " << *V << "\n");
+ return V;
+ }
+
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(NumElements);
+ for (unsigned i = BeginIndex; i != EndIndex; ++i)
+ Mask.push_back(IRB.getInt32(i));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ Name + ".extract");
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
+ return V;
+}
+
+static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V,
+ unsigned BeginIndex, const Twine &Name) {
+ VectorType *VecTy = cast<VectorType>(Old->getType());
+ assert(VecTy && "Can only insert a vector into a vector");
+
+ VectorType *Ty = dyn_cast<VectorType>(V->getType());
+ if (!Ty) {
+ // Single element to insert.
+ V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
+ Name + ".insert");
+ DEBUG(dbgs() << " insert: " << *V << "\n");
+ return V;
+ }
+
+ assert(Ty->getNumElements() <= VecTy->getNumElements() &&
+ "Too many elements!");
+ if (Ty->getNumElements() == VecTy->getNumElements()) {
+ assert(V->getType() == VecTy && "Vector type mismatch");
+ return V;
+ }
+ unsigned EndIndex = BeginIndex + Ty->getNumElements();
+
+ // When inserting a smaller vector into the larger to store, we first
+ // use a shuffle vector to widen it with undef elements, and then
+ // a second shuffle vector to select between the loaded vector and the
+ // incoming vector.
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(VecTy->getNumElements());
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i - BeginIndex));
+ else
+ Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ Name + ".expand");
+ DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+
+ Mask.clear();
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i));
+ else
+ Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
+ V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
+ Name + "insert");
+ DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ return V;
+}
+
namespace {
/// \brief Visitor to rewrite instructions using a partition of an alloca to
/// use a new alloca.
@@ -2388,29 +2466,14 @@ private:
Pass.DeadInsts.insert(I);
}
- Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
- Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) {
unsigned BeginIndex = getIndex(BeginOffset);
unsigned EndIndex = getIndex(EndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
- unsigned NumElements = EndIndex - BeginIndex;
- assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
- if (NumElements == 1) {
- V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
- getName(".extract"));
- DEBUG(dbgs() << " extract: " << *V << "\n");
- } else if (NumElements < VecTy->getNumElements()) {
- SmallVector<Constant*, 8> Mask;
- Mask.reserve(NumElements);
- for (unsigned i = BeginIndex; i != EndIndex; ++i)
- Mask.push_back(IRB.getInt32(i));
- V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
- ConstantVector::get(Mask),
- getName(".extract"));
- DEBUG(dbgs() << " shuffle: " << *V << "\n");
- }
- return V;
+
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec"));
}
Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
@@ -2457,7 +2520,7 @@ private:
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
- V = rewriteVectorizedLoadInst(IRB, LI, OldOp);
+ V = rewriteVectorizedLoadInst(IRB);
} else if (IntTy && LI.getType()->isIntegerTy()) {
V = rewriteIntegerLoad(IRB, LI);
} else if (BeginOffset == NewAllocaBeginOffset &&
@@ -2518,44 +2581,12 @@ private:
: VectorType::get(ElementTy, NumElements);
if (V->getType() != PartitionTy)
V = convertValue(TD, IRB, V, PartitionTy);
- if (NumElements < VecTy->getNumElements()) {
- // We need to mix in the existing elements.
- LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- if (NumElements == 1) {
- V = IRB.CreateInsertElement(LI, V, IRB.getInt32(BeginIndex),
- getName(".insert"));
- DEBUG(dbgs() << " insert: " << *V << "\n");
- } else {
- // When inserting a smaller vector into the larger to store, we first
- // use a shuffle vector to widen it with undef elements, and then
- // a second shuffle vector to select between the loaded vector and the
- // incoming vector.
- SmallVector<Constant*, 8> Mask;
- Mask.reserve(VecTy->getNumElements());
- for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
- if (i >= BeginIndex && i < EndIndex)
- Mask.push_back(IRB.getInt32(i - BeginIndex));
- else
- Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
- V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
- ConstantVector::get(Mask),
- getName(".expand"));
- DEBUG(dbgs() << " shuffle1: " << *V << "\n");
-
- Mask.clear();
- for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
- if (i >= BeginIndex && i < EndIndex)
- Mask.push_back(IRB.getInt32(i));
- else
- Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
- V = IRB.CreateShuffleVector(V, LI, ConstantVector::get(Mask),
- getName("insert"));
- DEBUG(dbgs() << " shuffle2: " << *V << "\n");
- }
- } else {
- V = convertValue(TD, IRB, V, VecTy);
- }
+
+ // Mix in the existing elements.
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ V = insertVector(IRB, Old, V, BeginIndex, getName(".vec"));
+
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
@@ -2607,7 +2638,7 @@ private:
TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+ TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
"Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
@@ -2639,6 +2670,51 @@ private:
return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
}
+ /// \brief Compute an integer value from splatting an i8 across the given
+ /// number of bytes.
+ ///
+ /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
+ /// call this routine.
+ /// FIXME: Heed the abvice above.
+ ///
+ /// \param V The i8 value to splat.
+ /// \param Size The number of bytes in the output (assuming i8 is one byte)
+ Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) {
+ assert(Size > 0 && "Expected a positive number of bytes.");
+ IntegerType *VTy = cast<IntegerType>(V->getType());
+ assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
+ if (Size == 1)
+ return V;
+
+ Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
+ ConstantExpr::getUDiv(
+ Constant::getAllOnesValue(SplatIntTy),
+ ConstantExpr::getZExt(
+ Constant::getAllOnesValue(V->getType()),
+ SplatIntTy)),
+ getName(".isplat"));
+ return V;
+ }
+
+ /// \brief Compute a vector splat for a given element value.
+ Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) {
+ assert(NumElements > 0 && "Cannot splat to an empty vector.");
+
+ // First insert it into a one-element vector so we can shuffle it. It is
+ // really silly that LLVM's IR requires this in order to form a splat.
+ Value *Undef = UndefValue::get(VectorType::get(V->getType(), 1));
+ V = IRB.CreateInsertElement(Undef, V, IRB.getInt32(0),
+ getName(".splatinsert"));
+
+ // Shuffle the value across the desired number of elements.
+ SmallVector<Constant*, 8> Mask(NumElements, IRB.getInt32(0));
+ V = IRB.CreateShuffleVector(V, Undef, ConstantVector::get(Mask),
+ getName(".splat"));
+ DEBUG(dbgs() << " splat: " << *V << "\n");
+ return V;
+ }
+
bool visitMemSetInst(MemSetInst &II) {
DEBUG(dbgs() << " original: " << II << "\n");
IRBuilder<> IRB(&II);
@@ -2667,7 +2743,8 @@ private:
(BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaEndOffset ||
!AllocaTy->isSingleValueType() ||
- !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
+ !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) ||
+ TD.getTypeSizeInBits(ScalarTy)%8 != 0)) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
CallInst *New
@@ -2683,53 +2760,62 @@ private:
// If we can represent this as a simple value, we have to build the actual
// value to store, which requires expanding the byte present in memset to
// a sensible representation for the alloca type. This is essentially
- // splatting the byte to a sufficiently wide integer, bitcasting to the
- // desired scalar type, and splatting it across any desired vector type.
+ // splatting the byte to a sufficiently wide integer, splatting it across
+ // any desired vector width, and bitcasting to the final type.
uint64_t Size = EndOffset - BeginOffset;
- Value *V = II.getValue();
- IntegerType *VTy = cast<IntegerType>(V->getType());
- Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
- if (Size*8 > VTy->getBitWidth())
- V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
- ConstantExpr::getUDiv(
- Constant::getAllOnesValue(SplatIntTy),
- ConstantExpr::getZExt(
- Constant::getAllOnesValue(V->getType()),
- SplatIntTy)),
- getName(".isplat"));
-
- // If this is an element-wide memset of a vectorizable alloca, insert it.
- if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset)) {
- if (V->getType() != ScalarTy)
- V = convertValue(TD, IRB, V, ScalarTy);
- StoreInst *Store = IRB.CreateAlignedStore(
- IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
- NewAI.getAlignment(),
- getName(".load")),
- V, IRB.getInt32(getIndex(BeginOffset)),
- getName(".insert")),
- &NewAI, NewAI.getAlignment());
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
- return true;
- }
+ Value *V = getIntegerSplat(IRB, II.getValue(), Size);
+
+ if (VecTy) {
+ // If this is a memset of a vectorized alloca, insert it.
+ assert(ElementTy == ScalarTy);
+
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+
+ Value *Splat = getIntegerSplat(IRB, II.getValue(),
+ TD.getTypeSizeInBits(ElementTy)/8);
+ Splat = convertValue(TD, IRB, Splat, ElementTy);
+ if (NumElements > 1)
+ Splat = getVectorSplat(IRB, Splat, NumElements);
- // If this is a memset on an alloca where we can widen stores, insert the
- // set integer.
- if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset)) {
- assert(!II.isVolatile());
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".oldload"));
- Old = convertValue(TD, IRB, Old, IntTy);
- assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
- }
+ V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec"));
+ } else if (IntTy) {
+ // If this is a memset on an alloca where we can widen stores, insert the
+ // set integer.
+ assert(!II.isVolatile());
- if (V->getType() != AllocaTy)
+ V = getIntegerSplat(IRB, II.getValue(), Size);
+
+ if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaBeginOffset)) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
+ } else {
+ assert(V->getType() == IntTy &&
+ "Wrong type for an alloca wide integer!");
+ }
V = convertValue(TD, IRB, V, AllocaTy);
+ } else {
+ // Established these invariants above.
+ assert(BeginOffset == NewAllocaBeginOffset);
+ assert(EndOffset == NewAllocaEndOffset);
+
+ V = getIntegerSplat(IRB, II.getValue(),
+ TD.getTypeSizeInBits(ScalarTy)/8);
+ if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
+ V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements());
+
+ V = convertValue(TD, IRB, V, AllocaTy);
+ }
Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
II.isVolatile());
@@ -2814,37 +2900,22 @@ private:
// Record this instruction for deletion.
Pass.DeadInsts.insert(&II);
- bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
- EndOffset == NewAllocaEndOffset;
- bool IsVectorElement = VecTy && !IsWholeAlloca;
- uint64_t Size = EndOffset - BeginOffset;
- IntegerType *SubIntTy
- = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
-
- Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
- : II.getRawDest()->getType();
- if (!EmitMemCpy) {
- if (IsVectorElement)
- OtherPtrTy = VecTy->getElementType()->getPointerTo();
- else if (IntTy && !IsWholeAlloca)
- OtherPtrTy = SubIntTy->getPointerTo();
- else
- OtherPtrTy = NewAI.getType();
- }
-
- // Compute the other pointer, folding as much as possible to produce
- // a single, simple GEP in most cases.
- Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
- OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
- getName("." + OtherPtr->getName()));
-
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after rewriting this instruction.
+ Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
if (AllocaInst *AI
= dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
Pass.Worklist.insert(AI);
if (EmitMemCpy) {
+ Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
+ : II.getRawDest()->getType();
+
+ // Compute the other pointer, folding as much as possible to produce
+ // a single, simple GEP in most cases.
+ OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
+ getName("." + OtherPtr->getName()));
+
Value *OurPtr
= getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
: II.getRawSource()->getType());
@@ -2865,18 +2936,38 @@ private:
if (!Align)
Align = 1;
- Value *SrcPtr = OtherPtr;
+ bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset;
+ uint64_t Size = EndOffset - BeginOffset;
+ unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0;
+ unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0;
+ unsigned NumElements = EndIndex - BeginIndex;
+ IntegerType *SubIntTy
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
+
+ Type *OtherPtrTy = NewAI.getType();
+ if (VecTy && !IsWholeAlloca) {
+ if (NumElements == 1)
+ OtherPtrTy = VecTy->getElementType();
+ else
+ OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
+
+ OtherPtrTy = OtherPtrTy->getPointerTo();
+ } else if (IntTy && !IsWholeAlloca) {
+ OtherPtrTy = SubIntTy->getPointerTo();
+ }
+
+ Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
+ getName("." + OtherPtr->getName()));
Value *DstPtr = &NewAI;
if (!IsDest)
std::swap(SrcPtr, DstPtr);
Value *Src;
- if (IsVectorElement && !IsDest) {
- // We have to extract rather than load.
- Src = IRB.CreateExtractElement(
- IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
- IRB.getInt32(getIndex(BeginOffset)),
- getName(".copyextract"));
+ if (VecTy && !IsWholeAlloca && !IsDest) {
+ Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec"));
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
@@ -2889,7 +2980,11 @@ private:
getName(".copyload"));
}
- if (IntTy && !IsWholeAlloca && IsDest) {
+ if (VecTy && !IsWholeAlloca && IsDest) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec"));
+ } else if (IntTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".oldload"));
Old = convertValue(TD, IRB, Old, IntTy);
@@ -2899,14 +2994,6 @@ private:
Src = convertValue(TD, IRB, Src, NewAllocaTy);
}
- if (IsVectorElement && IsDest) {
- // We have to insert into a loaded copy before storing.
- Src = IRB.CreateInsertElement(
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
- Src, IRB.getInt32(getIndex(BeginOffset)),
- getName(".insert"));
- }
-
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
(void)Store;
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 9160f04fe2..3af62ebcef 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -111,13 +111,11 @@ static bool markAliveBlocks(BasicBlock *BB,
SmallVector<BasicBlock*, 128> Worklist;
Worklist.push_back(BB);
+ Reachable.insert(BB);
bool Changed = false;
do {
BB = Worklist.pop_back_val();
- if (!Reachable.insert(BB))
- continue;
-
// Do a quick scan of the basic block, turning any obviously unreachable
// instructions into LLVM unreachable insts. The instruction combining pass
// canonicalizes unreachable insts into stores to null or undef.
@@ -176,7 +174,8 @@ static bool markAliveBlocks(BasicBlock *BB,
Changed |= ConstantFoldTerminator(BB, true);
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- Worklist.push_back(*SI);
+ if (Reachable.insert(*SI))
+ Worklist.push_back(*SI);
} while (!Worklist.empty());
return Changed;
}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 0e56817a1b..58d973a61a 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -928,3 +928,38 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
return 0;
}
+
+bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+ DIBuilder &Builder) {
+ DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
+ if (!DDI)
+ return false;
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ // Create a copy of the original DIDescriptor for user variable, appending
+ // "deref" operation to a list of address elements, as new llvm.dbg.declare
+ // will take a value storing address of the memory for variable, not
+ // alloca itself.
+ Type *Int64Ty = Type::getInt64Ty(AI->getContext());
+ SmallVector<Value*, 4> NewDIVarAddress;
+ if (DIVar.hasComplexAddress()) {
+ for (unsigned i = 0, n = DIVar.getNumAddrElements(); i < n; ++i) {
+ NewDIVarAddress.push_back(
+ ConstantInt::get(Int64Ty, DIVar.getAddrElement(i)));
+ }
+ }
+ NewDIVarAddress.push_back(ConstantInt::get(Int64Ty, DIBuilder::OpDeref));
+ DIVariable NewDIVar = Builder.createComplexVariable(
+ DIVar.getTag(), DIVar.getContext(), DIVar.getName(),
+ DIVar.getFile(), DIVar.getLineNumber(), DIVar.getType(),
+ NewDIVarAddress, DIVar.getArgNumber());
+
+ // Insert llvm.dbg.declare in the same basic block as the original alloca,
+ // and remove old llvm.dbg.declare.
+ BasicBlock *BB = AI->getParent();
+ Builder.insertDeclare(NewAllocaAddress, NewDIVar, BB);
+ DDI->eraseFromParent();
+ return true;
+}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index feeececedb..d143f919ce 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -44,16 +44,17 @@ VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
cl::desc("Sets the SIMD width. Zero is autoselect."));
static cl::opt<bool>
-EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
+EnableIfConversion("enable-if-conversion", cl::init(false), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
namespace {
/// The LoopVectorize Pass.
struct LoopVectorize : public LoopPass {
- static char ID; // Pass identification, replacement for typeid
+ /// Pass identification, replacement for typeid
+ static char ID;
- LoopVectorize() : LoopPass(ID) {
+ explicit LoopVectorize() : LoopPass(ID) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
@@ -85,28 +86,27 @@ struct LoopVectorize : public LoopPass {
}
// Select the preffered vectorization factor.
- unsigned VF = 1;
- if (VectorizationFactor == 0) {
- const VectorTargetTransformInfo *VTTI = 0;
- if (TTI)
- VTTI = TTI->getVectorTargetTransformInfo();
- // Use the cost model.
- LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
- VF = CM.findBestVectorizationFactor();
-
- if (VF == 1) {
- DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
- return false;
- }
-
- } else {
- // Use the user command flag.
- VF = VectorizationFactor;
+ const VectorTargetTransformInfo *VTTI = 0;
+ if (TTI)
+ VTTI = TTI->getVectorTargetTransformInfo();
+ // Use the cost model.
+ LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
+
+ // Check the function attribues to find out if this function should be
+ // optimized for size.
+ Function *F = L->getHeader()->getParent();
+ Attributes::AttrVal SzAttr= Attributes::OptimizeForSize;
+ bool OptForSize = F->getFnAttributes().hasAttribute(SzAttr);
+
+ unsigned VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor);
+
+ if (VF == 1) {
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+ return false;
}
DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
- L->getHeader()->getParent()->getParent()->getModuleIdentifier()<<
- "\n");
+ F->getParent()->getModuleIdentifier()<<"\n");
// If we decided that it is *legal* to vectorizer the loop then do it.
InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF);
@@ -407,27 +407,27 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
the vectorized instructions while the old loop will continue to run the
scalar remainder.
- [ ] <-- vector loop bypass.
- / |
- / v
+ [ ] <-- vector loop bypass.
+ / |
+ / v
| [ ] <-- vector pre header.
| |
| v
| [ ] \
| [ ]_| <-- vector loop.
| |
- \ v
- >[ ] <--- middle-block.
- / |
- / v
+ \ v
+ >[ ] <--- middle-block.
+ / |
+ / v
| [ ] <--- new preheader.
| |
| v
| [ ] \
| [ ]_| <-- old scalar loop to handle remainder.
- \ |
- \ v
- >[ ] <-- exit block.
+ \ |
+ \ v
+ >[ ] <-- exit block.
...
*/
@@ -954,7 +954,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// At this point we generate the predication tree. There may be
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
- Value *Cond = createBlockInMask(P->getIncomingBlock(0));
+ Value *Cond = createEdgeMask(P->getIncomingBlock(0), P->getParent());
WidenMap[P] =
Builder.CreateSelect(Cond,
getVectorValue(P->getIncomingValue(0)),
@@ -1204,8 +1204,20 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- /// Vectorize bitcasts.
CastInst *CI = dyn_cast<CastInst>(it);
+ /// Optimize the special case where the source is the induction
+ /// variable. Notice that we can only optimize the 'trunc' case
+ /// because: a. FP conversions lose precision, b. sext/zext may wrap,
+ /// c. other casts depend on pointer size.
+ if (CI->getOperand(0) == OldInduction &&
+ it->getOpcode() == Instruction::Trunc) {
+ Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
+ CI->getType());
+ Value *Broadcasted = getBroadcastInstrs(ScalarCast);
+ WidenMap[it] = getConsecutiveVector(Broadcasted);
+ break;
+ }
+ /// Vectorize casts.
Value *A = getVectorValue(it->getOperand(0));
Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
@@ -1263,6 +1275,10 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
BasicBlock *BB = LoopBlocks[i];
+ // We don't support switch statements inside loops.
+ if (!isa<BranchInst>(BB->getTerminator()))
+ return false;
+
// We must have at most two predecessors because we need to convert
// all PHIs to selects.
unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
@@ -1832,6 +1848,15 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
return NoInduction;
}
+bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
+ Value *In0 = const_cast<Value*>(V);
+ PHINode *PN = dyn_cast_or_null<PHINode>(In0);
+ if (!PN)
+ return false;
+
+ return Inductions.count(PN);
+}
+
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
assert(TheLoop->contains(BB) && "Unknown block used");
@@ -1846,7 +1871,7 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
return false;
- // The isntructions below can trap.
+ // The instructions below can trap.
switch (it->getOpcode()) {
default: continue;
case Instruction::UDiv:
@@ -1870,7 +1895,48 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
}
unsigned
-LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
+LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
+ unsigned UserVF) {
+ if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
+ DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
+ return 1;
+ }
+
+ // Find the trip count.
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
+ DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
+
+ unsigned VF = MaxVectorSize;
+
+ // If we optimize the program for size, avoid creating the tail loop.
+ if (OptForSize) {
+ // If we are unable to calculate the trip count then don't try to vectorize.
+ if (TC < 2) {
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+ return 1;
+ }
+
+ // Find the maximum SIMD width that can fit within the trip count.
+ VF = TC % MaxVectorSize;
+
+ if (VF == 0)
+ VF = MaxVectorSize;
+
+ // If the trip count that we found modulo the vectorization factor is not
+ // zero then we require a tail.
+ if (VF < 2) {
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+ return 1;
+ }
+ }
+
+ if (UserVF != 0) {
+ assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
+ DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
+
+ return UserVF;
+ }
+
if (!VTTI) {
DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
return 1;
@@ -2052,6 +2118,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
+ // We optimize the truncation of induction variable.
+ // The cost of these is the same as the scalar operation.
+ if (I->getOpcode() == Instruction::Trunc &&
+ Legal->isInductionVariable(I->getOperand(0)))
+ return VTTI->getCastInstrCost(I->getOpcode(), I->getType(),
+ I->getOperand(0)->getType());
+
Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.h b/lib/Transforms/Vectorize/LoopVectorize.h
index 9d6d80e22b..e5ef29052e 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.h
+++ b/lib/Transforms/Vectorize/LoopVectorize.h
@@ -320,6 +320,9 @@ public:
/// Returns the induction variables found in the loop.
InductionList *getInductionVars() { return &Inductions; }
+ /// Returns True if V is an induction variable in this loop.
+ bool isInductionVariable(const Value *V);
+
/// Return true if the block BB needs to be predicated in order for the loop
/// to be vectorized.
bool blockNeedsPredication(BasicBlock *BB);
@@ -420,10 +423,11 @@ public:
const VectorTargetTransformInfo *Vtti):
TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
- /// Returns the most profitable vectorization factor for the loop that is
- /// smaller or equal to the VF argument. This method checks every power
- /// of two up to VF.
- unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize);
+ /// Returns the most profitable vectorization factor in powers of two.
+ /// This method checks every power of two up to VF. If UserVF is not ZERO
+ /// then this vectorization factor will be selected if vectorization is
+ /// possible.
+ unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF);
private:
/// Returns the expected execution cost. The unit of the cost does
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 3fb36cadea..19eefd2f87 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,4 +1,4 @@
-//===-- Vectorize.cpp -----------------------------------------------------===//
+ //===-- Vectorize.cpp -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//