author     Chandler Carruth <chandlerc@gmail.com>   2012-12-20 22:31:43 +0000
committer  Chandler Carruth <chandlerc@gmail.com>   2012-12-20 22:31:43 +0000
commit     41d2daa9344a4c4e8bb88dba51cd087c0648b695
tree       7e22b30849a5a15a044a3423d0e8df884992ceea /lib/Transforms
parent     571f3e0cc9e767d1e3dcfd3e7e0b3d11ec4f7884
parent     6c583141bf6b7a6b5f8125c1037ecbc089813288
Updating branches/google/testing to 170392
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@170779 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 6
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp | 8
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 102
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 34
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 54
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 6
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 4
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp | 217
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 373
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 7
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 35
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 145
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.h | 12
-rw-r--r--  lib/Transforms/Vectorize/Vectorize.cpp | 2
14 files changed, 650 insertions, 355 deletions
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index bd8fa66d52..b636414250 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -214,11 +214,13 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { OptSizeThreshold < thres) thres = OptSizeThreshold; - // Listen to the inlinehint attribute when it would increase the threshold. + // Listen to the inlinehint attribute when it would increase the threshold + // and the caller does not need to minimize its size. Function *Callee = CS.getCalledFunction(); bool InlineHint = Callee && !Callee->isDeclaration() && Callee->getFnAttributes().hasAttribute(Attributes::InlineHint); - if (InlineHint && HintThreshold > thres) + if (InlineHint && HintThreshold > thres + && !Caller->getFnAttributes().hasAttribute(Attributes::MinSize)) thres = HintThreshold; return thres; diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index b2cd3a765a..bd94f0a252 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -48,7 +48,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit InternalizePass(); - explicit InternalizePass(const std::vector <const char *>& exportList); + explicit InternalizePass(ArrayRef<const char *> exportList); void LoadFile(const char *Filename); virtual bool runOnModule(Module &M); @@ -72,10 +72,10 @@ InternalizePass::InternalizePass() ExternalNames.insert(APIList.begin(), APIList.end()); } -InternalizePass::InternalizePass(const std::vector<const char *>&exportList) +InternalizePass::InternalizePass(ArrayRef<const char *> exportList) : ModulePass(ID){ initializeInternalizePassPass(*PassRegistry::getPassRegistry()); - for(std::vector<const char *>::const_iterator itr = exportList.begin(); + for(ArrayRef<const char *>::const_iterator itr = exportList.begin(); itr != exportList.end(); itr++) { ExternalNames.insert(*itr); } @@ -173,6 +173,6 @@ ModulePass *llvm::createInternalizePass() { return new InternalizePass(); } -ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) { +ModulePass *llvm::createInternalizePass(ArrayRef<const char *> el) { return new InternalizePass(el); } diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index d8257e64d8..47223c3b35 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -37,10 +37,10 @@ static Constant *SubOne(ConstantInt *C) { static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { if (!V->hasOneUse() || !V->getType()->isIntegerTy()) return 0; - + Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return 0; - + if (I->getOpcode() == Instruction::Mul) if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) return I->getOperand(0); @@ -64,22 +64,22 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { // There are different heuristics we can use for this. Here are some simple // ones. - - // Add has the property that adding any two 2's complement numbers can only + + // Add has the property that adding any two 2's complement numbers can only // have one carry bit which can change a sign. As such, if LHS and RHS each // have at least two sign bits, we know that the addition of the two values // will sign extend fine. 
if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) return true; - - + + // If one of the operands only has one non-zero bit, and if the other operand // has a known-zero bit in a more significant place than it (not including the // sign bit) the ripple may go up to and fill the zero, but won't change the // sign. For example, (X & ~4) + 1. - + // TODO: Implement. - + return false; } @@ -100,7 +100,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { const APInt &Val = CI->getValue(); if (Val.isSignBit()) return BinaryOperator::CreateXor(LHS, RHS); - + // See if SimplifyDemandedBits can simplify this. This handles stuff like // (X & 254)+1 -> (X&254)|1 if (SimplifyDemandedInstructionBits(I)) @@ -110,7 +110,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) if (ZI->getSrcTy()->isIntegerTy(1)) return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - + Value *XorLHS = 0; ConstantInt *XorRHS = 0; if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); @@ -124,13 +124,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { else if (XorRHS->getValue().isPowerOf2()) ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; } - + if (ExtendAmt) { APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); if (!MaskedValueIsZero(XorLHS, Mask)) ExtendAmt = 0; } - + if (ExtendAmt) { Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); @@ -175,7 +175,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); } - + return BinaryOperator::CreateSub(RHS, LHSV); } @@ -209,7 +209,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { APInt RHSKnownOne(IT->getBitWidth(), 0); APInt RHSKnownZero(IT->getBitWidth(), 0); ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); - + // No bits in common -> bitwise or. if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) return BinaryOperator::CreateOr(LHS, RHS); @@ -251,7 +251,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // See if all bits from the first bit set in the Add RHS up are included // in the mask. First, get the rightmost bit. const APInt &AddRHSV = CRHS->getValue(); - + // Form a mask of all bits from the lowest bit added through the top. APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); @@ -289,7 +289,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the true select value. return SelectInst::Create(SI->getCondition(), N, A); - + if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the false select value. return SelectInst::Create(SI->getCondition(), A, N); @@ -301,18 +301,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { // (add (sext x), cst) --> (sext (add x, cst')) if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { - Constant *CI = + Constant *CI = ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && ConstantExpr::getSExt(CI, I.getType()) == RHSC && WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new, smaller add. 
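The WillNotOverflowSignedAdd comment above reasons that when both operands have at least two sign bits, the signed add cannot overflow. A minimal standalone C++ sketch of that argument (illustrative only; numSignBits is a hand-rolled stand-in for LLVM's ComputeNumSignBits):

#include <cassert>
#include <cstdint>

// Count how many leading bits equal the sign bit (the sign bit included).
static int numSignBits(int32_t v) {
  uint32_t u = static_cast<uint32_t>(v);
  uint32_t sign = u >> 31;
  int n = 0;
  for (int bit = 31; bit >= 0 && ((u >> bit) & 1u) == sign; --bit)
    ++n;
  return n;
}

int main() {
  // Two sign bits means the value lies in [-2^30, 2^30 - 1], so the sum of
  // two such values lies in [-2^31, 2^31 - 2] and cannot overflow int32_t.
  int32_t a = (1 << 30) - 1;   // numSignBits == 2
  int32_t b = (1 << 30) - 1;   // numSignBits == 2
  assert(numSignBits(a) > 1 && numSignBits(b) > 1);
  int64_t wide = static_cast<int64_t>(a) + static_cast<int64_t>(b);
  assert(wide >= INT32_MIN && wide <= INT32_MAX);  // no signed overflow
  return 0;
}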
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); return new SExtInst(NewAdd, I.getType()); } } - + // (add (sext x), (sext y)) --> (sext (add int x, y)) if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { // Only do this if x/y have the same type, if at last one of them has a @@ -323,7 +323,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { WillNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0))) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); return new SExtInst(NewAdd, I.getType()); } @@ -351,18 +351,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (Constant *RHSC = dyn_cast<Constant>(RHS)) { - // X + 0 --> X - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { - if (CFP->isExactlyValue(ConstantFP::getNegativeZero - (I.getType())->getValueAPF())) - return ReplaceInstUsesWith(I, LHS); - } + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); - if (isa<PHINode>(LHS)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } + if (isa<Constant>(RHS) && isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; // -A + B --> B - A // -A + -B --> -(A + B) @@ -374,11 +368,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = dyn_castFNegVal(RHS)) return BinaryOperator::CreateFSub(LHS, V); - // Check for X+0.0. Simplify it to X if we know X is not -0.0. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) - if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) - return ReplaceInstUsesWith(I, LHS); - // Check for (fadd double (sitofp x), y), see if we can merge this into an // integer add followed by a promotion. if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { @@ -388,7 +377,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // requires a constant pool load, and generally allows the add to be better // instcombined. if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { - Constant *CI = + Constant *CI = ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && ConstantExpr::getSIToFP(CI, I.getType()) == CFP && @@ -399,7 +388,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { return new SIToFPInst(NewAdd, I.getType()); } } - + // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { // Only do this if x/y have the same type, if at last one of them has a @@ -410,13 +399,13 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { WillNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0))) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0),"addconv"); return new SIToFPInst(NewAdd, I.getType()); } } } - + return Changed ? &I : 0; } @@ -428,7 +417,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty) { assert(TD && "Must have target data info for this"); - + // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize // this. 
bool Swapped = false; @@ -451,7 +440,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, } } } - + if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { // X - (gep X, ...) if (RHSGEP->getOperand(0) == LHS) { @@ -467,16 +456,16 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, } } } - + // Avoid duplicating the arithmetic if GEP2 has non-constant indices and // multiple users. if (GEP1 == 0 || (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) return 0; - + // Emit the offset of the GEP and an intptr_t. Value *Result = EmitGEPOffset(GEP1); - + // If we had a constant expression GEP on the other side offsetting the // pointer, subtract it from the offset we have. if (GEP2) { @@ -517,7 +506,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Replace (-1 - A) with (~A). if (match(Op0, m_AllOnes())) return BinaryOperator::CreateNot(Op1); - + if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { // C - ~X == X + (1+C) Value *X = 0; @@ -553,18 +542,18 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return &I; } - + { Value *Y; // X-(X+Y) == -Y X-(Y+X) == -Y if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) || match(Op1, m_Add(m_Value(Y), m_Specific(Op0)))) return BinaryOperator::CreateNeg(Y); - + // (X-Y)-X == -Y if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) return BinaryOperator::CreateNeg(Y); } - + if (Op1->hasOneUse()) { Value *X = 0, *Y = 0, *Z = 0; Constant *C = 0; @@ -581,7 +570,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { match(Op1, m_And(m_Specific(Op0), m_Value(Y)))) return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(Y, Y->getName() + ".not")); - + // 0 - (X sdiv C) -> (X sdiv -C) if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero())) @@ -604,14 +593,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); return BinaryOperator::CreateMul(Op0, C); } - + // X - A*-B -> X + A*B // X - -A*B -> X + A*B Value *A, *B; if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) || match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B)))) return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); - + // X - A*CI -> X + A*-CI // X - CI*A -> X + A*-CI if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) || @@ -630,7 +619,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (X == dyn_castFoldableMul(Op1, C2)) return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); } - + // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". if (TD) { @@ -639,20 +628,23 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { match(Op1, m_PtrToInt(m_Value(RHSOp)))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) return ReplaceInstUsesWith(I, Res); - + // trunc(p)-trunc(q) -> trunc(p-q) if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) return ReplaceInstUsesWith(I, Res); } - + return 0; } Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); + // If this is a 'B = x-(-A)', change to B = x+A... 
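The OptimizePointerDifference comment above gives the motivating case &A[10] - &A[0] --> 10. The same arithmetic, spelled out in a standalone snippet (plain C++, not LLVM IR): the byte offset between the two addresses divided by the element size.

#include <cassert>
#include <cstddef>
#include <cstdint>

int main() {
  int A[16] = {};
  std::uintptr_t P = reinterpret_cast<std::uintptr_t>(&A[10]);
  std::uintptr_t Q = reinterpret_cast<std::uintptr_t>(&A[0]);
  std::ptrdiff_t ByteDiff = static_cast<std::ptrdiff_t>(P - Q);
  // Divide the byte offset by the element size to get the element distance.
  assert(ByteDiff / static_cast<std::ptrdiff_t>(sizeof(int)) == 10);
  assert(&A[10] - &A[0] == 10);  // what the folded IR computes directly
  return 0;
}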
if (Value *V = dyn_castFNegVal(Op1)) return BinaryOperator::CreateFAdd(Op0, V); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 784742f274..ba4a57329d 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -16,9 +16,11 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/DataLayout.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/PatternMatch.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +using namespace PatternMatch; STATISTIC(NumSimplified, "Number of library calls simplified"); @@ -276,25 +278,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); return 0; } - case Intrinsic::bswap: + case Intrinsic::bswap: { + Value *IIOperand = II->getArgOperand(0); + Value *X = 0; + // bswap(bswap(x)) -> x - if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) - if (Operand->getIntrinsicID() == Intrinsic::bswap) - return ReplaceInstUsesWith(CI, Operand->getArgOperand(0)); + if (match(IIOperand, m_BSwap(m_Value(X)))) + return ReplaceInstUsesWith(CI, X); // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) - if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) { - if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0))) - if (Operand->getIntrinsicID() == Intrinsic::bswap) { - unsigned C = Operand->getType()->getPrimitiveSizeInBits() - - TI->getType()->getPrimitiveSizeInBits(); - Value *CV = ConstantInt::get(Operand->getType(), C); - Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV); - return new TruncInst(V, TI->getType()); - } + if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { + unsigned C = X->getType()->getPrimitiveSizeInBits() - + IIOperand->getType()->getPrimitiveSizeInBits(); + Value *CV = ConstantInt::get(X->getType(), C); + Value *V = Builder->CreateLShr(X, CV); + return new TruncInst(V, IIOperand->getType()); } - break; + } + case Intrinsic::powi: if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // powi(x, 0) -> 1.0 @@ -693,7 +695,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Splat->isOne()) { if (Zext) return CastInst::CreateZExtOrBitCast(Arg0, II->getType()); - // else + // else return CastInst::CreateSExtOrBitCast(Arg0, II->getType()); } } @@ -899,7 +901,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { new StoreInst(ConstantInt::getTrue(Callee->getContext()), UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), OldCall); - // If OldCall dues not return void then replaceAllUsesWith undef. + // If OldCall does not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!OldCall->getType()->isVoidTy()) ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType())); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 5cd611c420..964297a5ea 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -37,7 +37,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))), m_Value(B))) && // The "1" can be any value known to be a power of 2. 
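The bswap folds in the InstCombineCalls hunk above — bswap(bswap(x)) --> x and bswap(trunc(bswap(x))) --> trunc(lshr(x, c)), with c the difference in bit widths — can be sanity-checked with a small standalone program; the byte-swap helpers below are hand-rolled for the example, not LLVM code:

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t x) {
  return (x >> 24) | ((x >> 8) & 0x0000FF00u) |
         ((x << 8) & 0x00FF0000u) | (x << 24);
}
static uint16_t bswap16(uint16_t x) {
  return static_cast<uint16_t>((x >> 8) | (x << 8));
}

int main() {
  uint32_t x = 0x12345678u;

  // bswap(bswap(x)) --> x
  assert(bswap32(bswap32(x)) == x);

  // bswap(trunc(bswap(x))) --> trunc(lshr(x, c)), c = 32 - 16 = 16.
  uint16_t lhs = bswap16(static_cast<uint16_t>(bswap32(x)));
  uint16_t rhs = static_cast<uint16_t>(x >> 16);
  assert(lhs == rhs && lhs == 0x1234u);
  return 0;
}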
- isPowerOfTwo(PowerOf2, IC.getDataLayout())) { + isKnownToBeAPowerOfTwo(PowerOf2)) { A = IC.Builder->CreateSub(A, B); return IC.Builder->CreateShl(PowerOf2, A); } @@ -45,8 +45,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it // inexact. Similarly for <<. if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) - if (I->isLogicalShift() && - isPowerOfTwo(I->getOperand(0), IC.getDataLayout())) { + if (I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0))) { // We know that this is an exact/nuw shift and that the input is a // non-zero context as well. if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) { @@ -296,20 +295,11 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // Simplify mul instructions with a constant RHS. - if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) { - // "In IEEE floating point, x*1 is not equivalent to x for nans. However, - // ANSI says we can drop signals, so we can do this anyway." (from GCC) - if (Op1F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0' - } else if (ConstantDataVector *Op1V = dyn_cast<ConstantDataVector>(Op1C)) { - // As above, vector X*splat(1.0) -> X in all defined cases. - if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue())) - if (F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); - } + if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); + // Simplify mul instructions with a constant RHS. + if (isa<Constant>(Op1)) { // Try to fold constant mul into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -351,6 +341,38 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } } + // X * cond ? 1.0 : 0.0 => cond ? X : 0.0 + if (I.hasNoNaNs() && I.hasNoSignedZeros()) { + Value *V0 = I.getOperand(0); + Value *V1 = I.getOperand(1); + Value *Cond, *SLHS, *SRHS; + bool Match = false; + + if (match(V0, m_Select(m_Value(Cond), m_Value(SLHS), m_Value(SRHS)))) { + Match = true; + } else if (match(V1, m_Select(m_Value(Cond), m_Value(SLHS), + m_Value(SRHS)))) { + Match = true; + std::swap(V0, V1); + } + + if (Match) { + ConstantFP *C0 = dyn_cast<ConstantFP>(SLHS); + ConstantFP *C1 = dyn_cast<ConstantFP>(SRHS); + + if (C0 && C1 && + ((C0->isZero() && C1->isExactlyValue(1.0)) || + (C1->isZero() && C0->isExactlyValue(1.0)))) { + Value *T; + if (C0->isZero()) + T = Builder->CreateSelect(Cond, SLHS, V1); + else + T = Builder->CreateSelect(Cond, V1, SRHS); + return ReplaceInstUsesWith(I, T); + } + } + } + return Changed ? 
&I : 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 08aedb3200..13653183a7 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -858,8 +858,8 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, Value *VarX = Shr->getOperand(0); Type *Ty = VarX->getType(); - APInt BitMask1(Ty->getIntegerBitWidth(), (uint64_t)-1); - APInt BitMask2(Ty->getIntegerBitWidth(), (uint64_t)-1); + APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth())); + APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth())); bool isLshr = (Shr->getOpcode() == Instruction::LShr); BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) : @@ -891,6 +891,8 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt); New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) : BinaryOperator::CreateAShr(VarX, Amt); + if (cast<BinaryOperator>(Shr)->isExact()) + New->setIsExact(true); } return InsertNewInstWith(New, *Shl); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f095cff33c..e0c610ffa4 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/DataLayout.h" +#include "llvm/DIBuilder.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" @@ -38,6 +39,7 @@ #include "llvm/Support/system_error.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Type.h" #include <algorithm> @@ -1158,6 +1160,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { SmallVector<Instruction*, 8> RetVec; uint64_t TotalSize = 0; bool HavePoisonedAllocas = false; + DIBuilder DIB(*F.getParent()); // Filter out Alloca instructions we want (and can) handle. // Collect Ret instructions. @@ -1228,6 +1231,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)), AI->getType()); + replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB); AI->replaceAllUsesWith(NewAllocaPtr); // Analyze lifetime intrinsics only for static allocas we handle. if (CheckLifetime) diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 947a2e3b12..59902269a0 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -76,6 +76,7 @@ static const uint64_t kShadowMask32 = 1ULL << 31; static const uint64_t kShadowMask64 = 1ULL << 46; static const uint64_t kOriginOffset32 = 1ULL << 30; static const uint64_t kOriginOffset64 = 1ULL << 45; +static const uint64_t kShadowTLSAlignment = 8; // This is an important flag that makes the reports much more // informative at the cost of greater slowdown. Not fully implemented @@ -132,14 +133,14 @@ namespace { /// MemorySanitizer: instrument the code in module to find /// uninitialized reads. 
class MemorySanitizer : public FunctionPass { -public: + public: MemorySanitizer() : FunctionPass(ID), TD(0), WarningFn(0) { } const char *getPassName() const { return "MemorySanitizer"; } bool runOnFunction(Function &F); bool doInitialization(Module &M); static char ID; // Pass identification, replacement for typeid. -private: + private: void initializeCallbacks(Module &M); DataLayout *TD; @@ -241,8 +242,8 @@ void MemorySanitizer::initializeCallbacks(Module &M) { MsanPoisonStackFn = M.getOrInsertFunction( "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL); MemmoveFn = M.getOrInsertFunction( - "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IntptrTy, NULL); + "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, NULL); MemcpyFn = M.getOrInsertFunction( "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL); @@ -377,7 +378,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // An unfortunate workaround for asymmetric lowering of va_arg stuff. // See a comment in visitCallSite for more details. - static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7 + static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7 static const unsigned AMD64FpEndOffset = 176; struct ShadowOriginAndInsertPoint { @@ -409,7 +410,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *Shadow = getShadow(Val); Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); - StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); + StoreInst *NewSI = + IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); (void)NewSI; // If the store is volatile, add a check. @@ -420,7 +422,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (ClTrackOrigins) { if (ClStoreCleanOrigin || isa<StructType>(Shadow->getType())) { - IRB.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRB), I.getAlignment()); + IRB.CreateStore(getOrigin(Val), getOriginPtr(Addr, IRB)); } else { Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); @@ -434,10 +436,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); Instruction *CheckTerm = - SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false, MS.OriginStoreWeights); - IRBuilder<> IRBNewBlock(CheckTerm); - IRBNewBlock.CreateAlignedStore(getOrigin(Val), - getOriginPtr(Addr, IRBNewBlock), I.getAlignment()); + SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false, + MS.OriginStoreWeights); + IRBuilder<> IRBNew(CheckTerm); + IRBNew.CreateStore(getOrigin(Val), getOriginPtr(Addr, IRBNew)); } } } @@ -768,7 +770,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns)); } - //------------------- Visitors. + // ------------------- Visitors. 
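For readers new to the pass, the store instrumentation above boils down to: every application store is paired with a store of the value's shadow to a parallel shadow location, and loads consult that shadow. A deliberately simplified, byte-granular toy of that idea (not MSan's actual bit-exact shadow encoding, address mapping, or runtime):

#include <cassert>
#include <cstdint>
#include <cstring>

// Toy shadow memory: one shadow byte per application byte; nonzero shadow
// marks the corresponding application byte as uninitialized ("poisoned").
// The real pass derives shadow addresses from a fixed mask and passes
// argument/return shadows through TLS slots.
static uint8_t App[64];
static uint8_t Shadow[64];

static void storeU32(unsigned Off, uint32_t V, uint32_t VShadow) {
  std::memcpy(&App[Off], &V, sizeof(V));
  std::memcpy(&Shadow[Off], &VShadow, sizeof(VShadow));  // shadow follows data
}

static bool loadIsPoisoned(unsigned Off) {
  uint32_t S;
  std::memcpy(&S, &Shadow[Off], sizeof(S));
  return S != 0;  // any poisoned bit makes the loaded value suspect
}

int main() {
  std::memset(Shadow, 0xFF, sizeof(Shadow));  // everything starts poisoned
  storeU32(0, 42u, 0u);                       // fully initialized store
  assert(!loadIsPoisoned(0));
  assert(loadIsPoisoned(4));                  // untouched bytes stay poisoned
  return 0;
}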
/// \brief Instrument LoadInst /// @@ -786,7 +788,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { insertCheck(I.getPointerOperand(), &I); if (ClTrackOrigins) - setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), I.getAlignment())); + setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB))); } /// \brief Instrument StoreInst @@ -918,67 +920,133 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } - /// \brief Propagate origin for an instruction. + /// \brief Default propagation of shadow and/or origin. /// - /// This is a general case of origin propagation. For an Nary operation, - /// is set to the origin of an argument that is not entirely initialized. - /// If there is more than one such arguments, the rightmost of them is picked. - /// It does not matter which one is picked if all arguments are initialized. - void setOriginForNaryOp(Instruction &I) { - if (!ClTrackOrigins) return; - IRBuilder<> IRB(&I); - Value *Origin = getOrigin(&I, 0); - for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op) { - Value *S = convertToShadowTyNoVec(getShadow(&I, Op), IRB); - Origin = IRB.CreateSelect(IRB.CreateICmpNE(S, getCleanShadow(S)), - getOrigin(&I, Op), Origin); + /// This class implements the general case of shadow propagation, used in all + /// cases where we don't know and/or don't care about what the operation + /// actually does. It converts all input shadow values to a common type + /// (extending or truncating as necessary), and bitwise OR's them. + /// + /// This is much cheaper than inserting checks (i.e. requiring inputs to be + /// fully initialized), and less prone to false positives. + /// + /// This class also implements the general case of origin propagation. For a + /// Nary operation, result origin is set to the origin of an argument that is + /// not entirely initialized. If there is more than one such arguments, the + /// rightmost of them is picked. It does not matter which one is picked if all + /// arguments are initialized. + template <bool CombineShadow> + class Combiner { + Value *Shadow; + Value *Origin; + IRBuilder<> &IRB; + MemorySanitizerVisitor *MSV; + + public: + Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) : + Shadow(0), Origin(0), IRB(IRB), MSV(MSV) {} + + /// \brief Add a pair of shadow and origin values to the mix. + Combiner &Add(Value *OpShadow, Value *OpOrigin) { + if (CombineShadow) { + assert(OpShadow); + if (!Shadow) + Shadow = OpShadow; + else { + OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType()); + Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop"); + } + } + + if (ClTrackOrigins) { + assert(OpOrigin); + if (!Origin) { + Origin = OpOrigin; + } else { + Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB); + Value *Cond = IRB.CreateICmpNE(FlatShadow, + MSV->getCleanShadow(FlatShadow)); + Origin = IRB.CreateSelect(Cond, OpOrigin, Origin); + } + } + return *this; } - setOrigin(&I, Origin); - } - /// \brief Propagate shadow for a binary operation. - /// - /// Shadow = Shadow0 | Shadow1, all 3 must have the same type. - /// Bitwise OR is selected as an operation that will never lose even a bit of - /// poison. - void handleShadowOrBinary(Instruction &I) { + /// \brief Add an application value to the mix. + Combiner &Add(Value *V) { + Value *OpShadow = MSV->getShadow(V); + Value *OpOrigin = ClTrackOrigins ? 
MSV->getOrigin(V) : 0; + return Add(OpShadow, OpOrigin); + } + + /// \brief Set the current combined values as the given instruction's shadow + /// and origin. + void Done(Instruction *I) { + if (CombineShadow) { + assert(Shadow); + Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I)); + MSV->setShadow(I, Shadow); + } + if (ClTrackOrigins) { + assert(Origin); + MSV->setOrigin(I, Origin); + } + } + }; + + typedef Combiner<true> ShadowAndOriginCombiner; + typedef Combiner<false> OriginCombiner; + + /// \brief Propagate origin for arbitrary operation. + void setOriginForNaryOp(Instruction &I) { + if (!ClTrackOrigins) return; IRBuilder<> IRB(&I); - Value *Shadow0 = getShadow(&I, 0); - Value *Shadow1 = getShadow(&I, 1); - setShadow(&I, IRB.CreateOr(Shadow0, Shadow1, "_msprop")); - setOriginForNaryOp(I); + OriginCombiner OC(this, IRB); + for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI) + OC.Add(OI->get()); + OC.Done(&I); + } + + size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) { + return Ty->isVectorTy() ? + Ty->getVectorNumElements() * Ty->getScalarSizeInBits() : + Ty->getPrimitiveSizeInBits(); + } + + /// \brief Cast between two shadow types, extending or truncating as + /// necessary. + Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy) { + Type *srcTy = V->getType(); + if (dstTy->isIntegerTy() && srcTy->isIntegerTy()) + return IRB.CreateIntCast(V, dstTy, false); + if (dstTy->isVectorTy() && srcTy->isVectorTy() && + dstTy->getVectorNumElements() == srcTy->getVectorNumElements()) + return IRB.CreateIntCast(V, dstTy, false); + size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy); + size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy); + Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits)); + Value *V2 = + IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), false); + return IRB.CreateBitCast(V2, dstTy); + // TODO: handle struct types. } /// \brief Propagate shadow for arbitrary operation. - /// - /// This is a general case of shadow propagation, used in all cases where we - /// don't know and/or care about what the operation actually does. - /// It converts all input shadow values to a common type (extending or - /// truncating as necessary), and bitwise OR's them. - /// - /// This is much cheaper than inserting checks (i.e. requiring inputs to be - /// fully initialized), and less prone to false positives. - // FIXME: is the casting actually correct? - // FIXME: merge this with handleShadowOrBinary. 
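The Combiner above folds shadows by casting every operand's shadow to a common width and OR-ing, so any poisoned input bit poisons the result. A standalone sketch of that rule on plain integers (widths and helper names here are illustrative):

#include <cassert>
#include <cstdint>

// Truncate or (implicitly) zero-extend a shadow value to ToBits.
static uint64_t castShadowTo(uint64_t S, unsigned ToBits) {
  return ToBits >= 64 ? S : (S & ((1ull << ToBits) - 1));
}

static uint64_t combineShadow(uint64_t A, unsigned ABits, uint64_t B) {
  // Use the first operand's width as the common type, then OR: any poisoned
  // input bit poisons the combined shadow.
  return A | castShadowTo(B, ABits);
}

int main() {
  // 32-bit operand with a poisoned low byte, combined with a clean i8 operand:
  assert(combineShadow(0x000000FFu, 32, 0x00u) == 0x000000FFu);
  // Clean 32-bit operand combined with a fully poisoned i8 operand:
  assert(combineShadow(0x0u, 32, 0xFFu) == 0xFFu);
  return 0;
}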
void handleShadowOr(Instruction &I) { IRBuilder<> IRB(&I); - Value *Shadow = getShadow(&I, 0); - for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op) - Shadow = IRB.CreateOr( - Shadow, IRB.CreateIntCast(getShadow(&I, Op), Shadow->getType(), false), - "_msprop"); - Shadow = IRB.CreateIntCast(Shadow, getShadowTy(&I), false); - setShadow(&I, Shadow); - setOriginForNaryOp(I); + ShadowAndOriginCombiner SC(this, IRB); + for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI) + SC.Add(OI->get()); + SC.Done(&I); } - void visitFAdd(BinaryOperator &I) { handleShadowOrBinary(I); } - void visitFSub(BinaryOperator &I) { handleShadowOrBinary(I); } - void visitFMul(BinaryOperator &I) { handleShadowOrBinary(I); } - void visitAdd(BinaryOperator &I) { handleShadowOrBinary(I); } - void visitSub(BinaryOperator &I) { handleShadowOrBinary(I); } - void visitXor(BinaryOperator &I) { handleShadowOrBinary(I); } - void visitMul(BinaryOperator &I) { handleShadowOrBinary(I); } + void visitFAdd(BinaryOperator &I) { handleShadowOr(I); } + void visitFSub(BinaryOperator &I) { handleShadowOr(I); } + void visitFMul(BinaryOperator &I) { handleShadowOr(I); } + void visitAdd(BinaryOperator &I) { handleShadowOr(I); } + void visitSub(BinaryOperator &I) { handleShadowOr(I); } + void visitXor(BinaryOperator &I) { handleShadowOr(I); } + void visitMul(BinaryOperator &I) { handleShadowOr(I); } void handleDiv(Instruction &I) { IRBuilder<> IRB(&I); @@ -1155,9 +1223,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case llvm::Intrinsic::bswap: - handleBswap(I); break; + handleBswap(I); + break; default: - visitInstruction(I); break; + visitInstruction(I); + break; } } @@ -1226,7 +1296,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Size, Alignment); } else { Size = MS.TD->getTypeAllocSize(A->getType()); - Store = IRB.CreateStore(ArgShadow, ArgShadowBase); + Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase, + kShadowTLSAlignment); } if (ClTrackOrigins) IRB.CreateStore(getOrigin(A), @@ -1248,7 +1319,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRBuilder<> IRBBefore(&I); // Untill we have full dynamic coverage, make sure the retval shadow is 0. 
Value *Base = getShadowPtrForRetval(&I, IRBBefore); - IRBBefore.CreateStore(getCleanShadow(&I), Base); + IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment); Instruction *NextInsn = 0; if (CS.isCall()) { NextInsn = I.getNextNode(); @@ -1267,8 +1338,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { "Could not find insertion point for retval shadow load"); } IRBuilder<> IRBAfter(NextInsn); - setShadow(&I, IRBAfter.CreateLoad(getShadowPtrForRetval(&I, IRBAfter), - "_msret")); + Value *RetvalShadow = + IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter), + kShadowTLSAlignment, "_msret"); + setShadow(&I, RetvalShadow); if (ClTrackOrigins) setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter))); } @@ -1280,7 +1353,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *Shadow = getShadow(RetVal); Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n"); - IRB.CreateStore(Shadow, ShadowPtr); + IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment); if (ClTrackOrigins) IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB)); } @@ -1407,7 +1480,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { struct VarArgAMD64Helper : public VarArgHelper { // An unfortunate workaround for asymmetric lowering of va_arg stuff. // See a comment in visitCallSite for more details. - static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7 + static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7 static const unsigned AMD64FpEndOffset = 176; Function &F; @@ -1471,7 +1544,7 @@ struct VarArgAMD64Helper : public VarArgHelper { Base = getShadowPtrForVAArgument(A, IRB, OverflowOffset); OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8); } - IRB.CreateStore(MSV.getShadow(A), Base); + IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); } Constant *OverflowSize = ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 1c220ca0f6..a0ee849a03 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2150,7 +2150,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, !canConvertValue(TD, ValueTy, AllocaTy)) return false; } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { - if (MI->isVolatile()) + if (MI->isVolatile() || !isa<Constant>(MI->getLength())) return false; if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO @@ -2223,6 +2223,84 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, return V; } +static Value *extractVector(IRBuilder<> &IRB, Value *V, + unsigned BeginIndex, unsigned EndIndex, + const Twine &Name) { + VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + + if (NumElements == VecTy->getNumElements()) + return V; + + if (NumElements == 1) { + V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), + Name + ".extract"); + DEBUG(dbgs() << " extract: " << *V << "\n"); + return V; + } + + SmallVector<Constant*, 8> Mask; + Mask.reserve(NumElements); + for (unsigned i = BeginIndex; i != EndIndex; ++i) + Mask.push_back(IRB.getInt32(i)); + V = 
IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + Name + ".extract"); + DEBUG(dbgs() << " shuffle: " << *V << "\n"); + return V; +} + +static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V, + unsigned BeginIndex, const Twine &Name) { + VectorType *VecTy = cast<VectorType>(Old->getType()); + assert(VecTy && "Can only insert a vector into a vector"); + + VectorType *Ty = dyn_cast<VectorType>(V->getType()); + if (!Ty) { + // Single element to insert. + V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex), + Name + ".insert"); + DEBUG(dbgs() << " insert: " << *V << "\n"); + return V; + } + + assert(Ty->getNumElements() <= VecTy->getNumElements() && + "Too many elements!"); + if (Ty->getNumElements() == VecTy->getNumElements()) { + assert(V->getType() == VecTy && "Vector type mismatch"); + return V; + } + unsigned EndIndex = BeginIndex + Ty->getNumElements(); + + // When inserting a smaller vector into the larger to store, we first + // use a shuffle vector to widen it with undef elements, and then + // a second shuffle vector to select between the loaded vector and the + // incoming vector. + SmallVector<Constant*, 8> Mask; + Mask.reserve(VecTy->getNumElements()); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i - BeginIndex)); + else + Mask.push_back(UndefValue::get(IRB.getInt32Ty())); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + Name + ".expand"); + DEBUG(dbgs() << " shuffle1: " << *V << "\n"); + + Mask.clear(); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i)); + else + Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); + V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask), + Name + "insert"); + DEBUG(dbgs() << " shuffle2: " << *V << "\n"); + return V; +} + namespace { /// \brief Visitor to rewrite instructions using a partition of an alloca to /// use a new alloca. 
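insertVector above builds its blend from two shufflevectors: one spreads the small vector into its target lanes with undef elsewhere, the other selects between those lanes and the old vector. A small standalone simulation of the two masks for inserting a 2-lane vector at lane 1 of a 4-lane vector (plain C++, with -1 modelling an undef mask element):

#include <array>
#include <cassert>

using V4 = std::array<int, 4>;

// Simulate shufflevector: Mask[i] < 4 picks from LHS, otherwise from RHS.
static V4 shuffle(const V4 &LHS, const V4 &RHS, const std::array<int, 4> &Mask) {
  V4 R{};
  for (int i = 0; i < 4; ++i) {
    int M = Mask[i];
    R[i] = (M < 0) ? -1 : (M < 4 ? LHS[M] : RHS[M - 4]);  // -1 models undef
  }
  return R;
}

int main() {
  // Insert the 2-lane value {10, 11} at lane 1 of the old vector {0,1,2,3}.
  V4 Old = {0, 1, 2, 3};
  V4 Small = {10, 11, -1, -1};  // small vector, padded for the simulation

  // Shuffle 1: spread the small vector into its target lanes, undef elsewhere.
  V4 Wide = shuffle(Small, V4{-1, -1, -1, -1}, {-1, 0, 1, -1});

  // Shuffle 2: take new lanes (indices 0..3) where defined, old lanes
  // (indices 4..7) everywhere else.
  V4 Out = shuffle(Wide, Old, {4, 1, 2, 7});
  assert((Out == V4{0, 10, 11, 3}));
  return 0;
}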
@@ -2388,29 +2466,14 @@ private: Pass.DeadInsts.insert(I); } - Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { - Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); assert(EndIndex > BeginIndex && "Empty vector!"); - unsigned NumElements = EndIndex - BeginIndex; - assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); - if (NumElements == 1) { - V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), - getName(".extract")); - DEBUG(dbgs() << " extract: " << *V << "\n"); - } else if (NumElements < VecTy->getNumElements()) { - SmallVector<Constant*, 8> Mask; - Mask.reserve(NumElements); - for (unsigned i = BeginIndex; i != EndIndex; ++i) - Mask.push_back(IRB.getInt32(i)); - V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(Mask), - getName(".extract")); - DEBUG(dbgs() << " shuffle: " << *V << "\n"); - } - return V; + + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec")); } Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { @@ -2457,7 +2520,7 @@ private: bool IsPtrAdjusted = false; Value *V; if (VecTy) { - V = rewriteVectorizedLoadInst(IRB, LI, OldOp); + V = rewriteVectorizedLoadInst(IRB); } else if (IntTy && LI.getType()->isIntegerTy()) { V = rewriteIntegerLoad(IRB, LI); } else if (BeginOffset == NewAllocaBeginOffset && @@ -2518,44 +2581,12 @@ private: : VectorType::get(ElementTy, NumElements); if (V->getType() != PartitionTy) V = convertValue(TD, IRB, V, PartitionTy); - if (NumElements < VecTy->getNumElements()) { - // We need to mix in the existing elements. - LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - if (NumElements == 1) { - V = IRB.CreateInsertElement(LI, V, IRB.getInt32(BeginIndex), - getName(".insert")); - DEBUG(dbgs() << " insert: " << *V << "\n"); - } else { - // When inserting a smaller vector into the larger to store, we first - // use a shuffle vector to widen it with undef elements, and then - // a second shuffle vector to select between the loaded vector and the - // incoming vector. - SmallVector<Constant*, 8> Mask; - Mask.reserve(VecTy->getNumElements()); - for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i - BeginIndex)); - else - Mask.push_back(UndefValue::get(IRB.getInt32Ty())); - V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(Mask), - getName(".expand")); - DEBUG(dbgs() << " shuffle1: " << *V << "\n"); - - Mask.clear(); - for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i)); - else - Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); - V = IRB.CreateShuffleVector(V, LI, ConstantVector::get(Mask), - getName("insert")); - DEBUG(dbgs() << " shuffle2: " << *V << "\n"); - } - } else { - V = convertValue(TD, IRB, V, VecTy); - } + + // Mix in the existing elements. 
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + V = insertVector(IRB, Old, V, BeginIndex, getName(".vec")); + StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.insert(&SI); @@ -2607,7 +2638,7 @@ private: TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); assert(V->getType()->getIntegerBitWidth() == - TD.getTypeSizeInBits(OldAI.getAllocatedType()) && + TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, @@ -2639,6 +2670,51 @@ private: return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); } + /// \brief Compute an integer value from splatting an i8 across the given + /// number of bytes. + /// + /// Note that this routine assumes an i8 is a byte. If that isn't true, don't + /// call this routine. + /// FIXME: Heed the abvice above. + /// + /// \param V The i8 value to splat. + /// \param Size The number of bytes in the output (assuming i8 is one byte) + Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) { + assert(Size > 0 && "Expected a positive number of bytes."); + IntegerType *VTy = cast<IntegerType>(V->getType()); + assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); + if (Size == 1) + return V; + + Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); + V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), + ConstantExpr::getUDiv( + Constant::getAllOnesValue(SplatIntTy), + ConstantExpr::getZExt( + Constant::getAllOnesValue(V->getType()), + SplatIntTy)), + getName(".isplat")); + return V; + } + + /// \brief Compute a vector splat for a given element value. + Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) { + assert(NumElements > 0 && "Cannot splat to an empty vector."); + + // First insert it into a one-element vector so we can shuffle it. It is + // really silly that LLVM's IR requires this in order to form a splat. + Value *Undef = UndefValue::get(VectorType::get(V->getType(), 1)); + V = IRB.CreateInsertElement(Undef, V, IRB.getInt32(0), + getName(".splatinsert")); + + // Shuffle the value across the desired number of elements. + SmallVector<Constant*, 8> Mask(NumElements, IRB.getInt32(0)); + V = IRB.CreateShuffleVector(V, Undef, ConstantVector::get(Mask), + getName(".splat")); + DEBUG(dbgs() << " splat: " << *V << "\n"); + return V; + } + bool visitMemSetInst(MemSetInst &II) { DEBUG(dbgs() << " original: " << II << "\n"); IRBuilder<> IRB(&II); @@ -2667,7 +2743,8 @@ private: (BeginOffset != NewAllocaBeginOffset || EndOffset != NewAllocaEndOffset || !AllocaTy->isSingleValueType() || - !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { + !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) || + TD.getTypeSizeInBits(ScalarTy)%8 != 0)) { Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); CallInst *New @@ -2683,53 +2760,62 @@ private: // If we can represent this as a simple value, we have to build the actual // value to store, which requires expanding the byte present in memset to // a sensible representation for the alloca type. This is essentially - // splatting the byte to a sufficiently wide integer, bitcasting to the - // desired scalar type, and splatting it across any desired vector type. 
+ // splatting the byte to a sufficiently wide integer, splatting it across + // any desired vector width, and bitcasting to the final type. uint64_t Size = EndOffset - BeginOffset; - Value *V = II.getValue(); - IntegerType *VTy = cast<IntegerType>(V->getType()); - Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); - if (Size*8 > VTy->getBitWidth()) - V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), - ConstantExpr::getUDiv( - Constant::getAllOnesValue(SplatIntTy), - ConstantExpr::getZExt( - Constant::getAllOnesValue(V->getType()), - SplatIntTy)), - getName(".isplat")); - - // If this is an element-wide memset of a vectorizable alloca, insert it. - if (VecTy && (BeginOffset > NewAllocaBeginOffset || - EndOffset < NewAllocaEndOffset)) { - if (V->getType() != ScalarTy) - V = convertValue(TD, IRB, V, ScalarTy); - StoreInst *Store = IRB.CreateAlignedStore( - IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, - NewAI.getAlignment(), - getName(".load")), - V, IRB.getInt32(getIndex(BeginOffset)), - getName(".insert")), - &NewAI, NewAI.getAlignment()); - (void)Store; - DEBUG(dbgs() << " to: " << *Store << "\n"); - return true; - } + Value *V = getIntegerSplat(IRB, II.getValue(), Size); + + if (VecTy) { + // If this is a memset of a vectorized alloca, insert it. + assert(ElementTy == ScalarTy); + + unsigned BeginIndex = getIndex(BeginOffset); + unsigned EndIndex = getIndex(EndOffset); + assert(EndIndex > BeginIndex && "Empty vector!"); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + + Value *Splat = getIntegerSplat(IRB, II.getValue(), + TD.getTypeSizeInBits(ElementTy)/8); + Splat = convertValue(TD, IRB, Splat, ElementTy); + if (NumElements > 1) + Splat = getVectorSplat(IRB, Splat, NumElements); - // If this is a memset on an alloca where we can widen stores, insert the - // set integer. - if (IntTy && (BeginOffset > NewAllocaBeginOffset || - EndOffset < NewAllocaEndOffset)) { - assert(!II.isVolatile()); Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); - Old = convertValue(TD, IRB, Old, IntTy); - assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); - uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); - } + V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec")); + } else if (IntTy) { + // If this is a memset on an alloca where we can widen stores, insert the + // set integer. + assert(!II.isVolatile()); - if (V->getType() != AllocaTy) + V = getIntegerSplat(IRB, II.getValue(), Size); + + if (IntTy && (BeginOffset != NewAllocaBeginOffset || + EndOffset != NewAllocaBeginOffset)) { + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Old = convertValue(TD, IRB, Old, IntTy); + assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); + uint64_t Offset = BeginOffset - NewAllocaBeginOffset; + V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); + } else { + assert(V->getType() == IntTy && + "Wrong type for an alloca wide integer!"); + } V = convertValue(TD, IRB, V, AllocaTy); + } else { + // Established these invariants above. 
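getIntegerSplat above replicates a byte across Size bytes by multiplying the zero-extended byte with AllOnes(Size*8) / AllOnes(8), i.e. the 0x0101...01 pattern. A standalone check of that arithmetic for Size == 4 (illustrative only, not the pass's IR):

#include <cassert>
#include <cstdint>

static uint32_t splatByte4(uint8_t V) {
  uint32_t AllOnes = 0xFFFFFFFFu;
  uint32_t Multiplier = AllOnes / 0xFFu;  // 0x01010101
  return static_cast<uint32_t>(V) * Multiplier;
}

int main() {
  assert(splatByte4(0xAB) == 0xABABABABu);
  assert(splatByte4(0x00) == 0x00000000u);
  assert(splatByte4(0xFF) == 0xFFFFFFFFu);
  return 0;
}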
+ assert(BeginOffset == NewAllocaBeginOffset); + assert(EndOffset == NewAllocaEndOffset); + + V = getIntegerSplat(IRB, II.getValue(), + TD.getTypeSizeInBits(ScalarTy)/8); + if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) + V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements()); + + V = convertValue(TD, IRB, V, AllocaTy); + } Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), II.isVolatile()); @@ -2814,37 +2900,22 @@ private: // Record this instruction for deletion. Pass.DeadInsts.insert(&II); - bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && - EndOffset == NewAllocaEndOffset; - bool IsVectorElement = VecTy && !IsWholeAlloca; - uint64_t Size = EndOffset - BeginOffset; - IntegerType *SubIntTy - = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; - - Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() - : II.getRawDest()->getType(); - if (!EmitMemCpy) { - if (IsVectorElement) - OtherPtrTy = VecTy->getElementType()->getPointerTo(); - else if (IntTy && !IsWholeAlloca) - OtherPtrTy = SubIntTy->getPointerTo(); - else - OtherPtrTy = NewAI.getType(); - } - - // Compute the other pointer, folding as much as possible to produce - // a single, simple GEP in most cases. - Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); - OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); - // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. + Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); if (AllocaInst *AI = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) Pass.Worklist.insert(AI); if (EmitMemCpy) { + Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() + : II.getRawDest()->getType(); + + // Compute the other pointer, folding as much as possible to produce + // a single, simple GEP in most cases. + OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, + getName("." + OtherPtr->getName())); + Value *OurPtr = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType() : II.getRawSource()->getType()); @@ -2865,18 +2936,38 @@ private: if (!Align) Align = 1; - Value *SrcPtr = OtherPtr; + bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && + EndOffset == NewAllocaEndOffset; + uint64_t Size = EndOffset - BeginOffset; + unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0; + unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0; + unsigned NumElements = EndIndex - BeginIndex; + IntegerType *SubIntTy + = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; + + Type *OtherPtrTy = NewAI.getType(); + if (VecTy && !IsWholeAlloca) { + if (NumElements == 1) + OtherPtrTy = VecTy->getElementType(); + else + OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements); + + OtherPtrTy = OtherPtrTy->getPointerTo(); + } else if (IntTy && !IsWholeAlloca) { + OtherPtrTy = SubIntTy->getPointerTo(); + } + + Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, + getName("." + OtherPtr->getName())); Value *DstPtr = &NewAI; if (!IsDest) std::swap(SrcPtr, DstPtr); Value *Src; - if (IsVectorElement && !IsDest) { - // We have to extract rather than load. 
- Src = IRB.CreateExtractElement( - IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), - IRB.getInt32(getIndex(BeginOffset)), - getName(".copyextract")); + if (VecTy && !IsWholeAlloca && !IsDest) { + Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec")); } else if (IntTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); @@ -2889,7 +2980,11 @@ private: getName(".copyload")); } - if (IntTy && !IsWholeAlloca && IsDest) { + if (VecTy && !IsWholeAlloca && IsDest) { + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec")); + } else if (IntTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); Old = convertValue(TD, IRB, Old, IntTy); @@ -2899,14 +2994,6 @@ private: Src = convertValue(TD, IRB, Src, NewAllocaTy); } - if (IsVectorElement && IsDest) { - // We have to insert into a loaded copy before storing. - Src = IRB.CreateInsertElement( - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - Src, IRB.getInt32(getIndex(BeginOffset)), - getName(".insert")); - } - StoreInst *Store = cast<StoreInst>( IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile())); (void)Store; diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 9160f04fe2..3af62ebcef 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -111,13 +111,11 @@ static bool markAliveBlocks(BasicBlock *BB, SmallVector<BasicBlock*, 128> Worklist; Worklist.push_back(BB); + Reachable.insert(BB); bool Changed = false; do { BB = Worklist.pop_back_val(); - if (!Reachable.insert(BB)) - continue; - // Do a quick scan of the basic block, turning any obviously unreachable // instructions into LLVM unreachable insts. The instruction combining pass // canonicalizes unreachable insts into stores to null or undef. @@ -176,7 +174,8 @@ static bool markAliveBlocks(BasicBlock *BB, Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - Worklist.push_back(*SI); + if (Reachable.insert(*SI)) + Worklist.push_back(*SI); } while (!Worklist.empty()); return Changed; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 0e56817a1b..58d973a61a 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -928,3 +928,38 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { return 0; } + +bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, + DIBuilder &Builder) { + DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); + if (!DDI) + return false; + DIVariable DIVar(DDI->getVariable()); + if (!DIVar.Verify()) + return false; + + // Create a copy of the original DIDescriptor for user variable, appending + // "deref" operation to a list of address elements, as new llvm.dbg.declare + // will take a value storing address of the memory for variable, not + // alloca itself. 
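The SimplifyCFGPass hunk above changes markAliveBlocks to insert a block into Reachable before pushing it, so each block is queued at most once instead of being filtered on pop. A standalone sketch of that worklist pattern on a toy digraph (the graph and names are made up for the example):

#include <cassert>
#include <set>
#include <vector>

static std::set<int> reachableFrom(int Start,
                                   const std::vector<std::vector<int>> &Succ) {
  std::set<int> Reachable;
  std::vector<int> Worklist;
  Worklist.push_back(Start);
  Reachable.insert(Start);
  while (!Worklist.empty()) {
    int N = Worklist.back();
    Worklist.pop_back();
    for (int S : Succ[N])
      if (Reachable.insert(S).second)  // only queue nodes seen for the first time
        Worklist.push_back(S);
  }
  return Reachable;
}

int main() {
  // 0 -> 1 -> 2 -> 1 (cycle); node 3 -> 1 but 3 is never reached from 0.
  std::vector<std::vector<int>> Succ = {{1}, {2}, {1}, {1}};
  std::set<int> R = reachableFrom(0, Succ);
  assert((R == std::set<int>{0, 1, 2}));
  return 0;
}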
+ Type *Int64Ty = Type::getInt64Ty(AI->getContext()); + SmallVector<Value*, 4> NewDIVarAddress; + if (DIVar.hasComplexAddress()) { + for (unsigned i = 0, n = DIVar.getNumAddrElements(); i < n; ++i) { + NewDIVarAddress.push_back( + ConstantInt::get(Int64Ty, DIVar.getAddrElement(i))); + } + } + NewDIVarAddress.push_back(ConstantInt::get(Int64Ty, DIBuilder::OpDeref)); + DIVariable NewDIVar = Builder.createComplexVariable( + DIVar.getTag(), DIVar.getContext(), DIVar.getName(), + DIVar.getFile(), DIVar.getLineNumber(), DIVar.getType(), + NewDIVarAddress, DIVar.getArgNumber()); + + // Insert llvm.dbg.declare in the same basic block as the original alloca, + // and remove old llvm.dbg.declare. + BasicBlock *BB = AI->getParent(); + Builder.insertDeclare(NewAllocaAddress, NewDIVar, BB); + DDI->eraseFromParent(); + return true; +} diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index feeececedb..d143f919ce 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -44,16 +44,17 @@ VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect.")); static cl::opt<bool> -EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, +EnableIfConversion("enable-if-conversion", cl::init(false), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); namespace { /// The LoopVectorize Pass. struct LoopVectorize : public LoopPass { - static char ID; // Pass identification, replacement for typeid + /// Pass identification, replacement for typeid + static char ID; - LoopVectorize() : LoopPass(ID) { + explicit LoopVectorize() : LoopPass(ID) { initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); } @@ -85,28 +86,27 @@ struct LoopVectorize : public LoopPass { } // Select the preffered vectorization factor. - unsigned VF = 1; - if (VectorizationFactor == 0) { - const VectorTargetTransformInfo *VTTI = 0; - if (TTI) - VTTI = TTI->getVectorTargetTransformInfo(); - // Use the cost model. - LoopVectorizationCostModel CM(L, SE, &LVL, VTTI); - VF = CM.findBestVectorizationFactor(); - - if (VF == 1) { - DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); - return false; - } - - } else { - // Use the user command flag. - VF = VectorizationFactor; + const VectorTargetTransformInfo *VTTI = 0; + if (TTI) + VTTI = TTI->getVectorTargetTransformInfo(); + // Use the cost model. + LoopVectorizationCostModel CM(L, SE, &LVL, VTTI); + + // Check the function attribues to find out if this function should be + // optimized for size. + Function *F = L->getHeader()->getParent(); + Attributes::AttrVal SzAttr= Attributes::OptimizeForSize; + bool OptForSize = F->getFnAttributes().hasAttribute(SzAttr); + + unsigned VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor); + + if (VF == 1) { + DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); + return false; } DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<< - L->getHeader()->getParent()->getParent()->getModuleIdentifier()<< - "\n"); + F->getParent()->getModuleIdentifier()<<"\n"); // If we decided that it is *legal* to vectorizer the loop then do it. InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF); @@ -407,27 +407,27 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- vector loop bypass. 
- / | - / v + [ ] <-- vector loop bypass. + / | + / v | [ ] <-- vector pre header. | | | v | [ ] \ | [ ]_| <-- vector loop. | | - \ v - >[ ] <--- middle-block. - / | - / v + \ v + >[ ] <--- middle-block. + / | + / v | [ ] <--- new preheader. | | | v | [ ] \ | [ ]_| <-- old scalar loop to handle remainder. - \ | - \ v - >[ ] <-- exit block. + \ | + \ v + >[ ] <-- exit block. ... */ @@ -954,7 +954,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // At this point we generate the predication tree. There may be // duplications since this is a simple recursive scan, but future // optimizations will clean it up. - Value *Cond = createBlockInMask(P->getIncomingBlock(0)); + Value *Cond = createEdgeMask(P->getIncomingBlock(0), P->getParent()); WidenMap[P] = Builder.CreateSelect(Cond, getVectorValue(P->getIncomingValue(0)), @@ -1204,8 +1204,20 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - /// Vectorize bitcasts. CastInst *CI = dyn_cast<CastInst>(it); + /// Optimize the special case where the source is the induction + /// variable. Notice that we can only optimize the 'trunc' case + /// because: a. FP conversions lose precision, b. sext/zext may wrap, + /// c. other casts depend on pointer size. + if (CI->getOperand(0) == OldInduction && + it->getOpcode() == Instruction::Trunc) { + Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, + CI->getType()); + Value *Broadcasted = getBroadcastInstrs(ScalarCast); + WidenMap[it] = getConsecutiveVector(Broadcasted); + break; + } + /// Vectorize casts. Value *A = getVectorValue(it->getOperand(0)); Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF); WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy); @@ -1263,6 +1275,10 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) { BasicBlock *BB = LoopBlocks[i]; + // We don't support switch statements inside loops. + if (!isa<BranchInst>(BB->getTerminator())) + return false; + // We must have at most two predecessors because we need to convert // all PHIs to selects. unsigned Preds = std::distance(pred_begin(BB), pred_end(BB)); @@ -1832,6 +1848,15 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { return NoInduction; } +bool LoopVectorizationLegality::isInductionVariable(const Value *V) { + Value *In0 = const_cast<Value*>(V); + PHINode *PN = dyn_cast_or_null<PHINode>(In0); + if (!PN) + return false; + + return Inductions.count(PN); +} + bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { assert(TheLoop->contains(BB) && "Unknown block used"); @@ -1846,7 +1871,7 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow()) return false; - // The isntructions below can trap. + // The instructions below can trap. switch (it->getOpcode()) { default: continue; case Instruction::UDiv: @@ -1870,7 +1895,48 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) { } unsigned -LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) { +LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, + unsigned UserVF) { + if (OptForSize && Legal->getRuntimePointerCheck()->Need) { + DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n"); + return 1; + } + + // Find the trip count. 
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch()); + DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n"); + + unsigned VF = MaxVectorSize; + + // If we optimize the program for size, avoid creating the tail loop. + if (OptForSize) { + // If we are unable to calculate the trip count then don't try to vectorize. + if (TC < 2) { + DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); + return 1; + } + + // Find the maximum SIMD width that can fit within the trip count. + VF = TC % MaxVectorSize; + + if (VF == 0) + VF = MaxVectorSize; + + // If the trip count that we found modulo the vectorization factor is not + // zero then we require a tail. + if (VF < 2) { + DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); + return 1; + } + } + + if (UserVF != 0) { + assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); + DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n"); + + return UserVF; + } + if (!VTTI) { DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n"); return 1; @@ -2052,6 +2118,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { + // We optimize the truncation of induction variable. + // The cost of these is the same as the scalar operation. + if (I->getOpcode() == Instruction::Trunc && + Legal->isInductionVariable(I->getOperand(0))) + return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), + I->getOperand(0)->getType()); + Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } diff --git a/lib/Transforms/Vectorize/LoopVectorize.h b/lib/Transforms/Vectorize/LoopVectorize.h index 9d6d80e22b..e5ef29052e 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.h +++ b/lib/Transforms/Vectorize/LoopVectorize.h @@ -320,6 +320,9 @@ public: /// Returns the induction variables found in the loop. InductionList *getInductionVars() { return &Inductions; } + /// Returns True if V is an induction variable in this loop. + bool isInductionVariable(const Value *V); + /// Return true if the block BB needs to be predicated in order for the loop /// to be vectorized. bool blockNeedsPredication(BasicBlock *BB); @@ -420,10 +423,11 @@ public: const VectorTargetTransformInfo *Vtti): TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { } - /// Returns the most profitable vectorization factor for the loop that is - /// smaller or equal to the VF argument. This method checks every power - /// of two up to VF. - unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize); + /// Returns the most profitable vectorization factor in powers of two. + /// This method checks every power of two up to VF. If UserVF is not ZERO + /// then this vectorization factor will be selected if vectorization is + /// possible. + unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF); private: /// Returns the expected execution cost. The unit of the cost does diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp index 3fb36cadea..19eefd2f87 100644 --- a/lib/Transforms/Vectorize/Vectorize.cpp +++ b/lib/Transforms/Vectorize/Vectorize.cpp @@ -1,4 +1,4 @@ -//===-- Vectorize.cpp -----------------------------------------------------===// + //===-- Vectorize.cpp -----------------------------------------------------===// // // The LLVM Compiler Infrastructure // |
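A note on the SimplifyCFGPass hunk above: moving Reachable.insert from the pop side of the loop to the push side means a block is enqueued at most once, even when several predecessors reach it. The following is a minimal standalone sketch of that discipline on a hypothetical adjacency-list graph; the Graph alias, countReachable name, and int node ids are illustrative stand-ins, not LLVM's types.

    #include <cstddef>
    #include <set>
    #include <vector>

    // Hypothetical adjacency-list graph used only for illustration.
    using Graph = std::vector<std::vector<int>>;

    // Mirrors the new scheme: mark a node visited when it is pushed, so it
    // can never sit on the worklist twice.
    std::size_t countReachable(const Graph &G, int Entry) {
      std::set<int> Visited;
      std::vector<int> Worklist;
      Worklist.push_back(Entry);
      Visited.insert(Entry);                // insert at push time
      while (!Worklist.empty()) {
        int N = Worklist.back();
        Worklist.pop_back();
        for (int Succ : G[N])
          if (Visited.insert(Succ).second)  // only push unseen successors
            Worklist.push_back(Succ);
      }
      return Visited.size();
    }

    int main() {
      // Diamond: 0 -> {1, 2}, 1 -> 3, 2 -> 3; node 3 is reached twice but
      // enqueued only once.
      Graph G = {{1, 2}, {3}, {3}, {}};
      return countReachable(G, 0) == 4 ? 0 : 1;
    }

The old scheme (insert on pop, continue on a failed insert) visits the same set of blocks, but it can grow the worklist with duplicates before they are filtered out.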
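The LoopVectorize hunk that special-cases trunc of the induction variable relies on truncation to N bits commuting with addition modulo 2^N: truncating the scalar induction once, broadcasting it, and adding the per-lane step produces the same lanes as truncating each lane's induction value individually. Below is a small self-contained check of that identity in plain C++; the uint16_t width and VF of 4 are illustrative assumptions, not the pass's actual code.

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned VF = 4;  // illustrative vector width
      for (uint64_t I0 = 0; I0 < 100000; I0 += VF) {
        // One scalar trunc of the base induction value, then broadcast.
        uint16_t Base = static_cast<uint16_t>(I0);
        for (unsigned Lane = 0; Lane < VF; ++Lane) {
          // Per-lane trunc (what widening the trunc would compute).
          uint16_t PerLane   = static_cast<uint16_t>(I0 + Lane);
          // Broadcast base plus consecutive lane offset.
          uint16_t Broadcast = static_cast<uint16_t>(Base + Lane);
          assert(PerLane == Broadcast && "trunc commutes with the lane step");
        }
      }
      return 0;
    }

As the patch's own comment notes, the shortcut is restricted to trunc because FP conversions lose precision, sext/zext may wrap, and other casts depend on the pointer size.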
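The -Os path of the new selectVectorizationFactor can be read in isolation: with a known trip count it takes the remainder modulo the maximum SIMD width, falls back to the full width when the remainder is zero, and refuses to vectorize when the chosen width would still be scalar. A compressed sketch of just that arithmetic follows; MaxVectorSize = 8 is an assumption for illustration, and the cost model, the user-forced width, and the runtime pointer-check handling from the real function are omitted.

    #include <cstdio>

    static const unsigned MaxVectorSize = 8;  // illustrative maximum SIMD width

    unsigned pickWidthForSize(unsigned TripCount) {
      // Unknown (0) or tiny trip counts would force a scalar tail loop.
      if (TripCount < 2)
        return 1;
      unsigned VF = TripCount % MaxVectorSize;
      if (VF == 0)
        VF = MaxVectorSize;  // trip count is an exact multiple: no tail needed
      if (VF < 2)
        return 1;            // width 1 is scalar; a tail loop would be required
      return VF;
    }

    int main() {
      const unsigned Tests[] = {0, 5, 16, 17, 100};
      for (unsigned TC : Tests)
        std::printf("TC=%u -> VF=%u\n", TC, pickWidthForSize(TC));
      return 0;
    }

For example, a trip count of 16 yields VF = 8 with no tail, while a trip count of 17 leaves a remainder of 1, so the function bails out rather than emit a one-iteration scalar tail when optimizing for size.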