diff options
author | Nadav Rotem <nrotem@apple.com> | 2012-12-04 06:15:11 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2012-12-04 06:15:11 +0000 |
commit | 319d594e22c3db55114f233ca398f9760e3f6ed9 (patch) | |
tree | f388bd59d3d495e88504891426aa7bb62f23c1b6 /lib/Transforms/Vectorize/LoopVectorize.cpp | |
parent | 6c55412ea4b39103a3a5764d49ddfdf50e066d56 (diff) |
Add the last part that is needed for vectorization of if-converted code.
Added the code that actually performs the if-conversion during vectorization.
We can now vectorize this code:
    for (int i=0; i<n; ++i) {
      unsigned k = 0;
      if (a[i] > b[i])     <------ IF inside the loop.
        k = k * 5 + 3;
      a[i] = k;            <------ k is a PHI node that becomes a vector-select.
    }
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169217 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 413 |
1 files changed, 251 insertions, 162 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0e33228cc9..f538e08179 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -50,6 +50,7 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -134,6 +135,9 @@ public: } private: + /// A small list of PHINodes. + typedef SmallVector<PHINode*, 4> PhiVector; + /// Add code that checks at runtime if the accessed arrays overlap. /// Returns the comperator value or NULL if no check is needed. Value *addRuntimeCheck(LoopVectorizationLegality *Legal, @@ -142,6 +146,19 @@ private: void createEmptyLoop(LoopVectorizationLegality *Legal); /// Copy and widen the instructions from the old loop. void vectorizeLoop(LoopVectorizationLegality *Legal); + + /// A helper function that computes the predicate of the block BB, assuming + /// that the header block of the loop is set to True. It returns the *entry* + /// mask for the block BB. + Value *createBlockInMask(BasicBlock *BB); + /// A helper function that computes the predicate of the edge between SRC + /// and DST. + Value *createEdgeMask(BasicBlock *Src, BasicBlock *Dst); + + /// A helper function to vectorize a single BB within the innermost loop. + void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB, + PhiVector *PV); + /// Insert the new loop to the loop hierarchy and pass manager /// and update the analysis passes. void updateAnalysis(); @@ -816,7 +833,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { DL->getIntPtrType(SE->getContext()); // Find the loop boundaries. 
- const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader()); + const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch()); assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); // Get the total trip count from the count by adding 1. @@ -838,7 +855,6 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { OldInduction->getIncomingValueForBlock(BypassBlock): ConstantInt::get(IdxTy, 0); - assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop"); assert(BypassBlock && "Invalid loop structure"); // Generate the code that checks in runtime if arrays overlap. @@ -1044,7 +1060,6 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // the cost-model. // //===------------------------------------------------===// - typedef SmallVector<PHINode*, 4> PhiVector; BasicBlock &BB = *OrigLoop->getHeader(); Constant *Zero = ConstantInt::get( IntegerType::getInt32Ty(BB.getContext()), 0); @@ -1059,24 +1074,220 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // construct the PHI. PhiVector RdxPHIsToFix; - // For each instruction in the old loop. - for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { - Instruction *Inst = it; + // Scan the loop in a topological order to ensure that defs are vectorized + // before users. + LoopBlocksDFS DFS(OrigLoop); + DFS.perform(LI); + + // Vectorize all of the blocks in the original loop. + for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), + be = DFS.endRPO(); bb != be; ++bb) + vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix); + + // At this point every instruction in the original loop is widened to + // a vector form. We are almost done. Now, we need to fix the PHI nodes + // that we vectorized. The PHI nodes are currently empty because we did + // not want to introduce cycles. Notice that the remaining PHI nodes + // that we need to fix are reduction variables. 
+ + // Create the 'reduced' values for each of the induction vars. + // The reduced values are the vector values that we scalarize and combine + // after the loop is finished. + for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end(); + it != e; ++it) { + PHINode *RdxPhi = *it; + PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]); + assert(RdxPhi && "Unable to recover vectorized PHI"); + + // Find the reduction variable descriptor. + assert(Legal->getReductionVars()->count(RdxPhi) && + "Unable to find the reduction variable"); + LoopVectorizationLegality::ReductionDescriptor RdxDesc = + (*Legal->getReductionVars())[RdxPhi]; + + // We need to generate a reduction vector from the incoming scalar. + // To do so, we need to generate the 'identity' vector and overide + // one of the elements with the incoming scalar reduction. We need + // to do it in the vector-loop preheader. + Builder.SetInsertPoint(LoopBypassBlock->getTerminator()); + + // This is the vector-clone of the value that leaves the loop. + Value *VectorExit = getVectorValue(RdxDesc.LoopExitInstr); + Type *VecTy = VectorExit->getType(); + + // Find the reduction identity variable. Zero for addition, or, xor, + // one for multiplication, -1 for And. + Constant *Identity = getUniformVector(getReductionIdentity(RdxDesc.Kind), + VecTy->getScalarType()); + + // This vector is the Identity vector where the first element is the + // incoming scalar reduction. + Value *VectorStart = Builder.CreateInsertElement(Identity, + RdxDesc.StartValue, Zero); + + // Fix the vector-loop phi. + // We created the induction variable so we know that the + // preheader is the first entry. + BasicBlock *VecPreheader = Induction->getIncomingBlock(0); + + // Reductions do not have to start at zero. They can start with + // any loop invariant values. 
+ VecRdxPhi->addIncoming(VectorStart, VecPreheader); + unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); + Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx)); + VecRdxPhi->addIncoming(Val, LoopVectorBody); + + // Before each round, move the insertion point right between + // the PHIs and the values we are going to write. + // This allows us to write both PHINodes and the extractelement + // instructions. + Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt()); + + // This PHINode contains the vectorized reduction variable, or + // the initial value vector, if we bypass the vector loop. + PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi"); + NewPhi->addIncoming(VectorStart, LoopBypassBlock); + NewPhi->addIncoming(getVectorValue(RdxDesc.LoopExitInstr), LoopVectorBody); + + // Extract the first scalar. + Value *Scalar0 = + Builder.CreateExtractElement(NewPhi, Builder.getInt32(0)); + // Extract and reduce the remaining vector elements. + for (unsigned i=1; i < VF; ++i) { + Value *Scalar1 = + Builder.CreateExtractElement(NewPhi, Builder.getInt32(i)); + switch (RdxDesc.Kind) { + case LoopVectorizationLegality::IntegerAdd: + Scalar0 = Builder.CreateAdd(Scalar0, Scalar1); + break; + case LoopVectorizationLegality::IntegerMult: + Scalar0 = Builder.CreateMul(Scalar0, Scalar1); + break; + case LoopVectorizationLegality::IntegerOr: + Scalar0 = Builder.CreateOr(Scalar0, Scalar1); + break; + case LoopVectorizationLegality::IntegerAnd: + Scalar0 = Builder.CreateAnd(Scalar0, Scalar1); + break; + case LoopVectorizationLegality::IntegerXor: + Scalar0 = Builder.CreateXor(Scalar0, Scalar1); + break; + default: + llvm_unreachable("Unknown reduction operation"); + } + } + + // Now, we need to fix the users of the reduction variable + // inside and outside of the scalar remainder loop. + // We know that the loop is in LCSSA form. We need to update the + // PHI nodes in the exit blocks. 
+ for (BasicBlock::iterator LEI = LoopExitBlock->begin(), + LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) { + PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI); + if (!LCSSAPhi) continue; + + // All PHINodes need to have a single entry edge, or two if + // we already fixed them. + assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI"); + + // We found our reduction value exit-PHI. Update it with the + // incoming bypass edge. + if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) { + // Add an edge coming from the bypass. + LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock); + break; + } + }// end of the LCSSA phi scan. + + // Fix the scalar loop reduction variable with the incoming reduction sum + // from the vector body and from the backedge value. + int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); + int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block. + (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0); + (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); + }// end of for each redux variable. +} + +Value *InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { + assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) && + "Invalid edge"); + + Value *SrcMask = createBlockInMask(Src); + + // The terminator has to be a branch inst! + BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator()); + assert(BI && "Unexpected terminator found"); + + Value *EdgeMask = SrcMask; + if (BI->isConditional()) { + EdgeMask = getVectorValue(BI->getCondition()); + if (BI->getSuccessor(0) != Dst) + EdgeMask = Builder.CreateNot(EdgeMask); + } + + return Builder.CreateAnd(EdgeMask, SrcMask); +} - switch (Inst->getOpcode()) { +Value *InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { + assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); + + // Loop incoming mask is all-one. 
+ if (OrigLoop->getHeader() == BB) + return getVectorValue( + ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1)); + + // This is the block mask. We OR all incoming edges, and with zero. + Value *BlockMask = getVectorValue( + ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0)); + + // For each pred: + for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) + BlockMask = Builder.CreateOr(BlockMask, createEdgeMask(*it, BB)); + + return BlockMask; +} + +void +InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, + BasicBlock *BB, PhiVector *PV) { + Constant *Zero = + ConstantInt::get(IntegerType::getInt32Ty(BB->getContext()), 0); + + // For each instruction in the old loop. + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + switch (it->getOpcode()) { case Instruction::Br: // Nothing to do for PHIs and BR, since we already took care of the // loop control flow instructions. continue; case Instruction::PHI:{ - PHINode* P = cast<PHINode>(Inst); + PHINode* P = cast<PHINode>(it); // Handle reduction variables: if (Legal->getReductionVars()->count(P)) { // This is phase one of vectorizing PHIs. - Type *VecTy = VectorType::get(Inst->getType(), VF); - WidenMap[Inst] = PHINode::Create(VecTy, 2, "vec.phi", - LoopVectorBody->getFirstInsertionPt()); - RdxPHIsToFix.push_back(P); + Type *VecTy = VectorType::get(it->getType(), VF); + WidenMap[it] = + PHINode::Create(VecTy, 2, "vec.phi", + LoopVectorBody->getFirstInsertionPt()); + PV->push_back(P); + continue; + } + + // Check for PHI nodes that are lowered to vector selects. + if (P->getParent() != OrigLoop->getHeader()) { + // We know that all PHIs in non header blocks are converted into + // selects, so we don't have to worry about the insertion order and we + // can just use the builder. + + // At this point we generate the predication tree. 
There may be + // duplications since this is a simple recursive scan, but future + // optimizations will clean it up. + Value *Cond = createBlockInMask(P->getIncomingBlock(0)); + WidenMap[P] = + Builder.CreateSelect(Cond, + getVectorValue(P->getIncomingValue(0)), + getVectorValue(P->getIncomingValue(1)), + "predphi"); continue; } @@ -1099,8 +1310,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Handle pointer inductions. assert(P->getType()->isPointerTy() && "Unexpected type."); Value *StartIdx = OldInduction ? - Legal->getInductionVars()->lookup(OldInduction) : - ConstantInt::get(Induction->getType(), 0); + Legal->getInductionVars()->lookup(OldInduction) : + ConstantInt::get(Induction->getType(), 0); // This is the pointer value coming into the loop. Value *StartPtr = Legal->getInductionVars()->lookup(P); @@ -1121,7 +1332,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { "insert.gep"); } - WidenMap[Inst] = VecVal; + WidenMap[it] = VecVal; continue; } case Instruction::Add: @@ -1143,13 +1354,13 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { case Instruction::Or: case Instruction::Xor: { // Just widen binops. - BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); - Value *A = getVectorValue(Inst->getOperand(0)); - Value *B = getVectorValue(Inst->getOperand(1)); + BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it); + Value *A = getVectorValue(it->getOperand(0)); + Value *B = getVectorValue(it->getOperand(1)); // Use this vector value for all users of the original instruction. Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); - WidenMap[Inst] = V; + WidenMap[it] = V; // Update the NSW, NUW and Exact flags. BinaryOperator *VecOp = cast<BinaryOperator>(V); @@ -1165,7 +1376,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Widen selects. // If the selector is loop invariant we can create a select // instruction with a scalar condition. 
Otherwise, use vector-select. - Value *Cond = Inst->getOperand(0); + Value *Cond = it->getOperand(0); bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop); // The condition can be loop invariant but still defined inside the @@ -1176,29 +1387,29 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { if (InvariantCond) Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0)); - Value *Op0 = getVectorValue(Inst->getOperand(1)); - Value *Op1 = getVectorValue(Inst->getOperand(2)); - WidenMap[Inst] = Builder.CreateSelect(Cond, Op0, Op1); + Value *Op0 = getVectorValue(it->getOperand(1)); + Value *Op1 = getVectorValue(it->getOperand(2)); + WidenMap[it] = Builder.CreateSelect(Cond, Op0, Op1); break; } case Instruction::ICmp: case Instruction::FCmp: { // Widen compares. Generate vector compares. - bool FCmp = (Inst->getOpcode() == Instruction::FCmp); - CmpInst *Cmp = dyn_cast<CmpInst>(Inst); - Value *A = getVectorValue(Inst->getOperand(0)); - Value *B = getVectorValue(Inst->getOperand(1)); + bool FCmp = (it->getOpcode() == Instruction::FCmp); + CmpInst *Cmp = dyn_cast<CmpInst>(it); + Value *A = getVectorValue(it->getOperand(0)); + Value *B = getVectorValue(it->getOperand(1)); if (FCmp) - WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B); + WidenMap[it] = Builder.CreateFCmp(Cmp->getPredicate(), A, B); else - WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B); + WidenMap[it] = Builder.CreateICmp(Cmp->getPredicate(), A, B); break; } case Instruction::Store: { // Attempt to issue a wide store. - StoreInst *SI = dyn_cast<StoreInst>(Inst); + StoreInst *SI = dyn_cast<StoreInst>(it); Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF); Value *Ptr = SI->getPointerOperand(); unsigned Alignment = SI->getAlignment(); @@ -1210,7 +1421,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // This store does not use GEPs. 
if (!Legal->isConsecutivePtr(Ptr)) { - scalarizeInstruction(Inst); + scalarizeInstruction(it); break; } @@ -1237,7 +1448,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { } case Instruction::Load: { // Attempt to issue a wide load. - LoadInst *LI = dyn_cast<LoadInst>(Inst); + LoadInst *LI = dyn_cast<LoadInst>(it); Type *RetTy = VectorType::get(LI->getType(), VF); Value *Ptr = LI->getPointerOperand(); unsigned Alignment = LI->getAlignment(); @@ -1247,7 +1458,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // scalarize the load. bool Con = Legal->isConsecutivePtr(Ptr); if (Legal->isUniform(Ptr) || !Con) { - scalarizeInstruction(Inst); + scalarizeInstruction(it); break; } @@ -1272,7 +1483,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { LI = Builder.CreateLoad(Ptr); LI->setAlignment(Alignment); // Use this vector value for all users of the load. - WidenMap[Inst] = LI; + WidenMap[it] = LI; break; } case Instruction::ZExt: @@ -1288,144 +1499,22 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { case Instruction::FPTrunc: case Instruction::BitCast: { /// Vectorize bitcasts. - CastInst *CI = dyn_cast<CastInst>(Inst); - Value *A = getVectorValue(Inst->getOperand(0)); + CastInst *CI = dyn_cast<CastInst>(it); + Value *A = getVectorValue(it->getOperand(0)); Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF); - WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy); + WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy); break; } - + default: /// All other instructions are unsupported. Scalarize them. - scalarizeInstruction(Inst); + scalarizeInstruction(it); break; }// end of switch. }// end of for_each instr. - - // At this point every instruction in the original loop is widended to - // a vector form. We are almost done. Now, we need to fix the PHI nodes - // that we vectorized. 
The PHI nodes are currently empty because we did - // not want to introduce cycles. Notice that the remaining PHI nodes - // that we need to fix are reduction variables. - - // Create the 'reduced' values for each of the induction vars. - // The reduced values are the vector values that we scalarize and combine - // after the loop is finished. - for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end(); - it != e; ++it) { - PHINode *RdxPhi = *it; - PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]); - assert(RdxPhi && "Unable to recover vectorized PHI"); - - // Find the reduction variable descriptor. - assert(Legal->getReductionVars()->count(RdxPhi) && - "Unable to find the reduction variable"); - LoopVectorizationLegality::ReductionDescriptor RdxDesc = - (*Legal->getReductionVars())[RdxPhi]; - - // We need to generate a reduction vector from the incoming scalar. - // To do so, we need to generate the 'identity' vector and overide - // one of the elements with the incoming scalar reduction. We need - // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlock->getTerminator()); - - // This is the vector-clone of the value that leaves the loop. - Value *VectorExit = getVectorValue(RdxDesc.LoopExitInstr); - Type *VecTy = VectorExit->getType(); - - // Find the reduction identity variable. Zero for addition, or, xor, - // one for multiplication, -1 for And. - Constant *Identity = getUniformVector(getReductionIdentity(RdxDesc.Kind), - VecTy->getScalarType()); - - // This vector is the Identity vector where the first element is the - // incoming scalar reduction. - Value *VectorStart = Builder.CreateInsertElement(Identity, - RdxDesc.StartValue, Zero); - - // Fix the vector-loop phi. - // We created the induction variable so we know that the - // preheader is the first entry. - BasicBlock *VecPreheader = Induction->getIncomingBlock(0); - - // Reductions do not have to start at zero. 
They can start with - // any loop invariant values. - VecRdxPhi->addIncoming(VectorStart, VecPreheader); - unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); - Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx)); - VecRdxPhi->addIncoming(Val, LoopVectorBody); - - // Before each round, move the insertion point right between - // the PHIs and the values we are going to write. - // This allows us to write both PHINodes and the extractelement - // instructions. - Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt()); - - // This PHINode contains the vectorized reduction variable, or - // the initial value vector, if we bypass the vector loop. - PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi"); - NewPhi->addIncoming(VectorStart, LoopBypassBlock); - NewPhi->addIncoming(getVectorValue(RdxDesc.LoopExitInstr), LoopVectorBody); - - // Extract the first scalar. - Value *Scalar0 = - Builder.CreateExtractElement(NewPhi, Builder.getInt32(0)); - // Extract and reduce the remaining vector elements. - for (unsigned i=1; i < VF; ++i) { - Value *Scalar1 = - Builder.CreateExtractElement(NewPhi, Builder.getInt32(i)); - switch (RdxDesc.Kind) { - case LoopVectorizationLegality::IntegerAdd: - Scalar0 = Builder.CreateAdd(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerMult: - Scalar0 = Builder.CreateMul(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerOr: - Scalar0 = Builder.CreateOr(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerAnd: - Scalar0 = Builder.CreateAnd(Scalar0, Scalar1); - break; - case LoopVectorizationLegality::IntegerXor: - Scalar0 = Builder.CreateXor(Scalar0, Scalar1); - break; - default: - llvm_unreachable("Unknown reduction operation"); - } - } - - // Now, we need to fix the users of the reduction variable - // inside and outside of the scalar remainder loop. - // We know that the loop is in LCSSA form. 
We need to update the - // PHI nodes in the exit blocks. - for (BasicBlock::iterator LEI = LoopExitBlock->begin(), - LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) { - PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI); - if (!LCSSAPhi) continue; - - // All PHINodes need to have a single entry edge, or two if - // we already fixed them. - assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI"); - - // We found our reduction value exit-PHI. Update it with the - // incoming bypass edge. - if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) { - // Add an edge coming from the bypass. - LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock); - break; - } - }// end of the LCSSA phi scan. - - // Fix the scalar loop reduction variable with the incoming reduction sum - // from the vector body and from the backedge value. - int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); - int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block. - (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0); - (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); - }// end of for each redux variable. } + void InnerLoopVectorizer::updateAnalysis() { // Forget the original basic block. SE->forgetLoop(OrigLoop); |