diff options
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 378 |
1 files changed, 311 insertions, 67 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 892808760f..3f1d82cf5b 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -78,6 +78,10 @@ VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, /// We don't vectorize loops with a known constant trip count below this number. const unsigned TinyTripCountThreshold = 16; +/// When performing a runtime memory check, do not check more than this +/// number of pointers. Notice that the check is quadratic! +const unsigned RuntimeMemoryCheckThreshold = 2; + namespace { // Forward declarations. @@ -114,7 +118,7 @@ public: /// Widen each instruction in the old loop to a new one in the new loop. /// Use the Legality module to find the induction and reduction variables. vectorizeLoop(Legal); - // register the new loop. + // Register the new loop and update the analysis passes. updateAnalysis(); } @@ -123,7 +127,8 @@ private: void createEmptyLoop(LoopVectorizationLegality *Legal); /// Copy and widen the instructions from the old loop. void vectorizeLoop(LoopVectorizationLegality *Legal); - /// Insert the new loop to the loop hierarchy and pass manager. + /// Insert the new loop to the loop hierarchy and pass manager + /// and update the analysis passes. void updateAnalysis(); /// This instruction is un-vectorizable. Implement it as a sequence @@ -242,10 +247,23 @@ public: ReductionKind Kind; }; + // This POD struct holds information about the memory runtime legality + // check that a group of pointers do not overlap. + struct RuntimePointerCheck { + /// This flag indicates if we need to add the runtime check. + bool Need; + /// Holds the pointers that we need to check. + SmallVector<Value*, 2> Pointers; + }; + /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; + /// InductionList saves induction variables and maps them to the initial + /// value entring the loop. + typedef DenseMap<PHINode*, Value*> InductionList; + /// Returns true if it is legal to vectorize this loop. /// This does not mean that it is profitable to vectorize this /// loop, only that it is legal to do so. @@ -257,15 +275,23 @@ public: /// Returns the reduction variables found in the loop. ReductionList *getReductionVars() { return &Reductions; } + /// Returns the induction variables found in the loop. + InductionList *getInductionVars() { return &Inductions; } + /// Check if the pointer returned by this GEP is consecutive /// when the index is vectorized. This happens when the last /// index of the GEP is consecutive, like the induction variable. /// This check allows us to vectorize A[idx] into a wide load/store. bool isConsecutiveGep(Value *Ptr); + /// Returns true if the value V is uniform within the loop. + bool isUniform(Value *V); + /// Returns true if this instruction will remain scalar after vectorization. bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);} + /// Returns the information that we collected about runtime memory check. + RuntimePointerCheck *getRuntimePointerCheck() {return &PtrRtCheck; } private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -286,6 +312,8 @@ private: bool isReductionInstr(Instruction *I, ReductionKind Kind); /// Returns True, if 'Phi' is an induction variable. bool isInductionVariable(PHINode *Phi); + /// Return true if can compute the address bounds of Ptr within the loop. + bool hasComputableBounds(Value *Ptr); /// The loop that we evaluate. Loop *TheLoop; @@ -296,16 +324,25 @@ private: // --- vectorization state --- // - /// Holds the induction variable. + /// Holds the integer induction variable. This is the counter of the + /// loop. PHINode *Induction; /// Holds the reduction variables. ReductionList Reductions; + /// Holds all of the induction variables that we found in the loop. + /// Notice that inductions don't need to start at zero and that induction + /// variables can be pointers. + InductionList Inductions; + /// Allowed outside users. This holds the reduction /// vars which can be accessed from outside the loop. SmallPtrSet<Value*, 4> AllowedExit; /// This set holds the variables which are known to be uniform after /// vectorization. SmallPtrSet<Instruction*, 4> Uniforms; + /// We need to check that all of the pointers in this list are disjoint + /// at runtime. + RuntimePointerCheck PtrRtCheck; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -506,6 +543,10 @@ bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) { return false; } +bool LoopVectorizationLegality::isUniform(Value *V) { + return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); +} + Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) { assert(!V->getType()->isVectorTy() && "Can't widen a vector"); // If we saved a vectorized copy of V, use it. @@ -521,13 +562,7 @@ Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) { Constant* SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) { - SmallVector<Constant*, 8> Indices; - // Create a vector of consecutive numbers from zero to VF. - for (unsigned i = 0; i < VF; ++i) - Indices.push_back(ConstantInt::get(ScalarTy, Val, true)); - - // Add the consecutive indices to the vector value. - return ConstantVector::get(Indices); + return ConstantVector::getSplat(VF, ConstantInt::get(ScalarTy, Val, true)); } void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) { @@ -631,13 +666,29 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { ... */ + OldInduction = Legal->getInduction(); + assert(OldInduction && "We must have a single phi node."); + Type *IdxTy = OldInduction->getType(); + + // Find the loop boundaries. + const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader()); + assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); + + // Get the total trip count from the count by adding 1. + ExitCount = SE->getAddExpr(ExitCount, + SE->getConstant(ExitCount->getType(), 1)); + // We may need to extend the index in case there is a type mismatch. + // We know that the count starts at zero and does not overflow. + // We are using Zext because it should be less expensive. + if (ExitCount->getType() != IdxTy) + ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy); + // This is the original scalar-loop preheader. BasicBlock *BypassBlock = OrigLoop->getLoopPreheader(); BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(ExitBlock && "Must have an exit block"); // The loop index does not have to start at Zero. It starts with this value. - OldInduction = Legal->getInduction(); Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock); assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop"); @@ -655,8 +706,6 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { "scalar.preheader"); // Find the induction variable. BasicBlock *OldBasicBlock = OrigLoop->getHeader(); - assert(OldInduction && "We must have a single phi node."); - Type *IdxTy = OldInduction->getType(); // Use this IR builder to create the loop instructions (Phi, Br, Cmp) // inside the loop. @@ -666,25 +715,11 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { Induction = Builder.CreatePHI(IdxTy, 2, "index"); Constant *Step = ConstantInt::get(IdxTy, VF); - // Find the loop boundaries. - const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader()); - assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); - - // Get the total trip count from the count by adding 1. - ExitCount = SE->getAddExpr(ExitCount, - SE->getConstant(ExitCount->getType(), 1)); - // Expand the trip count and place the new instructions in the preheader. // Notice that the pre-header does not change, only the loop body. SCEVExpander Exp(*SE, "induction"); Instruction *Loc = BypassBlock->getTerminator(); - // We may need to extend the index in case there is a type mismatch. - // We know that the count starts at zero and does not overflow. - // We are using Zext because it should be less expensive. - if (ExitCount->getType() != Induction->getType()) - ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy); - // Count holds the overall loop count (N). Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc); @@ -704,15 +739,121 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { IdxEndRoundDown, StartIdx, "cmp.zero", Loc); + + LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck = + Legal->getRuntimePointerCheck(); + Value *MemoryRuntimeCheck = 0; + if (PtrRtCheck->Need) { + unsigned NumPointers = PtrRtCheck->Pointers.size(); + SmallVector<Value* , 2> Starts; + SmallVector<Value* , 2> Ends; + + // Use this type for pointer arithmetic. + Type* PtrArithTy = PtrRtCheck->Pointers[0]->getType(); + + for (unsigned i=0; i < NumPointers; ++i) { + Value *Ptr = PtrRtCheck->Pointers[i]; + const SCEV *Sc = SE->getSCEV(Ptr); + + if (SE->isLoopInvariant(Sc, OrigLoop)) { + DEBUG(dbgs() << "LV1: Adding RT check for a loop invariant ptr:" << + *Ptr <<"\n"); + Starts.push_back(Ptr); + Ends.push_back(Ptr); + } else { + DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n"); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); + Value *Start = Exp.expandCodeFor(AR->getStart(), PtrArithTy, Loc); + const SCEV *Ex = SE->getExitCount(OrigLoop, OrigLoop->getHeader()); + const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); + assert(!isa<SCEVCouldNotCompute>(ScEnd) && "Invalid scev range."); + Value *End = Exp.expandCodeFor(ScEnd, PtrArithTy, Loc); + Starts.push_back(Start); + Ends.push_back(End); + } + } + + for (unsigned i=0; i < NumPointers; ++i) { + for (unsigned j=i+1; j < NumPointers; ++j) { + Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, + Starts[0], Ends[1], "bound0", Loc); + Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, + Starts[1], Ends[0], "bound1", Loc); + Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1, + "found.conflict", Loc); + if (MemoryRuntimeCheck) { + MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or, + MemoryRuntimeCheck, + IsConflict, + "conflict.rdx", Loc); + } else { + MemoryRuntimeCheck = IsConflict; + } + } + } + }// end of need-runtime-check code. + + // If we are using memory runtime checks, include them in. + if (MemoryRuntimeCheck) { + Cmp = BinaryOperator::Create(Instruction::Or, Cmp, MemoryRuntimeCheck, + "CntOrMem", Loc); + } + BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc); // Remove the old terminator. Loc->eraseFromParent(); + // We are going to resume the execution of the scalar loop. + // Go over all of the induction variables that we found and fix the + // PHIs that are left in the scalar version of the loop. + // The starting values of PHI nodes depend on the counter of the last + // iteration in the vectorized loop. + // If we come from a bypass edge then we need to start from the original start + // value. + + // This variable saves the new starting index for the scalar loop. + Value *ResumeIndex = 0; + LoopVectorizationLegality::InductionList::iterator I, E; + LoopVectorizationLegality::InductionList *List = Legal->getInductionVars(); + for (I = List->begin(), E = List->end(); I != E; ++I) { + PHINode *OrigPhi = I->first; + PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val", + MiddleBlock->getTerminator()); + Value *EndValue = 0; + if (OrigPhi->getType()->isIntegerTy()) { + // Handle the integer induction counter: + assert(OrigPhi == OldInduction && "Unknown integer PHI"); + // We know what the end value is. + EndValue = IdxEndRoundDown; + // We also know which PHI node holds it. + ResumeIndex = ResumeVal; + } else { + // For pointer induction variables, calculate the offset using + // the end index. + EndValue = GetElementPtrInst::Create(I->second, IdxEndRoundDown, + "ptr.ind.end", + BypassBlock->getTerminator()); + } + + // The new PHI merges the original incoming value, in case of a bypass, + // or the value at the end of the vectorized loop. + ResumeVal->addIncoming(I->second, BypassBlock); + ResumeVal->addIncoming(EndValue, VecBody); + + // Fix the scalar body counter (PHI node). + unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH); + OrigPhi->setIncomingValue(BlockIdx, ResumeVal); + } + + // Make sure that we found the index where scalar loop needs to continue. + assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() && + "Invalid resume Index"); + // Add a check in the middle block to see if we have completed // all of the iterations in the first vector loop. // If (N - N%VF) == N, then we *don't* need to run the remainder. Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd, - IdxEndRoundDown, "cmp.n", + ResumeIndex, "cmp.n", MiddleBlock->getTerminator()); BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator()); @@ -730,10 +871,6 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Now we have two terminators. Remove the old one from the block. VecBody->getTerminator()->eraseFromParent(); - // Fix the scalar body iteration count. - unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH); - OldInduction->setIncomingValue(BlockIdx, IdxEndRoundDown); - // Get ready to start creating new instructions into the vectorized body. Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); @@ -803,7 +940,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // add the new incoming edges to the PHI. At this point all of the // instructions in the basic block are vectorized, so we can use them to // construct the PHI. - PhiVector PHIsToFix; + PhiVector RdxPHIsToFix; // For each instruction in the old loop. for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { @@ -819,13 +956,40 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Special handling for the induction var. if (OldInduction == Inst) continue; - // This is phase one of vectorizing PHIs. - // This has to be a reduction variable. - assert(Legal->getReductionVars()->count(P) && "Not a Reduction"); - Type *VecTy = VectorType::get(Inst->getType(), VF); - WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi"); - PHIsToFix.push_back(P); - continue; + + // Handle reduction variables: + if (Legal->getReductionVars()->count(P)) { + // This is phase one of vectorizing PHIs. + Type *VecTy = VectorType::get(Inst->getType(), VF); + WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi"); + RdxPHIsToFix.push_back(P); + continue; + } + + // Handle pointer inductions: + if (Legal->getInductionVars()->count(P)) { + Value *StartIdx = Legal->getInductionVars()->lookup(OldInduction); + Value *StartPtr = Legal->getInductionVars()->lookup(P); + // This is the normalized GEP that starts counting at zero. + Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, + "normalized.idx"); + // This is the first GEP in the sequence. + Value *FirstGep = Builder.CreateGEP(StartPtr, NormalizedIdx, + "induc.ptr"); + // This is the vector of results. Notice that we don't generate vector + // geps because scalar geps result in better code. + Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); + for (unsigned int i = 0; i < VF; ++i) { + Value *SclrGep = Builder.CreateGEP(FirstGep, Builder.getInt32(i), + "next.gep"); + VecVal = Builder.CreateInsertElement(VecVal, SclrGep, + Builder.getInt32(i), + "insert.gep"); + } + + WidenMap[Inst] = VecVal; + continue; + } } case Instruction::Add: case Instruction::FAdd: @@ -905,7 +1069,12 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF); Value *Ptr = SI->getPointerOperand(); unsigned Alignment = SI->getAlignment(); + + assert(!Legal->isUniform(Ptr) && + "We do not allow storing to uniform addresses"); + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); + // This store does not use GEPs. if (!Legal->isConsecutiveGep(Gep)) { scalarizeInstruction(Inst); @@ -935,8 +1104,9 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { unsigned Alignment = LI->getAlignment(); GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - // We don't have a gep. Scalarize the load. - if (!Legal->isConsecutiveGep(Gep)) { + // If we don't have a gep, or that the pointer is loop invariant, + // scalarize the load. + if (!Gep || Legal->isUniform(Gep) || !Legal->isConsecutiveGep(Gep)) { scalarizeInstruction(Inst); break; } @@ -994,7 +1164,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Create the 'reduced' values for each of the induction vars. // The reduced values are the vector values that we scalarize and combine // after the loop is finished. - for (PhiVector::iterator it = PHIsToFix.begin(), e = PHIsToFix.end(); + for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end(); it != e; ++it) { PHINode *RdxPhi = *it; PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]); @@ -1026,7 +1196,6 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { Value *VectorStart = Builder.CreateInsertElement(Identity, RdxDesc.StartValue, Zero); - // Fix the vector-loop phi. // We created the induction variable so we know that the // preheader is the first entry. @@ -1146,12 +1315,6 @@ bool LoopVectorizationLegality::canVectorize() { BasicBlock *BB = TheLoop->getHeader(); DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n"); - // Go over each instruction and look at memory deps. - if (!canVectorizeBlock(*BB)) { - DEBUG(dbgs() << "LV: Can't vectorize this loop header\n"); - return false; - } - // ScalarEvolution needs to be able to find the exit count. const SCEV *ExitCount = SE->getExitCount(TheLoop, BB); if (ExitCount == SE->getCouldNotCompute()) { @@ -1167,7 +1330,15 @@ bool LoopVectorizationLegality::canVectorize() { return false; } - DEBUG(dbgs() << "LV: We can vectorize this loop!\n"); + // Go over each instruction and look at memory deps. + if (!canVectorizeBlock(*BB)) { + DEBUG(dbgs() << "LV: Can't vectorize this loop header\n"); + return false; + } + + DEBUG(dbgs() << "LV: We can vectorize this loop" << + (PtrRtCheck.Need ? " (with a runtime bound check)" : "") + <<"!\n"); // Okay! We can vectorize. At this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with @@ -1176,23 +1347,33 @@ bool LoopVectorizationLegality::canVectorize() { } bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { + BasicBlock *PreHeader = TheLoop->getLoopPreheader(); + // Scan the instructions in the block and look for hazards. for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { Instruction *I = it; - PHINode *Phi = dyn_cast<PHINode>(I); - if (Phi) { + if (PHINode *Phi = dyn_cast<PHINode>(I)) { // This should not happen because the loop should be normalized. if (Phi->getNumIncomingValues() != 2) { DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); return false; } - // We only look at integer phi nodes. - if (!Phi->getType()->isIntegerTy()) { - DEBUG(dbgs() << "LV: Found an non-int PHI.\n"); + + // This is the value coming from the preheader. + Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); + + // We only look at integer and pointer phi nodes. + if (Phi->getType()->isPointerTy() && isInductionVariable(Phi)) { + DEBUG(dbgs() << "LV: Found a pointer induction variable.\n"); + Inductions[Phi] = StartValue; + continue; + } else if (!Phi->getType()->isIntegerTy()) { + DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); return false; } + // Handle integer PHIs: if (isInductionVariable(Phi)) { if (Induction) { DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n"); @@ -1200,6 +1381,7 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { } DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n"); Induction = Phi; + Inductions[Phi] = StartValue; continue; } if (AddReductionVar(Phi, IntegerAdd)) { @@ -1304,6 +1486,8 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { // Holds the Load and Store *instructions*. ValueVector Loads; ValueVector Stores; + PtrRtCheck.Pointers.clear(); + PtrRtCheck.Need = false; // Scan the BB and collect legal loads and stores. for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { @@ -1361,6 +1545,12 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { StoreInst *ST = dyn_cast<StoreInst>(*I); assert(ST && "Bad StoreInst"); Value* Ptr = ST->getPointerOperand(); + + if (isUniform(Ptr)) { + DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); + return false; + } + // If we did *not* see this pointer before, insert it to // the read-write list. At this phase it is only a 'write' list. if (Seen.insert(Ptr)) @@ -1390,6 +1580,39 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { return true; } + // Find pointers with computable bounds. We are going to use this information + // to place a runtime bound check. + bool RT = true; + for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) + if (hasComputableBounds(*I)) { + PtrRtCheck.Pointers.push_back(*I); + DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n"); + } else { + RT = false; + break; + } + for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) + if (hasComputableBounds(*I)) { + PtrRtCheck.Pointers.push_back(*I); + DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n"); + } else { + RT = false; + break; + } + + // Check that we did not collect too many pointers or found a + // unsizeable pointer. + if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) { + PtrRtCheck.Pointers.clear(); + RT = false; + } + + PtrRtCheck.Need = RT; + + if (RT) { + DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n"); + } + // Now that the pointers are in two lists (Reads and ReadWrites), we // can check that there are no conflicts between each of the writes and // between the writes to the reads. @@ -1404,12 +1627,12 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { it != e; ++it) { if (!isIdentifiedObject(*it)) { DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n"); - return false; + return RT; } if (!WriteObjects.insert(*it)) { DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **it <<"\n"); - return false; + return RT; } } TempObjects.clear(); @@ -1422,18 +1645,21 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { it != e; ++it) { if (!isIdentifiedObject(*it)) { DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n"); - return false; + return RT; } if (WriteObjects.count(*it)) { DEBUG(dbgs() << "LV: Found a possible read/write reorder:" << **it <<"\n"); - return false; + return RT; } } TempObjects.clear(); } - // All is okay. + // It is safe to vectorize and we don't need any runtime checks. + DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n"); + PtrRtCheck.Pointers.clear(); + PtrRtCheck.Need = false; return true; } @@ -1527,8 +1753,6 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I, case Instruction::Sub: return Kind == IntegerAdd; case Instruction::Mul: - case Instruction::UDiv: - case Instruction::SDiv: return Kind == IntegerMult; case Instruction::And: return Kind == IntegerAnd; @@ -1540,6 +1764,11 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I, } bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { + Type *PhiTy = Phi->getType(); + // We only handle integer and pointer inductions variables. + if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) + return false; + // Check that the PHI is consecutive and starts at zero. const SCEV *PhiScev = SE->getSCEV(Phi); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); @@ -1549,11 +1778,26 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { } const SCEV *Step = AR->getStepRecurrence(*SE); - if (!Step->isOne()) { - DEBUG(dbgs() << "LV: PHI stride does not equal one.\n"); + // Integer inductions need to have a stride of one. + if (PhiTy->isIntegerTy()) + return Step->isOne(); + + // Calculate the pointer stride and check if it is consecutive. + const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); + if (!C) return false; + + assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); + uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType()); + return (C->getValue()->equalsInt(Size)); +} + +bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) { + const SCEV *PhiScev = SE->getSCEV(Ptr); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); + if (!AR) return false; - } - return true; + + return AR->isAffine(); } unsigned |