aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
authorAlexander Kornienko <alexfh@google.com>2012-11-30 17:06:02 +0000
committerAlexander Kornienko <alexfh@google.com>2012-11-30 17:06:02 +0000
commitccaf20cc53a3dc353f4786b1ebf4ebb3f81d4746 (patch)
tree526aace1ee6cd0126263e06eed2d23eae06d7d84 /lib/Transforms/Vectorize/LoopVectorize.cpp
parent26c41c96df38631aa2bce1ff87acb31871111f83 (diff)
parent72a2c0622ab072030c9108badea50074d96bec6a (diff)
Updating branches/google/testing to r168474
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@169005 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp378
1 files changed, 311 insertions, 67 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 892808760f..3f1d82cf5b 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -78,6 +78,10 @@ VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
/// We don't vectorize loops with a known constant trip count below this number.
const unsigned TinyTripCountThreshold = 16;
+/// When performing a runtime memory check, do not check more than this
+/// number of pointers. Notice that the check is quadratic!
+const unsigned RuntimeMemoryCheckThreshold = 2;
+
namespace {
// Forward declarations.
@@ -114,7 +118,7 @@ public:
/// Widen each instruction in the old loop to a new one in the new loop.
/// Use the Legality module to find the induction and reduction variables.
vectorizeLoop(Legal);
- // register the new loop.
+ // Register the new loop and update the analysis passes.
updateAnalysis();
}
@@ -123,7 +127,8 @@ private:
void createEmptyLoop(LoopVectorizationLegality *Legal);
/// Copy and widen the instructions from the old loop.
void vectorizeLoop(LoopVectorizationLegality *Legal);
- /// Insert the new loop to the loop hierarchy and pass manager.
+ /// Insert the new loop to the loop hierarchy and pass manager
+ /// and update the analysis passes.
void updateAnalysis();
/// This instruction is un-vectorizable. Implement it as a sequence
@@ -242,10 +247,23 @@ public:
ReductionKind Kind;
};
+ // This POD struct holds information about the memory runtime legality
+ // check that a group of pointers do not overlap.
+ struct RuntimePointerCheck {
+ /// This flag indicates if we need to add the runtime check.
+ bool Need;
+ /// Holds the pointers that we need to check.
+ SmallVector<Value*, 2> Pointers;
+ };
+
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
+ /// InductionList saves induction variables and maps them to the initial
+ /// value entring the loop.
+ typedef DenseMap<PHINode*, Value*> InductionList;
+
/// Returns true if it is legal to vectorize this loop.
/// This does not mean that it is profitable to vectorize this
/// loop, only that it is legal to do so.
@@ -257,15 +275,23 @@ public:
/// Returns the reduction variables found in the loop.
ReductionList *getReductionVars() { return &Reductions; }
+ /// Returns the induction variables found in the loop.
+ InductionList *getInductionVars() { return &Inductions; }
+
/// Check if the pointer returned by this GEP is consecutive
/// when the index is vectorized. This happens when the last
/// index of the GEP is consecutive, like the induction variable.
/// This check allows us to vectorize A[idx] into a wide load/store.
bool isConsecutiveGep(Value *Ptr);
+ /// Returns true if the value V is uniform within the loop.
+ bool isUniform(Value *V);
+
/// Returns true if this instruction will remain scalar after vectorization.
bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
+ /// Returns the information that we collected about runtime memory check.
+ RuntimePointerCheck *getRuntimePointerCheck() {return &PtrRtCheck; }
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -286,6 +312,8 @@ private:
bool isReductionInstr(Instruction *I, ReductionKind Kind);
/// Returns True, if 'Phi' is an induction variable.
bool isInductionVariable(PHINode *Phi);
+ /// Return true if can compute the address bounds of Ptr within the loop.
+ bool hasComputableBounds(Value *Ptr);
/// The loop that we evaluate.
Loop *TheLoop;
@@ -296,16 +324,25 @@ private:
// --- vectorization state --- //
- /// Holds the induction variable.
+ /// Holds the integer induction variable. This is the counter of the
+ /// loop.
PHINode *Induction;
/// Holds the reduction variables.
ReductionList Reductions;
+ /// Holds all of the induction variables that we found in the loop.
+ /// Notice that inductions don't need to start at zero and that induction
+ /// variables can be pointers.
+ InductionList Inductions;
+
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
SmallPtrSet<Value*, 4> AllowedExit;
/// This set holds the variables which are known to be uniform after
/// vectorization.
SmallPtrSet<Instruction*, 4> Uniforms;
+ /// We need to check that all of the pointers in this list are disjoint
+ /// at runtime.
+ RuntimePointerCheck PtrRtCheck;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -506,6 +543,10 @@ bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) {
return false;
}
+bool LoopVectorizationLegality::isUniform(Value *V) {
+ return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+}
+
Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
// If we saved a vectorized copy of V, use it.
@@ -521,13 +562,7 @@ Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
Constant*
SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) {
- SmallVector<Constant*, 8> Indices;
- // Create a vector of consecutive numbers from zero to VF.
- for (unsigned i = 0; i < VF; ++i)
- Indices.push_back(ConstantInt::get(ScalarTy, Val, true));
-
- // Add the consecutive indices to the vector value.
- return ConstantVector::get(Indices);
+ return ConstantVector::getSplat(VF, ConstantInt::get(ScalarTy, Val, true));
}
void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
@@ -631,13 +666,29 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
...
*/
+ OldInduction = Legal->getInduction();
+ assert(OldInduction && "We must have a single phi node.");
+ Type *IdxTy = OldInduction->getType();
+
+ // Find the loop boundaries.
+ const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+ assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
+
+ // Get the total trip count from the count by adding 1.
+ ExitCount = SE->getAddExpr(ExitCount,
+ SE->getConstant(ExitCount->getType(), 1));
+ // We may need to extend the index in case there is a type mismatch.
+ // We know that the count starts at zero and does not overflow.
+ // We are using Zext because it should be less expensive.
+ if (ExitCount->getType() != IdxTy)
+ ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);
+
// This is the original scalar-loop preheader.
BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
assert(ExitBlock && "Must have an exit block");
// The loop index does not have to start at Zero. It starts with this value.
- OldInduction = Legal->getInduction();
Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock);
assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
@@ -655,8 +706,6 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
"scalar.preheader");
// Find the induction variable.
BasicBlock *OldBasicBlock = OrigLoop->getHeader();
- assert(OldInduction && "We must have a single phi node.");
- Type *IdxTy = OldInduction->getType();
// Use this IR builder to create the loop instructions (Phi, Br, Cmp)
// inside the loop.
@@ -666,25 +715,11 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
Induction = Builder.CreatePHI(IdxTy, 2, "index");
Constant *Step = ConstantInt::get(IdxTy, VF);
- // Find the loop boundaries.
- const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
- assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
-
- // Get the total trip count from the count by adding 1.
- ExitCount = SE->getAddExpr(ExitCount,
- SE->getConstant(ExitCount->getType(), 1));
-
// Expand the trip count and place the new instructions in the preheader.
// Notice that the pre-header does not change, only the loop body.
SCEVExpander Exp(*SE, "induction");
Instruction *Loc = BypassBlock->getTerminator();
- // We may need to extend the index in case there is a type mismatch.
- // We know that the count starts at zero and does not overflow.
- // We are using Zext because it should be less expensive.
- if (ExitCount->getType() != Induction->getType())
- ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);
-
// Count holds the overall loop count (N).
Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc);
@@ -704,15 +739,121 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
IdxEndRoundDown,
StartIdx,
"cmp.zero", Loc);
+
+ LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
+ Legal->getRuntimePointerCheck();
+ Value *MemoryRuntimeCheck = 0;
+ if (PtrRtCheck->Need) {
+ unsigned NumPointers = PtrRtCheck->Pointers.size();
+ SmallVector<Value* , 2> Starts;
+ SmallVector<Value* , 2> Ends;
+
+ // Use this type for pointer arithmetic.
+ Type* PtrArithTy = PtrRtCheck->Pointers[0]->getType();
+
+ for (unsigned i=0; i < NumPointers; ++i) {
+ Value *Ptr = PtrRtCheck->Pointers[i];
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, OrigLoop)) {
+ DEBUG(dbgs() << "LV1: Adding RT check for a loop invariant ptr:" <<
+ *Ptr <<"\n");
+ Starts.push_back(Ptr);
+ Ends.push_back(Ptr);
+ } else {
+ DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ Value *Start = Exp.expandCodeFor(AR->getStart(), PtrArithTy, Loc);
+ const SCEV *Ex = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+ const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ assert(!isa<SCEVCouldNotCompute>(ScEnd) && "Invalid scev range.");
+ Value *End = Exp.expandCodeFor(ScEnd, PtrArithTy, Loc);
+ Starts.push_back(Start);
+ Ends.push_back(End);
+ }
+ }
+
+ for (unsigned i=0; i < NumPointers; ++i) {
+ for (unsigned j=i+1; j < NumPointers; ++j) {
+ Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
+ Starts[0], Ends[1], "bound0", Loc);
+ Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
+ Starts[1], Ends[0], "bound1", Loc);
+ Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
+ "found.conflict", Loc);
+ if (MemoryRuntimeCheck) {
+ MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or,
+ MemoryRuntimeCheck,
+ IsConflict,
+ "conflict.rdx", Loc);
+ } else {
+ MemoryRuntimeCheck = IsConflict;
+ }
+ }
+ }
+ }// end of need-runtime-check code.
+
+ // If we are using memory runtime checks, include them in.
+ if (MemoryRuntimeCheck) {
+ Cmp = BinaryOperator::Create(Instruction::Or, Cmp, MemoryRuntimeCheck,
+ "CntOrMem", Loc);
+ }
+
BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc);
// Remove the old terminator.
Loc->eraseFromParent();
+ // We are going to resume the execution of the scalar loop.
+ // Go over all of the induction variables that we found and fix the
+ // PHIs that are left in the scalar version of the loop.
+ // The starting values of PHI nodes depend on the counter of the last
+ // iteration in the vectorized loop.
+ // If we come from a bypass edge then we need to start from the original start
+ // value.
+
+ // This variable saves the new starting index for the scalar loop.
+ Value *ResumeIndex = 0;
+ LoopVectorizationLegality::InductionList::iterator I, E;
+ LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
+ for (I = List->begin(), E = List->end(); I != E; ++I) {
+ PHINode *OrigPhi = I->first;
+ PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
+ MiddleBlock->getTerminator());
+ Value *EndValue = 0;
+ if (OrigPhi->getType()->isIntegerTy()) {
+ // Handle the integer induction counter:
+ assert(OrigPhi == OldInduction && "Unknown integer PHI");
+ // We know what the end value is.
+ EndValue = IdxEndRoundDown;
+ // We also know which PHI node holds it.
+ ResumeIndex = ResumeVal;
+ } else {
+ // For pointer induction variables, calculate the offset using
+ // the end index.
+ EndValue = GetElementPtrInst::Create(I->second, IdxEndRoundDown,
+ "ptr.ind.end",
+ BypassBlock->getTerminator());
+ }
+
+ // The new PHI merges the original incoming value, in case of a bypass,
+ // or the value at the end of the vectorized loop.
+ ResumeVal->addIncoming(I->second, BypassBlock);
+ ResumeVal->addIncoming(EndValue, VecBody);
+
+ // Fix the scalar body counter (PHI node).
+ unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
+ OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
+ }
+
+ // Make sure that we found the index where scalar loop needs to continue.
+ assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
+ "Invalid resume Index");
+
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
// If (N - N%VF) == N, then we *don't* need to run the remainder.
Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
- IdxEndRoundDown, "cmp.n",
+ ResumeIndex, "cmp.n",
MiddleBlock->getTerminator());
BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
@@ -730,10 +871,6 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Now we have two terminators. Remove the old one from the block.
VecBody->getTerminator()->eraseFromParent();
- // Fix the scalar body iteration count.
- unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
- OldInduction->setIncomingValue(BlockIdx, IdxEndRoundDown);
-
// Get ready to start creating new instructions into the vectorized body.
Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
@@ -803,7 +940,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// add the new incoming edges to the PHI. At this point all of the
// instructions in the basic block are vectorized, so we can use them to
// construct the PHI.
- PhiVector PHIsToFix;
+ PhiVector RdxPHIsToFix;
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
@@ -819,13 +956,40 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Special handling for the induction var.
if (OldInduction == Inst)
continue;
- // This is phase one of vectorizing PHIs.
- // This has to be a reduction variable.
- assert(Legal->getReductionVars()->count(P) && "Not a Reduction");
- Type *VecTy = VectorType::get(Inst->getType(), VF);
- WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi");
- PHIsToFix.push_back(P);
- continue;
+
+ // Handle reduction variables:
+ if (Legal->getReductionVars()->count(P)) {
+ // This is phase one of vectorizing PHIs.
+ Type *VecTy = VectorType::get(Inst->getType(), VF);
+ WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi");
+ RdxPHIsToFix.push_back(P);
+ continue;
+ }
+
+ // Handle pointer inductions:
+ if (Legal->getInductionVars()->count(P)) {
+ Value *StartIdx = Legal->getInductionVars()->lookup(OldInduction);
+ Value *StartPtr = Legal->getInductionVars()->lookup(P);
+ // This is the normalized GEP that starts counting at zero.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+ "normalized.idx");
+ // This is the first GEP in the sequence.
+ Value *FirstGep = Builder.CreateGEP(StartPtr, NormalizedIdx,
+ "induc.ptr");
+ // This is the vector of results. Notice that we don't generate vector
+ // geps because scalar geps result in better code.
+ Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+ for (unsigned int i = 0; i < VF; ++i) {
+ Value *SclrGep = Builder.CreateGEP(FirstGep, Builder.getInt32(i),
+ "next.gep");
+ VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+ Builder.getInt32(i),
+ "insert.gep");
+ }
+
+ WidenMap[Inst] = VecVal;
+ continue;
+ }
}
case Instruction::Add:
case Instruction::FAdd:
@@ -905,7 +1069,12 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
Value *Ptr = SI->getPointerOperand();
unsigned Alignment = SI->getAlignment();
+
+ assert(!Legal->isUniform(Ptr) &&
+ "We do not allow storing to uniform addresses");
+
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
// This store does not use GEPs.
if (!Legal->isConsecutiveGep(Gep)) {
scalarizeInstruction(Inst);
@@ -935,8 +1104,9 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
unsigned Alignment = LI->getAlignment();
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
- // We don't have a gep. Scalarize the load.
- if (!Legal->isConsecutiveGep(Gep)) {
+ // If we don't have a gep, or that the pointer is loop invariant,
+ // scalarize the load.
+ if (!Gep || Legal->isUniform(Gep) || !Legal->isConsecutiveGep(Gep)) {
scalarizeInstruction(Inst);
break;
}
@@ -994,7 +1164,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Create the 'reduced' values for each of the induction vars.
// The reduced values are the vector values that we scalarize and combine
// after the loop is finished.
- for (PhiVector::iterator it = PHIsToFix.begin(), e = PHIsToFix.end();
+ for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
it != e; ++it) {
PHINode *RdxPhi = *it;
PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]);
@@ -1026,7 +1196,6 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
Value *VectorStart = Builder.CreateInsertElement(Identity,
RdxDesc.StartValue, Zero);
-
// Fix the vector-loop phi.
// We created the induction variable so we know that the
// preheader is the first entry.
@@ -1146,12 +1315,6 @@ bool LoopVectorizationLegality::canVectorize() {
BasicBlock *BB = TheLoop->getHeader();
DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n");
- // Go over each instruction and look at memory deps.
- if (!canVectorizeBlock(*BB)) {
- DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
- return false;
- }
-
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
if (ExitCount == SE->getCouldNotCompute()) {
@@ -1167,7 +1330,15 @@ bool LoopVectorizationLegality::canVectorize() {
return false;
}
- DEBUG(dbgs() << "LV: We can vectorize this loop!\n");
+ // Go over each instruction and look at memory deps.
+ if (!canVectorizeBlock(*BB)) {
+ DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "LV: We can vectorize this loop" <<
+ (PtrRtCheck.Need ? " (with a runtime bound check)" : "")
+ <<"!\n");
// Okay! We can vectorize. At this point we don't have any other mem analysis
// which may limit our maximum vectorization factor, so just return true with
@@ -1176,23 +1347,33 @@ bool LoopVectorizationLegality::canVectorize() {
}
bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
+ BasicBlock *PreHeader = TheLoop->getLoopPreheader();
+
// Scan the instructions in the block and look for hazards.
for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
Instruction *I = it;
- PHINode *Phi = dyn_cast<PHINode>(I);
- if (Phi) {
+ if (PHINode *Phi = dyn_cast<PHINode>(I)) {
// This should not happen because the loop should be normalized.
if (Phi->getNumIncomingValues() != 2) {
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
}
- // We only look at integer phi nodes.
- if (!Phi->getType()->isIntegerTy()) {
- DEBUG(dbgs() << "LV: Found an non-int PHI.\n");
+
+ // This is the value coming from the preheader.
+ Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
+
+ // We only look at integer and pointer phi nodes.
+ if (Phi->getType()->isPointerTy() && isInductionVariable(Phi)) {
+ DEBUG(dbgs() << "LV: Found a pointer induction variable.\n");
+ Inductions[Phi] = StartValue;
+ continue;
+ } else if (!Phi->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
}
+ // Handle integer PHIs:
if (isInductionVariable(Phi)) {
if (Induction) {
DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
@@ -1200,6 +1381,7 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
}
DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n");
Induction = Phi;
+ Inductions[Phi] = StartValue;
continue;
}
if (AddReductionVar(Phi, IntegerAdd)) {
@@ -1304,6 +1486,8 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
// Holds the Load and Store *instructions*.
ValueVector Loads;
ValueVector Stores;
+ PtrRtCheck.Pointers.clear();
+ PtrRtCheck.Need = false;
// Scan the BB and collect legal loads and stores.
for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
@@ -1361,6 +1545,12 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
StoreInst *ST = dyn_cast<StoreInst>(*I);
assert(ST && "Bad StoreInst");
Value* Ptr = ST->getPointerOperand();
+
+ if (isUniform(Ptr)) {
+ DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+ return false;
+ }
+
// If we did *not* see this pointer before, insert it to
// the read-write list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr))
@@ -1390,6 +1580,39 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
return true;
}
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ bool RT = true;
+ for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
+ if (hasComputableBounds(*I)) {
+ PtrRtCheck.Pointers.push_back(*I);
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+ } else {
+ RT = false;
+ break;
+ }
+ for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
+ if (hasComputableBounds(*I)) {
+ PtrRtCheck.Pointers.push_back(*I);
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+ } else {
+ RT = false;
+ break;
+ }
+
+ // Check that we did not collect too many pointers or found a
+ // unsizeable pointer.
+ if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
+ PtrRtCheck.Pointers.clear();
+ RT = false;
+ }
+
+ PtrRtCheck.Need = RT;
+
+ if (RT) {
+ DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
+ }
+
// Now that the pointers are in two lists (Reads and ReadWrites), we
// can check that there are no conflicts between each of the writes and
// between the writes to the reads.
@@ -1404,12 +1627,12 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
it != e; ++it) {
if (!isIdentifiedObject(*it)) {
DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
- return false;
+ return RT;
}
if (!WriteObjects.insert(*it)) {
DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
<< **it <<"\n");
- return false;
+ return RT;
}
}
TempObjects.clear();
@@ -1422,18 +1645,21 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
it != e; ++it) {
if (!isIdentifiedObject(*it)) {
DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
- return false;
+ return RT;
}
if (WriteObjects.count(*it)) {
DEBUG(dbgs() << "LV: Found a possible read/write reorder:"
<< **it <<"\n");
- return false;
+ return RT;
}
}
TempObjects.clear();
}
- // All is okay.
+ // It is safe to vectorize and we don't need any runtime checks.
+ DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n");
+ PtrRtCheck.Pointers.clear();
+ PtrRtCheck.Need = false;
return true;
}
@@ -1527,8 +1753,6 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
case Instruction::Sub:
return Kind == IntegerAdd;
case Instruction::Mul:
- case Instruction::UDiv:
- case Instruction::SDiv:
return Kind == IntegerMult;
case Instruction::And:
return Kind == IntegerAnd;
@@ -1540,6 +1764,11 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
}
bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
+ Type *PhiTy = Phi->getType();
+ // We only handle integer and pointer inductions variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return false;
+
// Check that the PHI is consecutive and starts at zero.
const SCEV *PhiScev = SE->getSCEV(Phi);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
@@ -1549,11 +1778,26 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
}
const SCEV *Step = AR->getStepRecurrence(*SE);
- if (!Step->isOne()) {
- DEBUG(dbgs() << "LV: PHI stride does not equal one.\n");
+ // Integer inductions need to have a stride of one.
+ if (PhiTy->isIntegerTy())
+ return Step->isOne();
+
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C) return false;
+
+ assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
+ return (C->getValue()->equalsInt(Size));
+}
+
+bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
+ const SCEV *PhiScev = SE->getSCEV(Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+ if (!AR)
return false;
- }
- return true;
+
+ return AR->isAffine();
}
unsigned