Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp  696
1 file changed, 517 insertions(+), 179 deletions(-)
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 28ecb98e5e..6df3e782c8 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -13,9 +13,6 @@
// access for the first iteration, and then creating a new GEP instruction in
// the loop to increment the value by the appropriate amount.
//
-// There are currently several deficiencies in the implementation, marked with
-// FIXME in the code.
-//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
@@ -25,10 +22,13 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
#include <set>
using namespace llvm;
@@ -50,11 +50,40 @@ namespace {
std::map<Value *, GEPCache> Map;
};
+ struct IVUse {
+ /// Users - Keep track of all of the users of this stride as well as the
+ /// initial value.
+ std::vector<std::pair<SCEVHandle, Instruction*> > Users;
+ std::vector<Instruction *> UserOperands;
+
+ void addUser(SCEVHandle &SH, Instruction *U, Instruction *V) {
+ Users.push_back(std::make_pair(SH, U));
+ UserOperands.push_back(V);
+ }
+ };
+
+
class LoopStrengthReduce : public FunctionPass {
LoopInfo *LI;
DominatorSet *DS;
+ ScalarEvolution *SE;
+ const TargetData *TD;
+ const Type *UIntPtrTy;
bool Changed;
unsigned MaxTargetAMSize;
+
+ /// IVUsesByStride - Keep track of all uses of induction variables that we
+ /// are interested in. The key of the map is the stride of the access.
+ std::map<Value*, IVUse> IVUsesByStride;
+
+ /// CastedBasePointers - As we need to lower getelementptr instructions, we
+ /// cast the pointer input to uintptr_t. This keeps track of the casted
+ /// values for the pointers we have processed so far.
+ std::map<Value*, Value*> CastedBasePointers;
+
+ /// DeadInsts - Keep track of instructions we may have made dead, so that
+ /// we can remove them after we are done working.
+ std::set<Instruction*> DeadInsts;
public:
LoopStrengthReduce(unsigned MTAMS = 1)
: MaxTargetAMSize(MTAMS) {
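
The IVUsesByStride map introduced above groups every interesting use by how far its address advances per iteration. A minimal standalone sketch of that bookkeeping (the names and types below are invented for illustration and do not use the LLVM classes above): uses with the same stride land in one bucket, and each bucket is later strength-reduced on its own.

#include <map>
#include <string>
#include <vector>

// Invented stand-in for a (start value, user instruction) record.
struct SketchUse {
  std::string Start;   // symbolic starting address, e.g. "&A[0]"
  std::string User;    // the instruction using the IV, e.g. "load"
};

int main() {
  std::map<long, std::vector<SketchUse> > UsesByStride;  // keyed by stride
  // Two accesses stepping 4 bytes per iteration share a bucket; an access
  // stepping 8 bytes per iteration gets its own bucket.
  SketchUse A0 = {"&A[0]", "load"};   // stride 4
  SketchUse B0 = {"&B[0]", "store"};  // stride 4
  SketchUse C0 = {"&C[0]", "load"};   // stride 8
  UsesByStride[4].push_back(A0);
  UsesByStride[4].push_back(B0);
  UsesByStride[8].push_back(C0);
  return (int)UsesByStride.size();    // two distinct strides to reduce
}
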
@@ -63,6 +92,9 @@ namespace {
virtual bool runOnFunction(Function &) {
LI = &getAnalysis<LoopInfo>();
DS = &getAnalysis<DominatorSet>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TD = &getAnalysis<TargetData>();
+ UIntPtrTy = TD->getIntPtrType();
Changed = false;
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
@@ -76,9 +108,17 @@ namespace {
AU.addRequired<LoopInfo>();
AU.addRequired<DominatorSet>();
AU.addRequired<TargetData>();
+ AU.addRequired<ScalarEvolution>();
}
private:
void runOnLoop(Loop *L);
+ bool AddUsersIfInteresting(Instruction *I, Loop *L);
+ void AnalyzeGetElementPtrUsers(GetElementPtrInst *GEP, Instruction *I,
+ Loop *L);
+
+ void StrengthReduceStridedIVUsers(Value *Stride, IVUse &Uses, Loop *L,
+ bool isOnlyStride);
+
void strengthReduceGEP(GetElementPtrInst *GEPI, Loop *L,
GEPCache* GEPCache,
Instruction *InsertBefore,
@@ -111,207 +151,505 @@ DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts) {
}
}
-void LoopStrengthReduce::strengthReduceGEP(GetElementPtrInst *GEPI, Loop *L,
- GEPCache *Cache,
- Instruction *InsertBefore,
- std::set<Instruction*> &DeadInsts) {
- // We will strength reduce the GEP by splitting it into two parts. The first
- // is a GEP to hold the initial value of the non-strength-reduced GEP upon
- // entering the loop, which we will insert at the end of the loop preheader.
- // The second is a GEP to hold the incremented value of the initial GEP.
- // The LoopIndVarSimplify pass guarantees that loop counts start at zero, so
- // we will replace the indvar with a constant zero value to create the first
- // GEP.
- //
- // We currently only handle GEP instructions that consist of zero or more
- // constants or loop invariable expressions prior to an instance of the
- // canonical induction variable.
- unsigned indvar = 0;
- std::vector<Value *> pre_op_vector;
- std::vector<Value *> inc_op_vector;
- const Type *ty = GEPI->getOperand(0)->getType();
- Value *CanonicalIndVar = L->getCanonicalInductionVariable();
- BasicBlock *Header = L->getHeader();
- BasicBlock *Preheader = L->getLoopPreheader();
- bool AllConstantOperands = true;
- Cache = Cache->get(GEPI->getOperand(0));
-
- for (unsigned op = 1, e = GEPI->getNumOperands(); op != e; ++op) {
- Value *operand = GEPI->getOperand(op);
- if (ty->getTypeID() == Type::StructTyID) {
- assert(isa<ConstantUInt>(operand));
- ConstantUInt *c = dyn_cast<ConstantUInt>(operand);
- ty = ty->getContainedType(unsigned(c->getValue()));
- } else {
- ty = ty->getContainedType(0);
+
+/// CanReduceSCEV - Return true if we can strength reduce this scalar evolution
+/// in the specified loop.
+static bool CanReduceSCEV(const SCEVHandle &SH, Loop *L) {
+ SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SH);
+ if (!AddRec || AddRec->getLoop() != L) return false;
+
+ // FIXME: Generalize to non-affine IV's.
+ if (!AddRec->isAffine()) return false;
+
+ // FIXME: generalize to IV's with more complex strides (must emit stride
+ // expression outside of loop!)
+ if (isa<SCEVConstant>(AddRec->getOperand(1)))
+ return true;
+
+ // We handle steps by unsigned values, because we know we won't have to insert
+ // a cast for them.
+ if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(AddRec->getOperand(1)))
+ if (SU->getValue()->getType()->isUnsigned())
+ return true;
+
+ // Otherwise, no, we can't handle it yet.
+ return false;
+}
+
+
+/// GetAdjustedIndex - Adjust the specified GEP sequential type index to match
+/// the size of the pointer type, and scale it by the type size.
+static SCEVHandle GetAdjustedIndex(const SCEVHandle &Idx, uint64_t TySize,
+ const Type *UIntPtrTy) {
+ SCEVHandle Result = Idx;
+ if (Result->getType()->getUnsignedVersion() != UIntPtrTy) {
+ if (UIntPtrTy->getPrimitiveSize() < Result->getType()->getPrimitiveSize())
+ Result = SCEVTruncateExpr::get(Result, UIntPtrTy);
+ else
+ Result = SCEVZeroExtendExpr::get(Result, UIntPtrTy);
+ }
+
+ // This index is scaled by the type size being indexed.
+ if (TySize != 1)
+ Result = SCEVMulExpr::get(Result,
+ SCEVConstant::get(ConstantUInt::get(UIntPtrTy,
+ TySize)));
+ return Result;
+}
+
+/// AnalyzeGetElementPtrUsers - Analyze all of the users of the specified
+/// getelementptr instruction, adding them to the IVUsesByStride table. Note
+/// that we only want to analyze a getelementptr instruction once, and it can
+/// have multiple operands that are uses of the indvar (e.g. A[i][i]). Because
+/// of this, we only process a GEP instruction if its first recurrent operand is
+/// "op", otherwise we will either have already processed it or we will sometime
+/// later.
+void LoopStrengthReduce::AnalyzeGetElementPtrUsers(GetElementPtrInst *GEP,
+ Instruction *Op, Loop *L) {
+ // Analyze all of the subscripts of this getelementptr instruction, looking
+ // for uses that are determined by the trip count of L. First, skip all
+ // operands that are not dependent on the IV.
+
+ // Build up the base expression. Insert an LLVM cast of the pointer to
+ // uintptr_t first.
+ Value *BasePtr;
+ if (Constant *CB = dyn_cast<Constant>(GEP->getOperand(0)))
+ BasePtr = ConstantExpr::getCast(CB, UIntPtrTy);
+ else {
+ Value *&BP = CastedBasePointers[GEP->getOperand(0)];
+ if (BP == 0) {
+ BasicBlock::iterator InsertPt;
+ if (isa<Argument>(GEP->getOperand(0))) {
+ InsertPt = GEP->getParent()->getParent()->begin()->begin();
+ } else {
+ InsertPt = cast<Instruction>(GEP->getOperand(0));
+ if (InvokeInst *II = dyn_cast<InvokeInst>(GEP->getOperand(0)))
+ InsertPt = II->getNormalDest()->begin();
+ else
+ ++InsertPt;
+ }
+ BP = new CastInst(GEP->getOperand(0), UIntPtrTy,
+ GEP->getOperand(0)->getName(), InsertPt);
}
+ BasePtr = BP;
+ }
- if (operand == CanonicalIndVar) {
- // FIXME: use getCanonicalInductionVariableIncrement to choose between
- // one and neg one maybe? We need to support int *foo = GEP base, -1
- const Type *Ty = CanonicalIndVar->getType();
- pre_op_vector.push_back(Constant::getNullValue(Ty));
- inc_op_vector.push_back(ConstantInt::get(Ty, 1));
- indvar = op;
- break;
- } else if (isa<Argument>(operand)) {
- pre_op_vector.push_back(operand);
- AllConstantOperands = false;
- } else if (isa<Constant>(operand)) {
- pre_op_vector.push_back(operand);
- } else if (Instruction *inst = dyn_cast<Instruction>(operand)) {
- if (!DS->dominates(inst, Preheader->getTerminator()))
- return;
- pre_op_vector.push_back(operand);
- AllConstantOperands = false;
+ SCEVHandle Base = SCEVUnknown::get(BasePtr);
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ unsigned i = 1;
+ for (; GEP->getOperand(i) != Op; ++i, ++GTI) {
+ // If this is a use of a recurrence that we can analyze, and it comes before
+ // Op does in the GEP operand list, we will handle this when we process this
+ // operand.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue();
+ uint64_t Offset = SL->MemberOffsets[Idx];
+ Base = SCEVAddExpr::get(Base, SCEVUnknown::getIntegerSCEV(Offset,
+ UIntPtrTy));
} else {
- return; // Cannot handle this.
+ SCEVHandle Idx = SE->getSCEV(GEP->getOperand(i));
+ if (CanReduceSCEV(Idx, L))
+ return;
+ Base = SCEVAddExpr::get(Base, GetAdjustedIndex(Idx,
+ TD->getTypeSize(GTI.getIndexedType()), UIntPtrTy));
}
- Cache = Cache->get(operand);
}
- assert(indvar > 0 && "Indvar used by GEP not found in operand list");
-
- // Ensure the pointer base is loop invariant. While strength reduction
- // makes sense even if the pointer changed on every iteration, there is no
- // realistic way of handling it unless GEPs were completely decomposed into
- // their constituent operations so we have explicit multiplications to work
- // with.
- if (Instruction *GepPtrOp = dyn_cast<Instruction>(GEPI->getOperand(0)))
- if (!DS->dominates(GepPtrOp, Preheader->getTerminator()))
- return;
-
- // Don't reduce multiplies that the target can handle via addressing modes.
- uint64_t sz = getAnalysis<TargetData>().getTypeSize(ty);
- if (sz && (sz & (sz-1)) == 0) // Power of two?
- if (sz <= (1ULL << (MaxTargetAMSize-1)))
- return;
-
- // If all operands of the GEP we are going to insert into the preheader
- // are constants, generate a GEP ConstantExpr instead.
- //
- // If there is only one operand after the initial non-constant one, we know
- // that it was the induction variable, and has been replaced by a constant
- // null value. In this case, replace the GEP with a use of pointer directly.
- PHINode *NewPHI;
- if (Cache->CachedPHINode == 0) {
- Value *PreGEP;
- if (AllConstantOperands && isa<Constant>(GEPI->getOperand(0))) {
- Constant *C = dyn_cast<Constant>(GEPI->getOperand(0));
- PreGEP = ConstantExpr::getGetElementPtr(C, pre_op_vector);
- } else if (pre_op_vector.size() == 1) {
- PreGEP = GEPI->getOperand(0);
+
+ // Get the index, convert it to intptr_t.
+ SCEVHandle GEPIndexExpr =
+ GetAdjustedIndex(SE->getSCEV(Op), TD->getTypeSize(GTI.getIndexedType()),
+ UIntPtrTy);
+
+ // Process all remaining subscripts in the GEP instruction.
+ for (++i, ++GTI; i != GEP->getNumOperands(); ++i, ++GTI)
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue();
+ uint64_t Offset = SL->MemberOffsets[Idx];
+ Base = SCEVAddExpr::get(Base, SCEVUnknown::getIntegerSCEV(Offset,
+ UIntPtrTy));
} else {
- PreGEP = new GetElementPtrInst(GEPI->getOperand(0),
- pre_op_vector, GEPI->getName()+".pre",
- Preheader->getTerminator());
+ SCEVHandle Idx = SE->getSCEV(GEP->getOperand(i));
+ if (CanReduceSCEV(Idx, L)) { // Another IV subscript
+ GEPIndexExpr = SCEVAddExpr::get(GEPIndexExpr,
+ GetAdjustedIndex(Idx, TD->getTypeSize(GTI.getIndexedType()),
+ UIntPtrTy));
+ assert(CanReduceSCEV(GEPIndexExpr, L) &&
+ "Cannot reduce the sum of two reducible SCEV's??");
+ } else {
+ Base = SCEVAddExpr::get(Base, GetAdjustedIndex(Idx,
+ TD->getTypeSize(GTI.getIndexedType()), UIntPtrTy));
+ }
}
- // The next step of the strength reduction is to create a PHI that will
- // choose between the initial GEP we created and inserted into the
- // preheader, and the incremented GEP that we will create below and insert
- // into the loop body.
- NewPHI = new PHINode(PreGEP->getType(),
- GEPI->getName()+".str", InsertBefore);
- NewPHI->addIncoming(PreGEP, Preheader);
-
- // Now, create the GEP instruction to increment by one the value selected
- // by the PHI instruction we just created above, and add it as the second
- // incoming Value/BasicBlock pair to the PHINode. It is inserted before
- // the increment of the canonical induction variable.
- Instruction *IncrInst =
- const_cast<Instruction*>(L->getCanonicalInductionVariableIncrement());
- GetElementPtrInst *StrGEP = new GetElementPtrInst(NewPHI, inc_op_vector,
- GEPI->getName()+".inc",
- IncrInst);
- pred_iterator PI = pred_begin(Header);
- if (*PI == Preheader)
- ++PI;
- NewPHI->addIncoming(StrGEP, *PI);
- Cache->CachedPHINode = NewPHI;
- } else {
- // Reuse previously created pointer, as it is identical to the one we were
- // about to create.
- NewPHI = Cache->CachedPHINode;
+ assert(CanReduceSCEV(GEPIndexExpr, L) && "Non reducible idx??");
+
+ Base = SCEVAddExpr::get(Base, cast<SCEVAddRecExpr>(GEPIndexExpr)->getStart());
+ SCEVHandle Stride = cast<SCEVAddRecExpr>(GEPIndexExpr)->getOperand(1);
+
+ DEBUG(std::cerr << "GEP BASE : " << *Base << "\n");
+ DEBUG(std::cerr << "GEP STRIDE: " << *Stride << "\n");
+
+ Value *Step = 0; // Step of ISE.
+ if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride))
+ /// Always get the step value as an unsigned value.
+ Step = ConstantExpr::getCast(SC->getValue(),
+ SC->getValue()->getType()->getUnsignedVersion());
+ else
+ Step = cast<SCEVUnknown>(Stride)->getValue();
+ assert(Step->getType()->isUnsigned() && "Bad step value!");
+
+
+ // Now that we know the base and stride contributed by the GEP instruction,
+ // process all users.
+ for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Do not infinitely recurse on PHI nodes.
+ if (isa<PHINode>(User) && User->getParent() == L->getHeader())
+ continue;
+
+ // If this is an instruction defined in a nested loop, or outside this loop,
+ // don't mess with it.
+ if (LI->getLoopFor(User->getParent()) != L)
+ continue;
+
+ DEBUG(std::cerr << "FOUND USER: " << *User
+ << " OF STRIDE: " << *Step << " BASE = " << *Base << "\n");
+
+
+ // Okay, we found a use of the GEP inside the loop.  Record it along with
+ // the base and stride of the access so it can be strength reduced.
+ IVUsesByStride[Step].addUser(Base, User, GEP);
}
+}
+
+/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
+/// reducible SCEV, recursively add its users to the IVUsesByStride set and
+/// return true. Otherwise, return false.
+bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L) {
+ if (I->getType() == Type::VoidTy) return false;
+ SCEVHandle ISE = SE->getSCEV(I);
+ if (!CanReduceSCEV(ISE, L)) return false;
+
+ SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(ISE);
+ SCEVHandle Start = AR->getStart();
+
+ // Get the step value, canonicalizing to an unsigned integer type so that
+ // lookups in the map will match.
+ Value *Step = 0; // Step of ISE.
+ if (SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getOperand(1)))
+ /// Always get the step value as an unsigned value.
+ Step = ConstantExpr::getCast(SC->getValue(),
+ SC->getValue()->getType()->getUnsignedVersion());
+ else
+ Step = cast<SCEVUnknown>(AR->getOperand(1))->getValue();
+ assert(Step->getType()->isUnsigned() && "Bad step value!");
+
+ std::set<GetElementPtrInst*> AnalyzedGEPs;
+
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Do not infinitely recurse on PHI nodes.
+ if (isa<PHINode>(User) && User->getParent() == L->getHeader())
+ continue;
+
+ // If this is an instruction defined in a nested loop, or outside this loop,
+ // don't mess with it.
+ if (LI->getLoopFor(User->getParent()) != L)
+ continue;
+
+ // Next, see if this user is analyzable itself!
+ if (!AddUsersIfInteresting(User, L)) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // If this is a getelementptr instruction, figure out what linear
+ // expression of induction variable is actually being used.
+ //
+ if (AnalyzedGEPs.insert(GEP).second) // Not already analyzed?
+ AnalyzeGetElementPtrUsers(GEP, I, L);
+ } else {
+ DEBUG(std::cerr << "FOUND USER: " << *User
+ << " OF SCEV: " << *ISE << "\n");
- if (GEPI->getNumOperands() - 1 == indvar) {
- // If there were no operands following the induction variable, replace all
- // uses of the old GEP instruction with the new PHI.
- GEPI->replaceAllUsesWith(NewPHI);
- } else {
- // Create a new GEP instruction using the new PHI as the base. The
- // operands of the original GEP past the induction variable become
- // operands of this new GEP.
- std::vector<Value *> op_vector;
- const Type *Ty = CanonicalIndVar->getType();
- op_vector.push_back(Constant::getNullValue(Ty));
- for (unsigned op = indvar + 1; op < GEPI->getNumOperands(); op++)
- op_vector.push_back(GEPI->getOperand(op));
- GetElementPtrInst *newGEP = new GetElementPtrInst(NewPHI, op_vector,
- GEPI->getName() + ".lsr",
- GEPI);
- GEPI->replaceAllUsesWith(newGEP);
+ // Okay, we found a user that we cannot reduce. Analyze the instruction
+ // and decide what to do with it.
+ IVUsesByStride[Step].addUser(Start, User, I);
+ }
+ }
+ }
+ return true;
+}
+
+namespace {
+ /// BasedUser - For a particular base value, keep information about how we've
+ /// partitioned the expression so far.
+ struct BasedUser {
+ /// Inst - The instruction using the induction variable.
+ Instruction *Inst;
+
+ /// Op - The value to replace with the EmittedBase.
+ Value *Op;
+
+ /// Imm - The immediate value that should be added to the base immediately
+ /// before Inst, because it will be folded into the imm field of the
+ /// instruction.
+ SCEVHandle Imm;
+
+ /// EmittedBase - The actual value* to use for the base value of this
+ /// operation. This is null if we should just use zero so far.
+ Value *EmittedBase;
+
+ BasedUser(Instruction *I, Value *V, const SCEVHandle &IMM)
+ : Inst(I), Op(V), Imm(IMM), EmittedBase(0) {}
+
+
+ // No need to compare these.
+ bool operator<(const BasedUser &BU) const { return 0; }
+
+ void dump() const;
+ };
+}
+
+void BasedUser::dump() const {
+ std::cerr << " Imm=" << *Imm;
+ if (EmittedBase)
+ std::cerr << " EB=" << *EmittedBase;
+
+ std::cerr << " Inst: " << *Inst;
+}
+
+/// isTargetConstant - Return true if the following can be referenced by the
+/// immediate field of a target instruction.
+static bool isTargetConstant(const SCEVHandle &V) {
+
+ // FIXME: Look at the target to decide if &GV is a legal constant immediate.
+ if (isa<SCEVConstant>(V)) return true;
+
+ return false; // ENABLE this for x86
+
+ if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue()))
+ if (CE->getOpcode() == Instruction::Cast)
+ if (isa<GlobalValue>(CE->getOperand(0)))
+ // FIXME: should check to see that the dest is uintptr_t!
+ return true;
+ return false;
+}
+
+/// GetImmediateValues - Look at Val, and pull out any additions of constants
+/// that can fit into the immediate field of instructions in the target.
+static SCEVHandle GetImmediateValues(SCEVHandle Val, bool isAddress) {
+ if (!isAddress)
+ return SCEVUnknown::getIntegerSCEV(0, Val->getType());
+ if (isTargetConstant(Val))
+ return Val;
+
+ SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val);
+ if (SAE) {
+ unsigned i = 0;
+ for (; i != SAE->getNumOperands(); ++i)
+ if (isTargetConstant(SAE->getOperand(i))) {
+ SCEVHandle ImmVal = SAE->getOperand(i);
+
+ // If there are any other immediates that we can handle here, pull them
+ // out too.
+ for (++i; i != SAE->getNumOperands(); ++i)
+ if (isTargetConstant(SAE->getOperand(i)))
+ ImmVal = SCEVAddExpr::get(ImmVal, SAE->getOperand(i));
+ return ImmVal;
+ }
+ }
+
+ return SCEVUnknown::getIntegerSCEV(0, Val->getType());
+}
+
+/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
+/// stride of IV. All of the users may have different starting values, and this
+/// may not be the only stride (we know it is if isOnlyStride is true).
+void LoopStrengthReduce::StrengthReduceStridedIVUsers(Value *Stride,
+ IVUse &Uses, Loop *L,
+ bool isOnlyStride) {
+ // Transform our list of users and offsets to a bit more complex table. In
+ // this new vector, the first entry for each element is the base of the
+ // strided access, and the second is the BasedUser object for the use. We
+ // progressively move information from the first to the second entry, until we
+ // eventually emit the object.
+ std::vector<std::pair<SCEVHandle, BasedUser> > UsersToProcess;
+ UsersToProcess.reserve(Uses.Users.size());
+
+ SCEVHandle ZeroBase = SCEVUnknown::getIntegerSCEV(0,
+ Uses.Users[0].first->getType());
+
+ for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i)
+ UsersToProcess.push_back(std::make_pair(Uses.Users[i].first,
+ BasedUser(Uses.Users[i].second,
+ Uses.UserOperands[i],
+ ZeroBase)));
+
+ // First pass, figure out what we can represent in the immediate fields of
+ // instructions. If we can represent anything there, move it to the imm
+ // fields of the BasedUsers.
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
+ bool isAddress = isa<LoadInst>(UsersToProcess[i].second.Inst) ||
+ isa<StoreInst>(UsersToProcess[i].second.Inst);
+ UsersToProcess[i].second.Imm = GetImmediateValues(UsersToProcess[i].first,
+ isAddress);
+ UsersToProcess[i].first = SCEV::getMinusSCEV(UsersToProcess[i].first,
+ UsersToProcess[i].second.Imm);
+
+ DEBUG(std::cerr << "BASE: " << *UsersToProcess[i].first);
+ DEBUG(UsersToProcess[i].second.dump());
}
- // The old GEP is now dead.
- DeadInsts.insert(GEPI);
- ++NumReduced;
+ SCEVExpander Rewriter(*SE, *LI);
+ BasicBlock *Preheader = L->getLoopPreheader();
+ Instruction *PreInsertPt = Preheader->getTerminator();
+ Instruction *PhiInsertBefore = L->getHeader()->begin();
+
+ assert(isa<PHINode>(PhiInsertBefore) &&
+ "How could this loop have IV's without any phis?");
+ PHINode *SomeLoopPHI = cast<PHINode>(PhiInsertBefore);
+ assert(SomeLoopPHI->getNumIncomingValues() == 2 &&
+ "This loop isn't canonicalized right");
+ BasicBlock *LatchBlock =
+ SomeLoopPHI->getIncomingBlock(SomeLoopPHI->getIncomingBlock(0) == Preheader);
+
+ // FIXME: This loop needs increasing levels of intelligence.
+ // STAGE 0: just emit everything as its own base. <-- We are here
+ // STAGE 1: factor out common vars from bases, and try and push resulting
+ // constants into Imm field.
+ // STAGE 2: factor out large constants to try and make more constants
+ // acceptable for target loads and stores.
+ std::sort(UsersToProcess.begin(), UsersToProcess.end());
+
+ while (!UsersToProcess.empty()) {
+ // Create a new Phi for this base, and stick it in the loop header.
+ Value *Replaced = UsersToProcess.front().second.Op;
+ const Type *ReplacedTy = Replaced->getType();
+ PHINode *NewPHI = new PHINode(ReplacedTy, Replaced->getName()+".str",
+ PhiInsertBefore);
+
+ // Emit the initial base value into the loop preheader, and add it to the
+ // Phi node.
+ Value *BaseV = Rewriter.expandCodeFor(UsersToProcess.front().first,
+ PreInsertPt, ReplacedTy);
+ NewPHI->addIncoming(BaseV, Preheader);
+
+ // Emit the increment of the base value before the terminator of the loop
+ // latch block, and add it to the Phi node.
+ SCEVHandle Inc = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
+ SCEVUnknown::get(Stride));
+
+ Value *IncV = Rewriter.expandCodeFor(Inc, LatchBlock->getTerminator(),
+ ReplacedTy);
+ IncV->setName(NewPHI->getName()+".inc");
+ NewPHI->addIncoming(IncV, LatchBlock);
+
+ // Emit the code to add the immediate offset to the Phi value, just before
+ // the instruction that we identified as using this stride and base.
+ // First, empty the SCEVExpander's expression map so that we are guaranteed
+ // to have the code emitted where we expect it.
+ Rewriter.clear();
+ SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
+ UsersToProcess.front().second.Imm);
+ Value *newVal = Rewriter.expandCodeFor(NewValSCEV,
+ UsersToProcess.front().second.Inst,
+ ReplacedTy);
+
+ // Replace the use of the operand Value with the new Phi we just created.
+ DEBUG(std::cerr << "REPLACING: " << *Replaced << "IN: " <<
+ *UsersToProcess.front().second.Inst << "WITH: "<< *newVal << '\n');
+ UsersToProcess.front().second.Inst->replaceUsesOfWith(Replaced, newVal);
+
+ // Mark the old value we replaced as possibly dead, so that it is eliminated
+ // if we just replaced the last use of that value.
+ DeadInsts.insert(cast<Instruction>(Replaced));
+
+ UsersToProcess.erase(UsersToProcess.begin());
+ ++NumReduced;
+
+ // TODO: Next, find out which base index is the most common, pull it out.
+ }
+
+ // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
+ // different starting values, into different PHIs.
+
+ // BEFORE writing this, it's probably useful to handle GEP's.
+
+ // NOTE: pull all constants together, for REG+IMM addressing, include &GV in
+ // 'IMM' if the target supports it.
}
+
void LoopStrengthReduce::runOnLoop(Loop *L) {
// First step, transform all loops nesting inside of this loop.
for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I)
runOnLoop(*I);
- // Next, get the first PHINode since it is guaranteed to be the canonical
- // induction variable for the loop by the preceding IndVarSimplify pass.
- PHINode *PN = L->getCanonicalInductionVariable();
- if (0 == PN)
- return;
-
- // FIXME: Need to use SCEV to detect GEP uses of the indvar, since indvars
- // pass creates code like this, which we can't currently detect:
- // %tmp.1 = sub uint 2000, %indvar
- // %tmp.8 = getelementptr int* %y, uint %tmp.1
-
- // Strength reduce all GEPs in the Loop. Insert secondary PHI nodes for the
- // strength reduced pointers we'll be creating after the canonical induction
- // variable's PHI.
- std::set<Instruction*> DeadInsts;
- GEPCache Cache;
- for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
- UI != UE; ++UI)
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
- strengthReduceGEP(GEPI, L, &Cache, PN->getNext(), DeadInsts);
+ // Next, find all uses of induction variables in this loop, and categorize
+ // them by stride. Start by finding all of the PHI nodes in the header for
+ // this loop. If they are induction variables, inspect their uses.
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
+ AddUsersIfInteresting(I, L);
+
+ // If we have nothing to do, return.
+ //if (IVUsesByStride.empty()) return;
+
+ // FIXME: We can widen subreg IV's here for RISC targets. e.g. instead of
+ // doing computation in byte values, promote to 32-bit values if safe.
+
+ // FIXME: Attempt to reuse values across multiple IV's. In particular, we
+ // could have something like "for(i) { foo(i*8); bar(i*16) }", which should be
+ // codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC. Need
+ // to be careful that IV's are all the same type. Only works for intptr_t
+ // indvars.
+
+ // If we only have one stride, we can more aggressively eliminate some things.
+ bool HasOneStride = IVUsesByStride.size() == 1;
+
+ for (std::map<Value*, IVUse>::iterator SI = IVUsesByStride.begin(),
+ E = IVUsesByStride.end(); SI != E; ++SI)
+ StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);
// Clean up after ourselves
if (!DeadInsts.empty()) {
DeleteTriviallyDeadInstructions(DeadInsts);
- // At this point, we know that we have killed one or more GEP instructions.
- // It is worth checking to see if the cann indvar is also dead, so that we
- // can remove it as well. The requirements for the cann indvar to be
- // considered dead are:
- // 1. the cann indvar has one use
- // 2. the use is an add instruction
- // 3. the add has one use
- // 4. the add is used by the cann indvar
- // If all four cases above are true, then we can remove both the add and
- // the cann indvar.
- // FIXME: this needs to eliminate an induction variable even if it's being
- // compared against some value to decide loop termination.
- if (PN->hasOneUse()) {
- BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin()));
- if (BO && BO->getOpcode() == Instruction::Add)
- if (BO->hasOneUse()) {
- if (PN == dyn_cast<PHINode>(*(BO->use_begin()))) {
- DeadInsts.insert(BO);
- // Break the cycle, then delete the PHI.
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
- PN->eraseFromParent();
- DeleteTriviallyDeadInstructions(DeadInsts);
+ BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN;
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ // At this point, we know that we have killed one or more GEP instructions.
+ // It is worth checking to see if the cann indvar is also dead, so that we
+ // can remove it as well. The requirements for the cann indvar to be
+ // considered dead are:
+ // 1. the cann indvar has one use
+ // 2. the use is an add instruction
+ // 3. the add has one use
+ // 4. the add is used by the cann indvar
+ // If all four cases above are true, then we can remove both the add and
+ // the cann indvar.
+ // FIXME: this needs to eliminate an induction variable even if it's being
+ // compared against some value to decide loop termination.
+ if (PN->hasOneUse()) {
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin()));
+ if (BO && BO->getOpcode() == Instruction::Add)
+ if (BO->hasOneUse()) {
+ if (PN == dyn_cast<PHINode>(*(BO->use_begin()))) {
+ DeadInsts.insert(BO);
+ // Break the cycle, then delete the PHI.
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
}
- }
+ }
}
+ DeleteTriviallyDeadInstructions(DeadInsts);
}
+
+ IVUsesByStride.clear();
+ return;
}
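
Taken together, the per-stride rewrite in StrengthReduceStridedIVUsers aims for a loop of roughly the following shape (an editor's C++ illustration under simplifying assumptions, not the literal output of this pass): one new pointer induction variable per base, seeded in the preheader and incremented in the latch, with small constant offsets left folded into each use, mirroring the Imm field of BasedUser.

// Before: both field addresses are rebuilt from the loop counter each time.
struct Pt { int x, y; };
long sum_fields(const Pt *A, unsigned N) {
  long Sum = 0;
  for (unsigned i = 0; i != N; ++i)
    Sum += A[i].x + A[i].y;       // two addresses derived from i per iteration
  return Sum;
}

// After: one strength-reduced base per stride (sizeof(Pt) here); the field
// offsets stay folded into the individual uses.
long sum_fields_reduced(const Pt *A, unsigned N) {
  long Sum = 0;
  const Pt *P = A;                // base value emitted in the preheader
  for (unsigned i = 0; i != N; ++i) {
    Sum += P->x + P->y;           // per-use immediates: offsets of x and y
    ++P;                          // increment emitted in the latch block
  }
  return Sum;
}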