diff options
author | Dan Gohman <gohman@apple.com> | 2010-02-12 10:34:29 +0000 |
---|---|---|
committer | Dan Gohman <gohman@apple.com> | 2010-02-12 10:34:29 +0000 |
commit | 572645cf84060c0fc25cb91d38cb9079918b3a88 (patch) | |
tree | 0571ce42ea03d210844a627baea045fa36f16df5 /lib/Transforms/Scalar/LoopStrengthReduce.cpp | |
parent | 5cef638855c9f2bb23a9c181cc47ddace8551f50 (diff) |
Reapply the new LoopStrengthReduction code, with compile time and
bug fixes, and with improved heuristics for analyzing foreign-loop
addrecs.
This change also flattens IVUsers, eliminating the stride-oriented
groupings, which makes it easier to work with.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95975 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 5007 |
1 files changed, 2730 insertions, 2277 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a5611ff113..73d3f9db89 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -17,6 +17,40 @@ // available on the target, and it performs a variety of other optimizations // related to loop induction variables. // +// Terminology note: this code has a lot of handling for "post-increment" or +// "post-inc" users. This is not talking about post-increment addressing modes; +// it is instead talking about code like this: +// +// %i = phi [ 0, %entry ], [ %i.next, %latch ] +// ... +// %i.next = add %i, 1 +// %c = icmp eq %i.next, %n +// +// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however +// it's useful to think about these as the same register, with some uses using +// the value of the register before the add and some using // it after. In this +// example, the icmp is a post-increment user, since it uses %i.next, which is +// the value of the induction variable after the increment. The other common +// case of post-increment users is users outside the loop. +// +// TODO: More sophistication in the way Formulae are generated and filtered. +// +// TODO: Handle multiple loops at a time. +// +// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr +// instead of a GlobalValue? +// +// TODO: When truncation is free, truncate ICmp users' operands to make it a +// smaller encoding (on x86 at least). +// +// TODO: When a negated register is used by an add (such as in a list of +// multiple base registers, or as the increment expression in an addrec), +// we may not actually need both reg and (-1 * reg) in registers; the +// negation can be implemented by using a sub instead of an add. The +// lack of support for taking this into consideration when making +// register pressure decisions is partly worked around by the "Special" +// use kind. +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-reduce" @@ -26,208 +60,401 @@ #include "llvm/IntrinsicInst.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; -STATISTIC(NumReduced , "Number of IV uses strength reduced"); -STATISTIC(NumInserted, "Number of PHIs inserted"); -STATISTIC(NumVariable, "Number of PHIs with variable strides"); -STATISTIC(NumEliminated, "Number of strides eliminated"); -STATISTIC(NumShadow, "Number of Shadow IVs optimized"); -STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses"); -STATISTIC(NumLoopCond, "Number of loop terminating conds optimized"); -STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero"); +namespace { + +/// RegSortData - This class holds data which is used to order reuse candidates. +class RegSortData { +public: + /// UsedByIndices - This represents the set of LSRUse indices which reference + /// a particular register. + SmallBitVector UsedByIndices; + + RegSortData() {} + + void print(raw_ostream &OS) const; + void dump() const; +}; -static cl::opt<bool> EnableFullLSRMode("enable-full-lsr", - cl::init(false), - cl::Hidden); +} + +void RegSortData::print(raw_ostream &OS) const { + OS << "[NumUses=" << UsedByIndices.count() << ']'; +} + +void RegSortData::dump() const { + print(errs()); errs() << '\n'; +} namespace { - struct BasedUser; +/// RegUseTracker - Map register candidates to information about how they are +/// used. +class RegUseTracker { + typedef DenseMap<const SCEV *, RegSortData> RegUsesTy; - /// IVInfo - This structure keeps track of one IV expression inserted during - /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as - /// well as the PHI node and increment value created for rewrite. - struct IVExpr { - const SCEV *Stride; - const SCEV *Base; - PHINode *PHI; + RegUsesTy RegUses; + SmallVector<const SCEV *, 16> RegSequence; - IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi) - : Stride(stride), Base(base), PHI(phi) {} - }; +public: + void CountRegister(const SCEV *Reg, size_t LUIdx); + + bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; + + const SmallBitVector &getUsedByIndices(const SCEV *Reg) const; + + void clear(); + + typedef SmallVectorImpl<const SCEV *>::iterator iterator; + typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator; + iterator begin() { return RegSequence.begin(); } + iterator end() { return RegSequence.end(); } + const_iterator begin() const { return RegSequence.begin(); } + const_iterator end() const { return RegSequence.end(); } +}; + +} + +void +RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { + std::pair<RegUsesTy::iterator, bool> Pair = + RegUses.insert(std::make_pair(Reg, RegSortData())); + RegSortData &RSD = Pair.first->second; + if (Pair.second) + RegSequence.push_back(Reg); + RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1)); + RSD.UsedByIndices.set(LUIdx); +} + +bool +RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { + if (!RegUses.count(Reg)) return false; + const SmallBitVector &UsedByIndices = + RegUses.find(Reg)->second.UsedByIndices; + int i = UsedByIndices.find_first(); + if (i == -1) return false; + if ((size_t)i != LUIdx) return true; + return UsedByIndices.find_next(i) != -1; +} + +const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { + RegUsesTy::const_iterator I = RegUses.find(Reg); + assert(I != RegUses.end() && "Unknown register!"); + return I->second.UsedByIndices; +} + +void RegUseTracker::clear() { + RegUses.clear(); + RegSequence.clear(); +} + +namespace { + +/// Formula - This class holds information that describes a formula for +/// computing satisfying a use. It may include broken-out immediates and scaled +/// registers. +struct Formula { + /// AM - This is used to represent complex addressing, as well as other kinds + /// of interesting uses. + TargetLowering::AddrMode AM; + + /// BaseRegs - The list of "base" registers for this use. When this is + /// non-empty, AM.HasBaseReg should be set to true. + SmallVector<const SCEV *, 2> BaseRegs; + + /// ScaledReg - The 'scaled' register for this use. This should be non-null + /// when AM.Scale is not zero. + const SCEV *ScaledReg; + + Formula() : ScaledReg(0) {} + + void InitialMatch(const SCEV *S, Loop *L, + ScalarEvolution &SE, DominatorTree &DT); + + unsigned getNumRegs() const; + const Type *getType() const; + + bool referencesReg(const SCEV *S) const; + bool hasRegsUsedByUsesOtherThan(size_t LUIdx, + const RegUseTracker &RegUses) const; + + void print(raw_ostream &OS) const; + void dump() const; +}; + +} + +/// DoInitialMatch - Recurrsion helper for InitialMatch. +static void DoInitialMatch(const SCEV *S, Loop *L, + SmallVectorImpl<const SCEV *> &Good, + SmallVectorImpl<const SCEV *> &Bad, + ScalarEvolution &SE, DominatorTree &DT) { + // Collect expressions which properly dominate the loop header. + if (S->properlyDominates(L->getHeader(), &DT)) { + Good.push_back(S); + return; + } - /// IVsOfOneStride - This structure keeps track of all IV expression inserted - /// during StrengthReduceStridedIVUsers for a particular stride of the IV. - struct IVsOfOneStride { - std::vector<IVExpr> IVs; + // Look at add operands. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + DoInitialMatch(*I, L, Good, Bad, SE, DT); + return; + } - void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) { - IVs.push_back(IVExpr(Stride, Base, PHI)); + // Look at addrec operands. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + if (!AR->getStart()->isZero()) { + DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT); + DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), + L, Good, Bad, SE, DT); + return; } - }; - class LoopStrengthReduce : public LoopPass { - IVUsers *IU; - ScalarEvolution *SE; - bool Changed; - - /// IVsByStride - Keep track of all IVs that have been inserted for a - /// particular stride. - std::map<const SCEV *, IVsOfOneStride> IVsByStride; - - /// DeadInsts - Keep track of instructions we may have made dead, so that - /// we can remove them after we are done working. - SmallVector<WeakVH, 16> DeadInsts; - - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. - const TargetLowering *TLI; - - public: - static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) {} - - bool runOnLoop(Loop *L, LPPassManager &LPM); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - // We split critical edges, so we change the CFG. However, we do update - // many analyses if they are around. - AU.addPreservedID(LoopSimplifyID); - AU.addPreserved("loops"); - AU.addPreserved("domfrontier"); - AU.addPreserved("domtree"); - - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<ScalarEvolution>(); - AU.addPreserved<ScalarEvolution>(); - AU.addRequired<IVUsers>(); - AU.addPreserved<IVUsers>(); + // Handle a multiplication by -1 (negation) if it didn't fold. + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) + if (Mul->getOperand(0)->isAllOnesValue()) { + SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end()); + const SCEV *NewMul = SE.getMulExpr(Ops); + + SmallVector<const SCEV *, 4> MyGood; + SmallVector<const SCEV *, 4> MyBad; + DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT); + const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue( + SE.getEffectiveSCEVType(NewMul->getType()))); + for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(), + E = MyGood.end(); I != E; ++I) + Good.push_back(SE.getMulExpr(NegOne, *I)); + for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(), + E = MyBad.end(); I != E; ++I) + Bad.push_back(SE.getMulExpr(NegOne, *I)); + return; } - private: - void OptimizeIndvars(Loop *L); - - /// OptimizeLoopTermCond - Change loop terminating condition to use the - /// postinc iv when possible. - void OptimizeLoopTermCond(Loop *L); - - /// OptimizeShadowIV - If IV is used in a int-to-float cast - /// inside the loop then try to eliminate the cast opeation. - void OptimizeShadowIV(Loop *L); - - /// OptimizeMax - Rewrite the loop's terminating condition - /// if it uses a max computation. - ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse); - - /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for - /// deciding when to exit the loop is used only for that purpose, try to - /// rearrange things so it counts down to a test against zero. - bool OptimizeLoopCountIV(Loop *L); - bool OptimizeLoopCountIVOfStride(const SCEV* &Stride, - IVStrideUse* &CondUse, Loop *L); - - /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a - /// single stride of IV. All of the users may have different starting - /// values, and this may not be the only stride. - void StrengthReduceIVUsersOfStride(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L); - void StrengthReduceIVUsers(Loop *L); - - ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV* &CondStride, - bool PostPass = false); - - bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV* &CondStride); - bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); - const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *, - IVExpr&, const Type*, - const std::vector<BasedUser>& UsersToProcess); - bool ValidScale(bool, int64_t, - const std::vector<BasedUser>& UsersToProcess); - bool ValidOffset(bool, int64_t, int64_t, - const std::vector<BasedUser>& UsersToProcess); - const SCEV *CollectIVUsers(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L, - bool &AllUsesAreAddresses, - bool &AllUsesAreOutsideLoop, - std::vector<BasedUser> &UsersToProcess); - bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc); - bool ShouldUseFullStrengthReductionMode( - const std::vector<BasedUser> &UsersToProcess, - const Loop *L, - bool AllUsesAreAddresses, - const SCEV *Stride); - void PrepareToStrengthReduceFully( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - const Loop *L, - SCEVExpander &PreheaderRewriter); - void PrepareToStrengthReduceFromSmallerStride( - std::vector<BasedUser> &UsersToProcess, - Value *CommonBaseV, - const IVExpr &ReuseIV, - Instruction *PreInsertPt); - void PrepareToStrengthReduceWithNewPhi( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - Value *CommonBaseV, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &PreheaderRewriter); - - void DeleteTriviallyDeadInstructions(); - }; + // Ok, we can't do anything interesting. Just stuff the whole thing into a + // register and hope for the best. + Bad.push_back(S); } -char LoopStrengthReduce::ID = 0; -static RegisterPass<LoopStrengthReduce> -X("loop-reduce", "Loop Strength Reduction"); +/// InitialMatch - Incorporate loop-variant parts of S into this Formula, +/// attempting to keep all loop-invariant and loop-computable values in a +/// single base register. +void Formula::InitialMatch(const SCEV *S, Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + SmallVector<const SCEV *, 4> Good; + SmallVector<const SCEV *, 4> Bad; + DoInitialMatch(S, L, Good, Bad, SE, DT); + if (!Good.empty()) { + BaseRegs.push_back(SE.getAddExpr(Good)); + AM.HasBaseReg = true; + } + if (!Bad.empty()) { + BaseRegs.push_back(SE.getAddExpr(Bad)); + AM.HasBaseReg = true; + } +} -Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { - return new LoopStrengthReduce(TLI); +/// getNumRegs - Return the total number of register operands used by this +/// formula. This does not include register uses implied by non-constant +/// addrec strides. +unsigned Formula::getNumRegs() const { + return !!ScaledReg + BaseRegs.size(); } -/// DeleteTriviallyDeadInstructions - If any of the instructions is the -/// specified set are trivially dead, delete them and see if this makes any of -/// their operands subsequently dead. -void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { - while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); +/// getType - Return the type of this formula, if it has one, or null +/// otherwise. This type is meaningless except for the bit size. +const Type *Formula::getType() const { + return !BaseRegs.empty() ? BaseRegs.front()->getType() : + ScaledReg ? ScaledReg->getType() : + AM.BaseGV ? AM.BaseGV->getType() : + 0; +} - if (I == 0 || !isInstructionTriviallyDead(I)) - continue; +/// referencesReg - Test if this formula references the given register. +bool Formula::referencesReg(const SCEV *S) const { + return S == ScaledReg || + std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end(); +} - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) - if (Instruction *U = dyn_cast<Instruction>(*OI)) { - *OI = 0; - if (U->use_empty()) - DeadInsts.push_back(U); +/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers +/// which are used by uses other than the use with the given index. +bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, + const RegUseTracker &RegUses) const { + if (ScaledReg) + if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx)) + return true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), + E = BaseRegs.end(); I != E; ++I) + if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx)) + return true; + return false; +} + +void Formula::print(raw_ostream &OS) const { + bool First = true; + if (AM.BaseGV) { + if (!First) OS << " + "; else First = false; + WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false); + } + if (AM.BaseOffs != 0) { + if (!First) OS << " + "; else First = false; + OS << AM.BaseOffs; + } + for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), + E = BaseRegs.end(); I != E; ++I) { + if (!First) OS << " + "; else First = false; + OS << "reg(" << **I << ')'; + } + if (AM.Scale != 0) { + if (!First) OS << " + "; else First = false; + OS << AM.Scale << "*reg("; + if (ScaledReg) + OS << *ScaledReg; + else + OS << "<unknown>"; + OS << ')'; + } +} + +void Formula::dump() const { + print(errs()); errs() << '\n'; +} + +/// getSDiv - Return an expression for LHS /s RHS, if it can be determined, +/// or null otherwise. If IgnoreSignificantBits is true, expressions like +/// (X * Y) /s Y are simplified to Y, ignoring that the multiplication may +/// overflow, which is useful when the result will be used in a context where +/// the most significant bits are ignored. +static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS, + ScalarEvolution &SE, + bool IgnoreSignificantBits = false) { + // Handle the trivial case, which works for any SCEV type. + if (LHS == RHS) + return SE.getIntegerSCEV(1, LHS->getType()); + + // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some + // folding. + if (RHS->isAllOnesValue()) + return SE.getMulExpr(LHS, RHS); + + // Check for a division of a constant by a constant. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) { + const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS); + if (!RC) + return 0; + if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0) + return 0; + return SE.getConstant(C->getValue()->getValue() + .sdiv(RC->getValue()->getValue())); + } + + // Distribute the sdiv over addrec operands. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { + const SCEV *Start = getSDiv(AR->getStart(), RHS, SE, + IgnoreSignificantBits); + if (!Start) return 0; + const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE, + IgnoreSignificantBits); + if (!Step) return 0; + return SE.getAddRecExpr(Start, Step, AR->getLoop()); + } + + // Distribute the sdiv over add operands. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) { + SmallVector<const SCEV *, 8> Ops; + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + const SCEV *Op = getSDiv(*I, RHS, SE, + IgnoreSignificantBits); + if (!Op) return 0; + Ops.push_back(Op); + } + return SE.getAddExpr(Ops); + } + + // Check for a multiply operand that we can pull RHS out of. + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) + if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) { + SmallVector<const SCEV *, 4> Ops; + bool Found = false; + for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end(); + I != E; ++I) { + if (!Found) + if (const SCEV *Q = getSDiv(*I, RHS, SE, IgnoreSignificantBits)) { + Ops.push_back(Q); + Found = true; + continue; + } + Ops.push_back(*I); } + return Found ? SE.getMulExpr(Ops) : 0; + } - I->eraseFromParent(); - Changed = true; + // Otherwise we don't know. + return 0; +} + +/// ExtractImmediate - If S involves the addition of a constant integer value, +/// return that integer value, and mutate S to point to a new SCEV with that +/// value excluded. +static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + if (C->getValue()->getValue().getMinSignedBits() <= 64) { + S = SE.getIntegerSCEV(0, C->getType()); + return C->getValue()->getSExtValue(); + } + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); + int64_t Result = ExtractImmediate(NewOps.front(), SE); + S = SE.getAddExpr(NewOps); + return Result; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); + int64_t Result = ExtractImmediate(NewOps.front(), SE); + S = SE.getAddRecExpr(NewOps, AR->getLoop()); + return Result; + } + return 0; +} + +/// ExtractSymbol - If S involves the addition of a GlobalValue address, +/// return that symbol, and mutate S to point to a new SCEV with that +/// value excluded. +static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) { + S = SE.getIntegerSCEV(0, GV->getType()); + return GV; + } + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); + GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); + S = SE.getAddExpr(NewOps); + return Result; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); + GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); + S = SE.getAddRecExpr(NewOps, AR->getLoop()); + return Result; } + return 0; } /// isAddressUse - Returns true if the specified instruction is using the @@ -276,1776 +503,818 @@ static const Type *getAccessType(const Instruction *Inst) { break; } } - return AccessTy; -} - -namespace { - /// BasedUser - For a particular base value, keep information about how we've - /// partitioned the expression so far. - struct BasedUser { - /// Base - The Base value for the PHI node that needs to be inserted for - /// this use. As the use is processed, information gets moved from this - /// field to the Imm field (below). BasedUser values are sorted by this - /// field. - const SCEV *Base; - - /// Inst - The instruction using the induction variable. - Instruction *Inst; - - /// OperandValToReplace - The operand value of Inst to replace with the - /// EmittedBase. - Value *OperandValToReplace; - - /// Imm - The immediate value that should be added to the base immediately - /// before Inst, because it will be folded into the imm field of the - /// instruction. This is also sometimes used for loop-variant values that - /// must be added inside the loop. - const SCEV *Imm; - - /// Phi - The induction variable that performs the striding that - /// should be used for this user. - PHINode *Phi; - - // isUseOfPostIncrementedValue - True if this should use the - // post-incremented version of this IV, not the preincremented version. - // This can only be set in special cases, such as the terminating setcc - // instruction for a loop and uses outside the loop that are dominated by - // the loop. - bool isUseOfPostIncrementedValue; - - BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : Base(IVSU.getOffset()), Inst(IVSU.getUser()), - OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(se->getIntegerSCEV(0, Base->getType())), - isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} - - // Once we rewrite the code to insert the new IVs we want, update the - // operands of Inst to use the new expression 'NewBase', with 'Imm' added - // to it. - void RewriteInstructionToUseNewBase(const SCEV *NewBase, - Instruction *InsertPt, - SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<WeakVH> &DeadInsts, - ScalarEvolution *SE); - - Value *InsertCodeForBaseAtPosition(const SCEV *NewBase, - const Type *Ty, - SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE); - void dump() const; - }; -} - -void BasedUser::dump() const { - dbgs() << " Base=" << *Base; - dbgs() << " Imm=" << *Imm; - dbgs() << " Inst: " << *Inst; -} -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase, - const Type *Ty, - SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE) { - Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); + // All pointers have the same requirements, so canonicalize them to an + // arbitrary pointer type to minimize variation. + if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy)) + AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), + PTy->getAddressSpace()); - // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to - // re-analyze it. - const SCEV *NewValSCEV = SE->getUnknown(Base); - - // Always emit the immediate into the same block as the user. - NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); - - return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); + return AccessTy; } +/// DeleteTriviallyDeadInstructions - If any of the instructions is the +/// specified set are trivially dead, delete them and see if this makes any of +/// their operands subsequently dead. +static bool +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { + bool Changed = false; -// Once we rewrite the code to insert the new IVs we want, update the -// operands of Inst to use the new expression 'NewBase', with 'Imm' added -// to it. NewBasePt is the last instruction which contributes to the -// value of NewBase in the case that it's a diffferent instruction from -// the PHI that NewBase is computed from, or null otherwise. -// -void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase, - Instruction *NewBasePt, - SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<WeakVH> &DeadInsts, - ScalarEvolution *SE) { - if (!isa<PHINode>(Inst)) { - // By default, insert code at the user instruction. - BasicBlock::iterator InsertPt = Inst; - - // However, if the Operand is itself an instruction, the (potentially - // complex) inserted code may be shared by many users. Because of this, we - // want to emit code for the computation of the operand right before its old - // computation. This is usually safe, because we obviously used to use the - // computation when it was computed in its current block. However, in some - // cases (e.g. use of a post-incremented induction variable) the NewBase - // value will be pinned to live somewhere after the original computation. - // In this case, we have to back off. - // - // If this is a use outside the loop (which means after, since it is based - // on a loop indvar) we use the post-incremented value, so that we don't - // artificially make the preinc value live out the bottom of the loop. - if (!isUseOfPostIncrementedValue && L->contains(Inst)) { - if (NewBasePt && isa<PHINode>(OperandValToReplace)) { - InsertPt = NewBasePt; - ++InsertPt; - } else if (Instruction *OpInst - = dyn_cast<Instruction>(OperandValToReplace)) { - InsertPt = OpInst; - while (isa<PHINode>(InsertPt)) ++InsertPt; - } - } - Value *NewVal = InsertCodeForBaseAtPosition(NewBase, - OperandValToReplace->getType(), - Rewriter, InsertPt, SE); - // Replace the use of the operand Value with the new Phi we just created. - Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - - DEBUG(dbgs() << " Replacing with "); - DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false)); - DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " - << *Imm << "\n"); - return; - } + while (!DeadInsts.empty()) { + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); - // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm - // expression into each operand block that uses it. Note that PHI nodes can - // have multiple entries for the same predecessor. We use a map to make sure - // that a PHI node only has a single Value* for each predecessor (which also - // prevents us from inserting duplicate code in some blocks). - DenseMap<BasicBlock*, Value*> InsertedCode; - PHINode *PN = cast<PHINode>(Inst); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) == OperandValToReplace) { - // If the original expression is outside the loop, put the replacement - // code in the same place as the original expression, - // which need not be an immediate predecessor of this PHI. This way we - // need only one copy of it even if it is referenced multiple times in - // the PHI. We don't do this when the original expression is inside the - // loop because multiple copies sometimes do useful sinking of code in - // that case(?). - Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace); - BasicBlock *PHIPred = PN->getIncomingBlock(i); - if (L->contains(OldLoc)) { - // If this is a critical edge, split the edge so that we do not insert - // the code on all predecessor/successor paths. We do this unless this - // is the canonical backedge for this loop, as this can make some - // inserted code be in an illegal position. - if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && - !isa<IndirectBrInst>(PHIPred->getTerminator()) && - (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { - - // First step, split the critical edge. - BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(), - P, false); - - // Next step: move the basic block. In particular, if the PHI node - // is outside of the loop, and PredTI is in the loop, we want to - // move the block to be immediately before the PHI block, not - // immediately after PredTI. - if (L->contains(PHIPred) && !L->contains(PN)) - NewBB->moveBefore(PN->getParent()); + if (I == 0 || !isInstructionTriviallyDead(I)) + continue; - // Splitting the edge can reduce the number of PHI entries we have. - e = PN->getNumIncomingValues(); - PHIPred = NewBB; - i = PN->getBasicBlockIndex(PHIPred); - } - } - Value *&Code = InsertedCode[PHIPred]; - if (!Code) { - // Insert the code into the end of the predecessor block. - Instruction *InsertPt = (L->contains(OldLoc)) ? - PHIPred->getTerminator() : - OldLoc->getParent()->getTerminator(); - Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, SE); - - DEBUG(dbgs() << " Changing PHI use to "); - DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false)); - DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " - << *Imm << "\n"); + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast<Instruction>(*OI)) { + *OI = 0; + if (U->use_empty()) + DeadInsts.push_back(U); } - // Replace the use of the operand Value with the new Phi we just created. - PN->setIncomingValue(i, Code); - Rewriter.clear(); - } + I->eraseFromParent(); + Changed = true; } - // PHI node might have become a constant value after SplitCriticalEdge. - DeadInsts.push_back(Inst); + return Changed; } +namespace { -/// fitsInAddressMode - Return true if V can be subsumed within an addressing -/// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy, - const TargetLowering *TLI, bool HasBaseReg) { - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { - int64_t VC = SC->getValue()->getSExtValue(); - if (TLI) { - TargetLowering::AddrMode AM; - AM.BaseOffs = VC; - AM.HasBaseReg = HasBaseReg; - return TLI->isLegalAddressingMode(AM, AccessTy); - } else { - // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. - return (VC > -(1 << 16) && VC < (1 << 16)-1); - } - } - - if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) - if (GlobalValue *GV = dyn_cast<GlobalValue>(SU->getValue())) { - if (TLI) { - TargetLowering::AddrMode AM; |