author     Dan Gohman <gohman@apple.com>    2010-01-22 00:46:49 +0000
committer  Dan Gohman <gohman@apple.com>    2010-01-22 00:46:49 +0000
commit     7979b72febb73f7bb1d1ed095a68f210822b2e7c (patch)
tree       d74dd6f92611e69526c8684793e34a6a3513c0cc
parent     3ec68f97ead4a2bc339b1b9012ca4cb2c4dd706a (diff)
Revert LoopStrengthReduce.cpp to pre-r94061 for now.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94123 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--   include/llvm/Analysis/ScalarEvolutionExpander.h                          |    3
-rw-r--r--   lib/Transforms/Scalar/LoopStrengthReduce.cpp                              | 4623
-rw-r--r--   test/CodeGen/ARM/arm-negative-stride.ll                                   |   26
-rw-r--r--   test/CodeGen/ARM/lsr-code-insertion.ll                                    |    4
-rw-r--r--   test/CodeGen/ARM/remat.ll                                                 |    3
-rw-r--r--   test/CodeGen/Thumb2/lsr-deficiency.ll                                     |   18
-rw-r--r--   test/CodeGen/Thumb2/thumb2-ifcvt1.ll                                      |   12
-rw-r--r--   test/CodeGen/X86/2006-05-11-InstrSched.ll                                 |    4
-rw-r--r--   test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll                            |    2
-rw-r--r--   test/CodeGen/X86/iv-users-in-other-loops.ll                               |    8
-rw-r--r--   test/CodeGen/X86/loop-strength-reduce4.ll                                 |   18
-rw-r--r--   test/CodeGen/X86/loop-strength-reduce8.ll                                 |    8
-rw-r--r--   test/CodeGen/X86/lsr-reuse.ll                                             |  159
-rw-r--r--   test/CodeGen/X86/masked-iv-safe.ll                                        |    7
-rw-r--r--   test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll                |    5
-rw-r--r--   test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll  |    7
-rw-r--r--   test/Transforms/LoopStrengthReduce/count-to-zero.ll                       |    2
17 files changed, 2308 insertions, 2601 deletions
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 5bec8e445a..01df503a5e 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -27,7 +27,10 @@ namespace llvm {
   /// and destroy it when finished to allow the release of the associated
   /// memory.
   class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
+  public:
     ScalarEvolution &SE;
+
+  private:
     std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
       InsertedExpressions;
     std::set<Value*> InsertedValues;
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index acc66246c3..fa820ed8e4 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -17,45 +17,6 @@
 // available on the target, and it performs a variety of other optimizations
 // related to loop induction variables.
 //
-// Terminology note: this code has a lot of handling for "post-increment" or
-// "post-inc" users. This is not talking about post-increment addressing modes;
-// it is instead talking about code like this:
-//
-//   %i = phi [ 0, %entry ], [ %i.next, %latch ]
-//   ...
-//   %i.next = add %i, 1
-//   %c = icmp eq %i.next, %n
-//
-// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
-// it's useful to think about these as the same register, with some uses using
-// the value of the register before the add and some using it after. In this
-// example, the icmp is a post-increment user, since it uses %i.next, which is
-// the value of the induction variable after the increment. The other common
-// case of post-increment users is users outside the loop.
-//
-// TODO: More sophistication in the way Formulae are generated.
-//
-// TODO: Handle multiple loops at a time.
-//
-// TODO: test/CodeGen/X86/full-lsr.ll should get full lsr. The problem is
-//       that {0,+,1}<%bb> is getting picked first because all 7 uses can
-//       use it, and while it's a pretty good solution, it means that LSR
-//       doesn't look further to find an even better solution.
-//
-// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
-//       instead of a GlobalValue?
-//
-// TODO: When truncation is free, truncate ICmp users' operands to make it a
-//       smaller encoding (on x86 at least).
-//
-// TODO: When a negated register is used by an add (such as in a list of
-//       multiple base registers, or as the increment expression in an addrec),
-//       we may not actually need both reg and (-1 * reg) in registers; the
-//       negation can be implemented by using a sub instead of an add. The
-//       lack of support for taking this into consideration when making
-//       register pressure decisions is partly worked around by the "Special"
-//       use kind.
-//
 //===----------------------------------------------------------------------===//

 #define DEBUG_TYPE "loop-reduce"
@@ -65,1549 +26,2025 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Analysis/IVUsers.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
 #include <algorithm>
 using namespace llvm;

-namespace {
-
-// Constant strides come first which in turns are sorted by their absolute
-// values. If absolute values are the same, then positive strides comes first.
-// e.g.
-// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
-struct StrideCompare {
-  const ScalarEvolution &SE;
-  explicit StrideCompare(const ScalarEvolution &se) : SE(se) {}
-
-  bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) const {
-    const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
-    const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
-    if (LHSC && RHSC) {
-      unsigned BitWidth = std::max(SE.getTypeSizeInBits(LHS->getType()),
-                                   SE.getTypeSizeInBits(RHS->getType()));
-      APInt LV = LHSC->getValue()->getValue();
-      APInt RV = RHSC->getValue()->getValue();
-      LV.sextOrTrunc(BitWidth);
-      RV.sextOrTrunc(BitWidth);
-      APInt ALV = LV.abs();
-      APInt ARV = RV.abs();
-      if (ALV == ARV) {
-        if (LV != RV)
-          return LV.sgt(RV);
-      } else {
-        return ALV.ult(ARV);
-      }
-
-      // If it's the same value but different type, sort by bit width so
-      // that we emit larger induction variables before smaller
-      // ones, letting the smaller be re-written in terms of larger ones.
-      return SE.getTypeSizeInBits(RHS->getType()) <
-             SE.getTypeSizeInBits(LHS->getType());
-    }
-    return LHSC && !RHSC;
-  }
-};
-
-/// RegSortData - This class holds data which is used to order reuse
-/// candidates.
-class RegSortData {
-public:
-  /// Bits - This represents the set of LSRUses (by index) which reference a
-  /// particular register.
-  SmallBitVector Bits;
-
-  /// MaxComplexity - This represents the greatest complexity (see the comments
-  /// on Formula::getComplexity) seen with a particular register.
-  uint32_t MaxComplexity;
-
-  /// Index - This holds an arbitrary value used as a last-resort tie breaker
-  /// to ensure deterministic behavior.
-  unsigned Index;
-
-  RegSortData() {}
-
-  void print(raw_ostream &OS) const;
-  void dump() const;
-};
+STATISTIC(NumReduced ,   "Number of IV uses strength reduced");
+STATISTIC(NumInserted,   "Number of PHIs inserted");
+STATISTIC(NumVariable,   "Number of PHIs with variable strides");
+STATISTIC(NumEliminated, "Number of strides eliminated");
+STATISTIC(NumShadow,     "Number of Shadow IVs optimized");
+STATISTIC(NumImmSunk,    "Number of common expr immediates sunk into uses");
+STATISTIC(NumLoopCond,   "Number of loop terminating conds optimized");
+STATISTIC(NumCountZero,  "Number of count iv optimized to count toward zero");

-}
-
-void RegSortData::print(raw_ostream &OS) const {
-  OS << "[NumUses=" << Bits.count()
-     << ", MaxComplexity=";
-  OS.write_hex(MaxComplexity);
-  OS << ", Index=" << Index << ']';
-}
-
-void RegSortData::dump() const {
-  print(errs()); errs() << '\n';
-}
+static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
+                                       cl::init(false),
+                                       cl::Hidden);

 namespace {

-/// RegCount - This is a helper class to sort a given set of registers
-/// according to associated RegSortData values.
-class RegCount {
-public:
-  const SCEV *Reg;
-  RegSortData Sort;
-
-  RegCount(const SCEV *R, const RegSortData &RSD)
-    : Reg(R), Sort(RSD) {}
-
-  // Sort by count. Returning true means the register is preferred.
-  bool operator<(const RegCount &Other) const {
-    // Sort by the number of unique uses of this register.
-    unsigned A = Sort.Bits.count();
-    unsigned B = Other.Sort.Bits.count();
-    if (A != B) return A > B;
-
-    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
-      const SCEVAddRecExpr *BR = dyn_cast<SCEVAddRecExpr>(Other.Reg);
-      // AddRecs have higher priority than other things.
-      if (!BR) return true;
-
-      // Prefer affine values.
-      if (AR->isAffine() != BR->isAffine())
-        return AR->isAffine();
-
-      const Loop *AL = AR->getLoop();
-      const Loop *BL = BR->getLoop();
-      if (AL != BL) {
-        unsigned ADepth = AL->getLoopDepth();
-        unsigned BDepth = BL->getLoopDepth();
-        // Prefer a less deeply nested addrec.
-        if (ADepth != BDepth)
-          return ADepth < BDepth;
-
-        // Different loops at the same depth; do something arbitrary.
-        BasicBlock *AH = AL->getHeader();
-        BasicBlock *BH = BL->getHeader();
-        for (Function::iterator I = AH, E = AH->getParent()->end(); I != E; ++I)
-          if (&*I == BH) return true;
-        return false;
-      }
+  struct BasedUser;

-      // Sort addrecs by stride.
-      const SCEV *AStep = AR->getOperand(1);
-      const SCEV *BStep = BR->getOperand(1);
-      if (AStep != BStep) {
-        if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStep)) {
-          const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStep);
-          if (!BC) return true;
-          // Arbitrarily prefer wider registers.
-          if (AC->getValue()->getValue().getBitWidth() !=
-              BC->getValue()->getValue().getBitWidth())
-            return AC->getValue()->getValue().getBitWidth() >
-                   BC->getValue()->getValue().getBitWidth();
-          // Ignore the sign bit, assuming that striding by a negative value
-          // is just as easy as by a positive value.
-          // Prefer the addrec with the lesser absolute value stride, as it
-          // will allow uses to have simpler addressing modes.
-          return AC->getValue()->getValue().abs()
-                   .ult(BC->getValue()->getValue().abs());
-        }
-      }
+  /// IVInfo - This structure keeps track of one IV expression inserted during
+  /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
+  /// well as the PHI node and increment value created for rewrite.
+  struct IVExpr {
+    const SCEV *Stride;
+    const SCEV *Base;
+    PHINode    *PHI;

-      // Then sort by the register which will permit the simplest uses.
-      // This is a heuristic; currently we only track the most complex use as a
-      // representative.
-      if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
-        return Sort.MaxComplexity < Other.Sort.MaxComplexity;
-
-      // Then sort them by their start values.
-      const SCEV *AStart = AR->getStart();
-      const SCEV *BStart = BR->getStart();
-      if (AStart != BStart) {
-        if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStart)) {
-          const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStart);
-          if (!BC) return true;
-          // Arbitrarily prefer wider registers.
-          if (AC->getValue()->getValue().getBitWidth() !=
-              BC->getValue()->getValue().getBitWidth())
-            return AC->getValue()->getValue().getBitWidth() >
-                   BC->getValue()->getValue().getBitWidth();
-          // Prefer positive over negative if the absolute values are the same.
-          if (AC->getValue()->getValue().abs() ==
-              BC->getValue()->getValue().abs())
-            return AC->getValue()->getValue().isStrictlyPositive();
-          // Prefer the addrec with the lesser absolute value start.
-          return AC->getValue()->getValue().abs()
-                   .ult(BC->getValue()->getValue().abs());
-        }
-      }
-    } else {
-      // AddRecs have higher priority than other things.
-      if (isa<SCEVAddRecExpr>(Other.Reg)) return false;
-      // Sort by the register which will permit the simplest uses.
-      // This is a heuristic; currently we only track the most complex use as a
-      // representative.
-      if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
-        return Sort.MaxComplexity < Other.Sort.MaxComplexity;
-    }
+    IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi)
+      : Stride(stride), Base(base), PHI(phi) {}
+  };

+  /// IVsOfOneStride - This structure keeps track of all IV expression inserted
+  /// during StrengthReduceStridedIVUsers for a particular stride of the IV.
+  struct IVsOfOneStride {
+    std::vector<IVExpr> IVs;

-    // Tie-breaker: the arbitrary index, to ensure a reliable ordering.
-    return Sort.Index < Other.Sort.Index;
-  }
+    void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) {
+      IVs.push_back(IVExpr(Stride, Base, PHI));
+    }
+  };

-  void print(raw_ostream &OS) const;
-  void dump() const;
-};
+  class LoopStrengthReduce : public LoopPass {
+    IVUsers *IU;
+    ScalarEvolution *SE;
+    bool Changed;
+
+    /// IVsByStride - Keep track of all IVs that have been inserted for a
+    /// particular stride.
+    std::map<const SCEV *, IVsOfOneStride> IVsByStride;
+
+    /// DeadInsts - Keep track of instructions we may have made dead, so that
+    /// we can remove them after we are done working.
+    SmallVector<WeakVH, 16> DeadInsts;
+
+    /// TLI - Keep a pointer of a TargetLowering to consult for determining
+    /// transformation profitability.
+    const TargetLowering *TLI;
+
+  public:
+    static char ID; // Pass ID, replacement for typeid
+    explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
+      LoopPass(&ID), TLI(tli) {}
+
+    bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      // We split critical edges, so we change the CFG. However, we do update
+      // many analyses if they are around.
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addPreserved("loops");
+      AU.addPreserved("domfrontier");
+      AU.addPreserved("domtree");
+
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addRequired<ScalarEvolution>();
+      AU.addPreserved<ScalarEvolution>();
+      AU.addRequired<IVUsers>();
+      AU.addPreserved<IVUsers>();
+    }
+
+  private:
+    void OptimizeIndvars(Loop *L);
+
+    /// OptimizeLoopTermCond - Change loop terminating condition to use the
+    /// postinc iv when possible.
+    void OptimizeLoopTermCond(Loop *L);
+
+    /// OptimizeShadowIV - If IV is used in a int-to-float cast
+    /// inside the loop then try to eliminate the cast opeation.
+    void OptimizeShadowIV(Loop *L);
+
+    /// OptimizeMax - Rewrite the loop's terminating condition
+    /// if it uses a max computation.
+    ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
+                          IVStrideUse* &CondUse);
+
+    /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for
+    /// deciding when to exit the loop is used only for that purpose, try to
+    /// rearrange things so it counts down to a test against zero.
+    bool OptimizeLoopCountIV(Loop *L);
+    bool OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+                                     IVStrideUse* &CondUse, Loop *L);
+
+    /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
+    /// single stride of IV. All of the users may have different starting
+    /// values, and this may not be the only stride.
+    void StrengthReduceIVUsersOfStride(const SCEV *Stride,
+                                       IVUsersOfOneStride &Uses,
+                                       Loop *L);
+    void StrengthReduceIVUsers(Loop *L);
+
+    ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
+                                  IVStrideUse* &CondUse,
+                                  const SCEV* &CondStride,
+                                  bool PostPass = false);
+
+    bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
+                           const SCEV* &CondStride);
+    bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
+    const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *,
+                                IVExpr&, const Type*,
+                                const std::vector<BasedUser>& UsersToProcess);
+    bool ValidScale(bool, int64_t,
+                    const std::vector<BasedUser>& UsersToProcess);
+    bool ValidOffset(bool, int64_t, int64_t,
+                     const std::vector<BasedUser>& UsersToProcess);
+    const SCEV *CollectIVUsers(const SCEV *Stride,
+                               IVUsersOfOneStride &Uses,
+                               Loop *L,
+                               bool &AllUsesAreAddresses,
+                               bool &AllUsesAreOutsideLoop,
+                               std::vector<BasedUser> &UsersToProcess);
+    bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc);
+    bool ShouldUseFullStrengthReductionMode(
+                                const std::vector<BasedUser> &UsersToProcess,
+                                const Loop *L,
+                                bool AllUsesAreAddresses,
+                                const SCEV *Stride);
+    void PrepareToStrengthReduceFully(
+                                std::vector<BasedUser> &UsersToProcess,
+                                const SCEV *Stride,
+                                const SCEV *CommonExprs,
+                                const Loop *L,
+                                SCEVExpander &PreheaderRewriter);
+    void PrepareToStrengthReduceFromSmallerStride(
+                                std::vector<BasedUser> &UsersToProcess,
+                                Value *CommonBaseV,
+                                const IVExpr &ReuseIV,
+                                Instruction *PreInsertPt);
+    void PrepareToStrengthReduceWithNewPhi(
+                                std::vector<BasedUser> &UsersToProcess,
+                                const SCEV *Stride,
+                                const SCEV *CommonExprs,
+                                Value *CommonBaseV,
+                                Instruction *IVIncInsertPt,
+                                const Loop *L,
+                                SCEVExpander &PreheaderRewriter);
+
+    void DeleteTriviallyDeadInstructions();
+  };
 }

-void RegCount::print(raw_ostream &OS) const {
-  OS << *Reg << ':';
-  Sort.print(OS);
-}
+char LoopStrengthReduce::ID = 0;
+static RegisterPass<LoopStrengthReduce>
+X("loop-reduce", "Loop Strength Reduction");

-void RegCount::dump() const {
-  print(errs()); errs() << '\n';
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+  return new LoopStrengthReduce(TLI);
 }

-namespace {
-
-/// Formula - This class holds information that describes a formula for
-/// satisfying a use. It may include broken-out immediates and scaled registers.
-struct Formula {
-  /// AM - This is used to represent complex addressing, as well as other kinds
-  /// of interesting uses.
-  TargetLowering::AddrMode AM;
-
-  /// BaseRegs - The list of "base" registers for this use. When this is
-  /// non-empty, AM.HasBaseReg should be set to true.
-  SmallVector<const SCEV *, 2> BaseRegs;
-
-  /// ScaledReg - The 'scaled' register for this use. This should be non-null
-  /// when AM.Scale is not zero.
-  const SCEV *ScaledReg;
-
-  Formula() : ScaledReg(0) {}
-
-  unsigned getNumRegs() const;
-  uint32_t getComplexity() const;
-  const Type *getType() const;
+/// DeleteTriviallyDeadInstructions - If any of the instructions is the
+/// specified set are trivially dead, delete them and see if this makes any of
+/// their operands subsequently dead.
+void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
+  while (!DeadInsts.empty()) {
+    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());

-  void InitialMatch(const SCEV *S, Loop *L,
-                    ScalarEvolution &SE, DominatorTree &DT);
+    if (I == 0 || !isInstructionTriviallyDead(I))
+      continue;

-  /// referencesReg - Test if this formula references the given register.
-  bool referencesReg(const SCEV *S) const {
-    return S == ScaledReg ||
-           std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
-  }
+    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+        *OI = 0;
+        if (U->use_empty())
+          DeadInsts.push_back(U);
+      }

-  bool operator==(const Formula &Other) const {
-    return BaseRegs == Other.BaseRegs &&
-           ScaledReg == Other.ScaledReg &&
-           AM.Scale == Other.AM.Scale &&
-           AM.BaseOffs == Other.AM.BaseOffs &&
-           AM.BaseGV == Other.AM.BaseGV;
+    I->eraseFromParent();
+    Changed = true;
   }
+}

-  // This sorts at least partially based on host pointer values which are
-  // not deterministic, so it is only usable for uniqification.
-  bool operator<(const Formula &Other) const {
-    if (BaseRegs != Other.BaseRegs)
-      return BaseRegs < Other.BaseRegs;
-    if (ScaledReg != Other.ScaledReg)
-      return ScaledReg < Other.ScaledReg;
-    if (AM.Scale != Other.AM.Scale)
-      return AM.Scale < Other.AM.Scale;
-    if (AM.BaseOffs != Other.AM.BaseOffs)
-      return AM.BaseOffs < Other.AM.BaseOffs;
-    if (AM.BaseGV != Other.AM.BaseGV)
-      return AM.BaseGV < Other.AM.BaseGV;
-    return false;
+/// isAddressUse - Returns true if the specified instruction is using the
+/// specified value as an address.
+static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+  bool isAddress = isa<LoadInst>(Inst);
+  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+    if (SI->getOperand(1) == OperandVal)
+      isAddress = true;
+  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+    // Addressing modes can also be folded into prefetches and a variety
+    // of intrinsics.
+    switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::prefetch:
+      case Intrinsic::x86_sse2_loadu_dq:
+      case Intrinsic::x86_sse2_loadu_pd:
+      case Intrinsic::x86_sse_loadu_ps:
+      case Intrinsic::x86_sse_storeu_ps:
+      case Intrinsic::x86_sse2_storeu_pd:
+      case Intrinsic::x86_sse2_storeu_dq:
+      case Intrinsic::x86_sse2_storel_dq:
+        if (II->getOperand(1) == OperandVal)
+          isAddress = true;
+        break;
+    }
   }
-
-  void print(raw_ostream &OS) const;
-  void dump() const;
-};
-
+  return isAddress;
 }

-/// getNumRegs - Return the total number of register operands used by this
-/// formula. This does not include register uses implied by non-constant
-/// addrec strides.
-unsigned Formula::getNumRegs() const {
-  return !!ScaledReg + BaseRegs.size();
+/// getAccessType - Return the type of the memory being accessed.
+static const Type *getAccessType(const Instruction *Inst) {
+  const Type *AccessTy = Inst->getType();
+  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
+    AccessTy = SI->getOperand(0)->getType();
+  else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+    // Addressing modes can also be folded into prefetches and a variety
+    // of intrinsics.
+    switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::x86_sse_storeu_ps:
+      case Intrinsic::x86_sse2_storeu_pd:
+      case Intrinsic::x86_sse2_storeu_dq:
+      case Intrinsic::x86_sse2_storel_dq:
+        AccessTy = II->getOperand(1)->getType();
+        break;
+    }
+  }
+  return AccessTy;
 }

-/// getComplexity - Return an oversimplified value indicating the complexity
-/// of this formula. This is used as a tie-breaker in choosing register
-/// preferences.
-uint32_t Formula::getComplexity() const {
-  // Encode the information in a uint32_t so that comparing with operator<
-  // will be interesting.
-  return
-    // Most significant, the number of registers. This saturates because we
-    // need the bits, and because beyond a few hundred it doesn't really matter.
-    (std::min(getNumRegs(), (1u<<15)-1) << 17) |
-    // Having multiple base regs is worse than having a base reg and a scale.
-    ((BaseRegs.size() > 1) << 16) |
-    // Scale absolute value.
-    ((AM.Scale != 0 ? (Log2_64(abs64(AM.Scale)) + 1) : 0u) << 9) |
-    // Scale sign, which is less significant than the absolute value.
-    ((AM.Scale < 0) << 8) |
-    // Offset absolute value.
-    ((AM.BaseOffs != 0 ? (Log2_64(abs64(AM.BaseOffs)) + 1) : 0u) << 1) |
-    // If a GV is present, treat it like a maximal offset.
-    ((AM.BaseGV ? ((1u<<7)-1) : 0) << 1) |
-    // Offset sign, which is less significant than the absolute offset.
-    ((AM.BaseOffs < 0) << 0);
+namespace {
+  /// BasedUser - For a particular base value, keep information about how we've
+  /// partitioned the expression so far.
+  struct BasedUser {
+    /// Base - The Base value for the PHI node that needs to be inserted for
+    /// this use. As the use is processed, information gets moved from this
+    /// field to the Imm field (below). BasedUser values are sorted by this
+    /// field.
+    const SCEV *Base;
+
+    /// Inst - The instruction using the induction variable.
+    Instruction *Inst;
+
+    /// OperandValToReplace - The operand value of Inst to replace with the
+    /// EmittedBase.
+    Value *OperandValToReplace;
+
+    /// Imm - The immediate value that should be added to the base immediately
+    /// before Inst, because it will be folded into the imm field of the
+    /// instruction. This is also sometimes used for loop-variant values that
+    /// must be added inside the loop.
+    const SCEV *Imm;
+
+    /// Phi - The induction variable that performs the striding that
+    /// should be used for this user.
+    PHINode *Phi;
+
+    // isUseOfPostIncrementedValue - True if this should use the
+    // post-incremented version of this IV, not the preincremented version.
+    // This can only be set in special cases, such as the terminating setcc
+    // instruction for a loop and uses outside the loop that are dominated by
+    // the loop.
+    bool isUseOfPostIncrementedValue;
+
+    BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
+      : Base(IVSU.getOffset()), Inst(IVSU.getUser()),
+        OperandValToReplace(IVSU.getOperandValToReplace()),
+        Imm(se->getIntegerSCEV(0, Base->getType())),
+        isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
+
+    // Once we rewrite the code to insert the new IVs we want, update the
+    // operands of Inst to use the new expression 'NewBase', with 'Imm' added
+    // to it.
+    void RewriteInstructionToUseNewBase(const SCEV *NewBase,
+                                        Instruction *InsertPt,
+                                        SCEVExpander &Rewriter, Loop *L, Pass *P,
+                                        SmallVectorImpl<WeakVH> &DeadInsts,
+                                        ScalarEvolution *SE);
+
+    Value *InsertCodeForBaseAtPosition(const SCEV *NewBase,
+                                       const Type *Ty,
+                                       SCEVExpander &Rewriter,
+                                       Instruction *IP,
+                                       ScalarEvolution *SE);
+    void dump() const;
+  };
 }

-/// getType - Return the type of this formula, if it has one, or null
-/// otherwise. This type is meaningless except for the bit size.
-const Type *Formula::getType() const {
-  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
-         ScaledReg ? ScaledReg->getType() :
-         AM.BaseGV ? AM.BaseGV->getType() :
-         0;
+void BasedUser::dump() const {
+  dbgs() << " Base=" << *Base;
+  dbgs() << " Imm=" << *Imm;
+  dbgs() << " Inst: " << *Inst;
 }

-namespace {
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase,
+                                              const Type *Ty,
+                                              SCEVExpander &Rewriter,
+                                              Instruction *IP,
+                                              ScalarEvolution *SE) {
+  Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP);

-/// ComplexitySorter - A predicate which orders Formulae by the number of
-/// registers they contain.
-struct ComplexitySorter {
-  bool operator()(const Formula &LHS, const Formula &RHS) const {
-    unsigned L = LHS.getNumRegs();
-    unsigned R = RHS.getNumRegs();
-    if (L != R) return L < R;
+  // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to
+  // re-analyze it.
+  const SCEV *NewValSCEV = SE->getUnknown(Base);

-    return LHS.getComplexity() < RHS.getComplexity();
-  }
-};
+  // Always emit the immediate into the same block as the user.
+  NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
+  return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
 }

-/// DoInitialMatch - Recurrsion helper for InitialMatch.
-static void DoInitialMatch(const SCEV *S, Loop *L,
-                           SmallVectorImpl<const SCEV *> &Good,
-                           SmallVectorImpl<const SCEV *> &Bad,
-                           ScalarEvolution &SE, DominatorTree &DT) {
-  // Collect expressions which properly dominate the loop header.
-  if (S->properlyDominates(L->getHeader(), &DT)) {
-    Good.push_back(S);
-    return;
-  }
-
-  // Look at add operands.
-  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
-    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
-         I != E; ++I)
-      DoInitialMatch(*I, L, Good, Bad, SE, DT);
-    return;
-  }
-  // Look at addrec operands.
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
-    if (!AR->getStart()->isZero()) {
-      DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
-      DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
-                                      AR->getStepRecurrence(SE),
-                                      AR->getLoop()),
-                     L, Good, Bad, SE, DT);
-      return;
+// Once we rewrite the code to insert the new IVs we want, update the
+// operands of Inst to use the new expression 'NewBase', with 'Imm' added
+// to it. NewBasePt is the last instruction which contributes to the
+// value of NewBase in the case that it's a diffferent instruction from
+// the PHI that NewBase is computed from, or null otherwise.
+//
+void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase,
+                                               Instruction *NewBasePt,
+                                               SCEVExpander &Rewriter, Loop *L, Pass *P,
+                                               SmallVectorImpl<WeakVH> &DeadInsts,
+                                               ScalarEvolution *SE) {
+  if (!isa<PHINode>(Inst)) {
+    // By default, insert code at the user instruction.
+    BasicBlock::iterator InsertPt = Inst;
+
+    // However, if the Operand is itself an instruction, the (potentially
+    // complex) inserted code may be shared by many users. Because of this, we
+    // want to emit code for the computation of the operand right before its old
+    // computation. This is usually safe, because we obviously used to use the
+    // computation when it was computed in its current block. However, in some
+    // cases (e.g. use of a post-incremented induction variable) the NewBase
+    // value will be pinned to live somewhere after the original computation.
+    // In this case, we have to back off.
+    //
+    // If this is a use outside the loop (which means after, since it is based
+    // on a loop indvar) we use the post-incremented value, so that we don't
+    // artificially make the preinc value live out the bottom of the loop.
+    if (!isUseOfPostIncrementedValue && L->contains(Inst)) {
+      if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
+        InsertPt = NewBasePt;
+        ++InsertPt;
+      } else if (Instruction *OpInst
+                 = dyn_cast<Instruction>(OperandValToReplace)) {
+        InsertPt = OpInst;
+        while (isa<PHINode>(InsertPt)) ++InsertPt;
+      }
     }
+    Value *NewVal = InsertCodeForBaseAtPosition(NewBase,
+                                                OperandValToReplace->getType(),
+                                                Rewriter, InsertPt, SE);
+    // Replace the use of the operand Value with the new Phi we just created.
+    Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
+
+    DEBUG(dbgs() << " Replacing with ");
+    DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false));
+    DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM "
+                 << *Imm << "\n");
+    return;
   }

-  // Handle a multiplication by -1 (negation) if it didn't fold.
-  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
-    if (Mul->getOperand(0)->isAllOnesValue()) {
-      SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
-      const SCEV *NewMul = SE.getMulExpr(Ops);
-
-      SmallVector<const SCEV *, 4> MyGood;
-      SmallVector<const SCEV *, 4> MyBad;
-      DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
-      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
-        SE.getE