Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 4623
1 file changed, 2268 insertions(+), 2355 deletions(-)
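
A note before the patch body: the first removed hunk below defines a StrideCompare functor whose ordering the old comment summarizes as "4, -1, X, 1, 2 ==> 1, -1, 2, 4, X". As a reading aid, here is a minimal standalone sketch of that ordering on plain 64-bit strides; the name strideLess and the use of int64_t are illustrative assumptions, since the pass actually compares SCEVConstant strides and sorts any non-constant stride after all constant ones.

    #include <cstdint>

    // Minimal model of the removed StrideCompare ordering (illustrative
    // only): smaller absolute values come first, and on equal magnitude
    // the positive stride precedes the negative one, so
    //   4, -1, 1, 2  ==>  1, -1, 2, 4
    // Non-constant strides (the "X" in the comment) would sort last.
    bool strideLess(int64_t L, int64_t R) {
      // Compute magnitudes in unsigned arithmetic so INT64_MIN is safe.
      uint64_t AL = L < 0 ? 0 - static_cast<uint64_t>(L) : static_cast<uint64_t>(L);
      uint64_t AR = R < 0 ? 0 - static_cast<uint64_t>(R) : static_cast<uint64_t>(R);
      if (AL != AR)
        return AL < AR; // sort by absolute value first
      return L > R;     // same magnitude: positive before negative
    }

Used with std::sort, this yields the order the comment describes; the real functor additionally breaks ties between equal constants of different types by bit width, so larger induction variables are emitted before smaller ones.
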
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index acc66246c3..fa820ed8e4 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -17,45 +17,6 @@ // available on the target, and it performs a variety of other optimizations // related to loop induction variables. // -// Terminology note: this code has a lot of handling for "post-increment" or -// "post-inc" users. This is not talking about post-increment addressing modes; -// it is instead talking about code like this: -// -// %i = phi [ 0, %entry ], [ %i.next, %latch ] -// ... -// %i.next = add %i, 1 -// %c = icmp eq %i.next, %n -// -// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however -// it's useful to think about these as the same register, with some uses using -// the value of the register before the add and some using // it after. In this -// example, the icmp is a post-increment user, since it uses %i.next, which is -// the value of the induction variable after the increment. The other common -// case of post-increment users is users outside the loop. -// -// TODO: More sophistication in the way Formulae are generated. -// -// TODO: Handle multiple loops at a time. -// -// TODO: test/CodeGen/X86/full-lsr.ll should get full lsr. The problem is -// that {0,+,1}<%bb> is getting picked first because all 7 uses can -// use it, and while it's a pretty good solution, it means that LSR -// doesn't look further to find an even better solution. -// -// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr -// instead of a GlobalValue? -// -// TODO: When truncation is free, truncate ICmp users' operands to make it a -// smaller encoding (on x86 at least). -// -// TODO: When a negated register is used by an add (such as in a list of -// multiple base registers, or as the increment expression in an addrec), -// we may not actually need both reg and (-1 * reg) in registers; the -// negation can be implemented by using a sub instead of an add. The -// lack of support for taking this into consideration when making -// register pressure decisions is partly worked around by the "Special" -// use kind. -// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-reduce" @@ -65,1549 +26,2025 @@ #include "llvm/IntrinsicInst.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/IVUsers.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; -namespace { - -// Constant strides come first which in turns are sorted by their absolute -// values. If absolute values are the same, then positive strides comes first. -// e.g. 
-// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X -struct StrideCompare { - const ScalarEvolution &SE; - explicit StrideCompare(const ScalarEvolution &se) : SE(se) {} - - bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) const { - const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS); - const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); - if (LHSC && RHSC) { - unsigned BitWidth = std::max(SE.getTypeSizeInBits(LHS->getType()), - SE.getTypeSizeInBits(RHS->getType())); - APInt LV = LHSC->getValue()->getValue(); - APInt RV = RHSC->getValue()->getValue(); - LV.sextOrTrunc(BitWidth); - RV.sextOrTrunc(BitWidth); - APInt ALV = LV.abs(); - APInt ARV = RV.abs(); - if (ALV == ARV) { - if (LV != RV) - return LV.sgt(RV); - } else { - return ALV.ult(ARV); - } - - // If it's the same value but different type, sort by bit width so - // that we emit larger induction variables before smaller - // ones, letting the smaller be re-written in terms of larger ones. - return SE.getTypeSizeInBits(RHS->getType()) < - SE.getTypeSizeInBits(LHS->getType()); - } - return LHSC && !RHSC; - } -}; - -/// RegSortData - This class holds data which is used to order reuse -/// candidates. -class RegSortData { -public: - /// Bits - This represents the set of LSRUses (by index) which reference a - /// particular register. - SmallBitVector Bits; - - /// MaxComplexity - This represents the greatest complexity (see the comments - /// on Formula::getComplexity) seen with a particular register. - uint32_t MaxComplexity; - - /// Index - This holds an arbitrary value used as a last-resort tie breaker - /// to ensure deterministic behavior. - unsigned Index; - - RegSortData() {} - - void print(raw_ostream &OS) const; - void dump() const; -}; +STATISTIC(NumReduced , "Number of IV uses strength reduced"); +STATISTIC(NumInserted, "Number of PHIs inserted"); +STATISTIC(NumVariable, "Number of PHIs with variable strides"); +STATISTIC(NumEliminated, "Number of strides eliminated"); +STATISTIC(NumShadow, "Number of Shadow IVs optimized"); +STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses"); +STATISTIC(NumLoopCond, "Number of loop terminating conds optimized"); +STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero"); -} - -void RegSortData::print(raw_ostream &OS) const { - OS << "[NumUses=" << Bits.count() - << ", MaxComplexity="; - OS.write_hex(MaxComplexity); - OS << ", Index=" << Index << ']'; -} - -void RegSortData::dump() const { - print(errs()); errs() << '\n'; -} +static cl::opt<bool> EnableFullLSRMode("enable-full-lsr", + cl::init(false), + cl::Hidden); namespace { -/// RegCount - This is a helper class to sort a given set of registers -/// according to associated RegSortData values. -class RegCount { -public: - const SCEV *Reg; - RegSortData Sort; - - RegCount(const SCEV *R, const RegSortData &RSD) - : Reg(R), Sort(RSD) {} - - // Sort by count. Returning true means the register is preferred. - bool operator<(const RegCount &Other) const { - // Sort by the number of unique uses of this register. - unsigned A = Sort.Bits.count(); - unsigned B = Other.Sort.Bits.count(); - if (A != B) return A > B; - - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) { - const SCEVAddRecExpr *BR = dyn_cast<SCEVAddRecExpr>(Other.Reg); - // AddRecs have higher priority than other things. - if (!BR) return true; - - // Prefer affine values. 
- if (AR->isAffine() != BR->isAffine()) - return AR->isAffine(); - - const Loop *AL = AR->getLoop(); - const Loop *BL = BR->getLoop(); - if (AL != BL) { - unsigned ADepth = AL->getLoopDepth(); - unsigned BDepth = BL->getLoopDepth(); - // Prefer a less deeply nested addrec. - if (ADepth != BDepth) - return ADepth < BDepth; - - // Different loops at the same depth; do something arbitrary. - BasicBlock *AH = AL->getHeader(); - BasicBlock *BH = BL->getHeader(); - for (Function::iterator I = AH, E = AH->getParent()->end(); I != E; ++I) - if (&*I == BH) return true; - return false; - } + struct BasedUser; - // Sort addrecs by stride. - const SCEV *AStep = AR->getOperand(1); - const SCEV *BStep = BR->getOperand(1); - if (AStep != BStep) { - if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStep)) { - const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStep); - if (!BC) return true; - // Arbitrarily prefer wider registers. - if (AC->getValue()->getValue().getBitWidth() != - BC->getValue()->getValue().getBitWidth()) - return AC->getValue()->getValue().getBitWidth() > - BC->getValue()->getValue().getBitWidth(); - // Ignore the sign bit, assuming that striding by a negative value - // is just as easy as by a positive value. - // Prefer the addrec with the lesser absolute value stride, as it - // will allow uses to have simpler addressing modes. - return AC->getValue()->getValue().abs() - .ult(BC->getValue()->getValue().abs()); - } - } + /// IVInfo - This structure keeps track of one IV expression inserted during + /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as + /// well as the PHI node and increment value created for rewrite. + struct IVExpr { + const SCEV *Stride; + const SCEV *Base; + PHINode *PHI; - // Then sort by the register which will permit the simplest uses. - // This is a heuristic; currently we only track the most complex use as a - // representative. - if (Sort.MaxComplexity != Other.Sort.MaxComplexity) - return Sort.MaxComplexity < Other.Sort.MaxComplexity; - - // Then sort them by their start values. - const SCEV *AStart = AR->getStart(); - const SCEV *BStart = BR->getStart(); - if (AStart != BStart) { - if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStart)) { - const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStart); - if (!BC) return true; - // Arbitrarily prefer wider registers. - if (AC->getValue()->getValue().getBitWidth() != - BC->getValue()->getValue().getBitWidth()) - return AC->getValue()->getValue().getBitWidth() > - BC->getValue()->getValue().getBitWidth(); - // Prefer positive over negative if the absolute values are the same. - if (AC->getValue()->getValue().abs() == - BC->getValue()->getValue().abs()) - return AC->getValue()->getValue().isStrictlyPositive(); - // Prefer the addrec with the lesser absolute value start. - return AC->getValue()->getValue().abs() - .ult(BC->getValue()->getValue().abs()); - } - } - } else { - // AddRecs have higher priority than other things. - if (isa<SCEVAddRecExpr>(Other.Reg)) return false; - // Sort by the register which will permit the simplest uses. - // This is a heuristic; currently we only track the most complex use as a - // representative. 
- if (Sort.MaxComplexity != Other.Sort.MaxComplexity) - return Sort.MaxComplexity < Other.Sort.MaxComplexity; - } + IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi) + : Stride(stride), Base(base), PHI(phi) {} + }; + /// IVsOfOneStride - This structure keeps track of all IV expression inserted + /// during StrengthReduceStridedIVUsers for a particular stride of the IV. + struct IVsOfOneStride { + std::vector<IVExpr> IVs; - // Tie-breaker: the arbitrary index, to ensure a reliable ordering. - return Sort.Index < Other.Sort.Index; - } + void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) { + IVs.push_back(IVExpr(Stride, Base, PHI)); + } + }; - void print(raw_ostream &OS) const; - void dump() const; -}; + class LoopStrengthReduce : public LoopPass { + IVUsers *IU; + ScalarEvolution *SE; + bool Changed; + + /// IVsByStride - Keep track of all IVs that have been inserted for a + /// particular stride. + std::map<const SCEV *, IVsOfOneStride> IVsByStride; + + /// DeadInsts - Keep track of instructions we may have made dead, so that + /// we can remove them after we are done working. + SmallVector<WeakVH, 16> DeadInsts; + + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// transformation profitability. + const TargetLowering *TLI; + + public: + static char ID; // Pass ID, replacement for typeid + explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : + LoopPass(&ID), TLI(tli) {} + + bool runOnLoop(Loop *L, LPPassManager &LPM); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // We split critical edges, so we change the CFG. However, we do update + // many analyses if they are around. + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved("loops"); + AU.addPreserved("domfrontier"); + AU.addPreserved("domtree"); + + AU.addRequiredID(LoopSimplifyID); + AU.addRequired<ScalarEvolution>(); + AU.addPreserved<ScalarEvolution>(); + AU.addRequired<IVUsers>(); + AU.addPreserved<IVUsers>(); + } + private: + void OptimizeIndvars(Loop *L); + + /// OptimizeLoopTermCond - Change loop terminating condition to use the + /// postinc iv when possible. + void OptimizeLoopTermCond(Loop *L); + + /// OptimizeShadowIV - If IV is used in a int-to-float cast + /// inside the loop then try to eliminate the cast opeation. + void OptimizeShadowIV(Loop *L); + + /// OptimizeMax - Rewrite the loop's terminating condition + /// if it uses a max computation. + ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse); + + /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for + /// deciding when to exit the loop is used only for that purpose, try to + /// rearrange things so it counts down to a test against zero. + bool OptimizeLoopCountIV(Loop *L); + bool OptimizeLoopCountIVOfStride(const SCEV* &Stride, + IVStrideUse* &CondUse, Loop *L); + + /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a + /// single stride of IV. All of the users may have different starting + /// values, and this may not be the only stride. 
+ void StrengthReduceIVUsersOfStride(const SCEV *Stride, + IVUsersOfOneStride &Uses, + Loop *L); + void StrengthReduceIVUsers(Loop *L); + + ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse, + const SCEV* &CondStride, + bool PostPass = false); + + bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, + const SCEV* &CondStride); + bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); + const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *, + IVExpr&, const Type*, + const std::vector<BasedUser>& UsersToProcess); + bool ValidScale(bool, int64_t, + const std::vector<BasedUser>& UsersToProcess); + bool ValidOffset(bool, int64_t, int64_t, + const std::vector<BasedUser>& UsersToProcess); + const SCEV *CollectIVUsers(const SCEV *Stride, + IVUsersOfOneStride &Uses, + Loop *L, + bool &AllUsesAreAddresses, + bool &AllUsesAreOutsideLoop, + std::vector<BasedUser> &UsersToProcess); + bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc); + bool ShouldUseFullStrengthReductionMode( + const std::vector<BasedUser> &UsersToProcess, + const Loop *L, + bool AllUsesAreAddresses, + const SCEV *Stride); + void PrepareToStrengthReduceFully( + std::vector<BasedUser> &UsersToProcess, + const SCEV *Stride, + const SCEV *CommonExprs, + const Loop *L, + SCEVExpander &PreheaderRewriter); + void PrepareToStrengthReduceFromSmallerStride( + std::vector<BasedUser> &UsersToProcess, + Value *CommonBaseV, + const IVExpr &ReuseIV, + Instruction *PreInsertPt); + void PrepareToStrengthReduceWithNewPhi( + std::vector<BasedUser> &UsersToProcess, + const SCEV *Stride, + const SCEV *CommonExprs, + Value *CommonBaseV, + Instruction *IVIncInsertPt, + const Loop *L, + SCEVExpander &PreheaderRewriter); + + void DeleteTriviallyDeadInstructions(); + }; } -void RegCount::print(raw_ostream &OS) const { - OS << *Reg << ':'; - Sort.print(OS); -} +char LoopStrengthReduce::ID = 0; +static RegisterPass<LoopStrengthReduce> +X("loop-reduce", "Loop Strength Reduction"); -void RegCount::dump() const { - print(errs()); errs() << '\n'; +Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { + return new LoopStrengthReduce(TLI); } -namespace { - -/// Formula - This class holds information that describes a formula for -/// satisfying a use. It may include broken-out immediates and scaled registers. -struct Formula { - /// AM - This is used to represent complex addressing, as well as other kinds - /// of interesting uses. - TargetLowering::AddrMode AM; - - /// BaseRegs - The list of "base" registers for this use. When this is - /// non-empty, AM.HasBaseReg should be set to true. - SmallVector<const SCEV *, 2> BaseRegs; - - /// ScaledReg - The 'scaled' register for this use. This should be non-null - /// when AM.Scale is not zero. - const SCEV *ScaledReg; - - Formula() : ScaledReg(0) {} - - unsigned getNumRegs() const; - uint32_t getComplexity() const; - const Type *getType() const; +/// DeleteTriviallyDeadInstructions - If any of the instructions is the +/// specified set are trivially dead, delete them and see if this makes any of +/// their operands subsequently dead. +void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { + while (!DeadInsts.empty()) { + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); - void InitialMatch(const SCEV *S, Loop *L, - ScalarEvolution &SE, DominatorTree &DT); + if (I == 0 || !isInstructionTriviallyDead(I)) + continue; - /// referencesReg - Test if this formula references the given register. 
- bool referencesReg(const SCEV *S) const { - return S == ScaledReg || - std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end(); - } + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast<Instruction>(*OI)) { + *OI = 0; + if (U->use_empty()) + DeadInsts.push_back(U); + } - bool operator==(const Formula &Other) const { - return BaseRegs == Other.BaseRegs && - ScaledReg == Other.ScaledReg && - AM.Scale == Other.AM.Scale && - AM.BaseOffs == Other.AM.BaseOffs && - AM.BaseGV == Other.AM.BaseGV; + I->eraseFromParent(); + Changed = true; } +} - // This sorts at least partially based on host pointer values which are - // not deterministic, so it is only usable for uniqification. - bool operator<(const Formula &Other) const { - if (BaseRegs != Other.BaseRegs) - return BaseRegs < Other.BaseRegs; - if (ScaledReg != Other.ScaledReg) - return ScaledReg < Other.ScaledReg; - if (AM.Scale != Other.AM.Scale) - return AM.Scale < Other.AM.Scale; - if (AM.BaseOffs != Other.AM.BaseOffs) - return AM.BaseOffs < Other.AM.BaseOffs; - if (AM.BaseGV != Other.AM.BaseGV) - return AM.BaseGV < Other.AM.BaseGV; - return false; +/// isAddressUse - Returns true if the specified instruction is using the +/// specified value as an address. +static bool isAddressUse(Instruction *Inst, Value *OperandVal) { + bool isAddress = isa<LoadInst>(Inst); + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (SI->getOperand(1) == OperandVal) + isAddress = true; + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + // Addressing modes can also be folded into prefetches and a variety + // of intrinsics. + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::prefetch: + case Intrinsic::x86_sse2_loadu_dq: + case Intrinsic::x86_sse2_loadu_pd: + case Intrinsic::x86_sse_loadu_ps: + case Intrinsic::x86_sse_storeu_ps: + case Intrinsic::x86_sse2_storeu_pd: + case Intrinsic::x86_sse2_storeu_dq: + case Intrinsic::x86_sse2_storel_dq: + if (II->getOperand(1) == OperandVal) + isAddress = true; + break; + } } - - void print(raw_ostream &OS) const; - void dump() const; -}; - + return isAddress; } -/// getNumRegs - Return the total number of register operands used by this -/// formula. This does not include register uses implied by non-constant -/// addrec strides. -unsigned Formula::getNumRegs() const { - return !!ScaledReg + BaseRegs.size(); +/// getAccessType - Return the type of the memory being accessed. +static const Type *getAccessType(const Instruction *Inst) { + const Type *AccessTy = Inst->getType(); + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) + AccessTy = SI->getOperand(0)->getType(); + else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + // Addressing modes can also be folded into prefetches and a variety + // of intrinsics. + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::x86_sse_storeu_ps: + case Intrinsic::x86_sse2_storeu_pd: + case Intrinsic::x86_sse2_storeu_dq: + case Intrinsic::x86_sse2_storel_dq: + AccessTy = II->getOperand(1)->getType(); + break; + } + } + return AccessTy; } -/// getComplexity - Return an oversimplified value indicating the complexity -/// of this formula. This is used as a tie-breaker in choosing register -/// preferences. -uint32_t Formula::getComplexity() const { - // Encode the information in a uint32_t so that comparing with operator< - // will be interesting. - return - // Most significant, the number of registers. 
This saturates because we - // need the bits, and because beyond a few hundred it doesn't really matter. - (std::min(getNumRegs(), (1u<<15)-1) << 17) | - // Having multiple base regs is worse than having a base reg and a scale. - ((BaseRegs.size() > 1) << 16) | - // Scale absolute value. - ((AM.Scale != 0 ? (Log2_64(abs64(AM.Scale)) + 1) : 0u) << 9) | - // Scale sign, which is less significant than the absolute value. - ((AM.Scale < 0) << 8) | - // Offset absolute value. - ((AM.BaseOffs != 0 ? (Log2_64(abs64(AM.BaseOffs)) + 1) : 0u) << 1) | - // If a GV is present, treat it like a maximal offset. - ((AM.BaseGV ? ((1u<<7)-1) : 0) << 1) | - // Offset sign, which is less significant than the absolute offset. - ((AM.BaseOffs < 0) << 0); +namespace { + /// BasedUser - For a particular base value, keep information about how we've + /// partitioned the expression so far. + struct BasedUser { + /// Base - The Base value for the PHI node that needs to be inserted for + /// this use. As the use is processed, information gets moved from this + /// field to the Imm field (below). BasedUser values are sorted by this + /// field. + const SCEV *Base; + + /// Inst - The instruction using the induction variable. + Instruction *Inst; + + /// OperandValToReplace - The operand value of Inst to replace with the + /// EmittedBase. + Value *OperandValToReplace; + + /// Imm - The immediate value that should be added to the base immediately + /// before Inst, because it will be folded into the imm field of the + /// instruction. This is also sometimes used for loop-variant values that + /// must be added inside the loop. + const SCEV *Imm; + + /// Phi - The induction variable that performs the striding that + /// should be used for this user. + PHINode *Phi; + + // isUseOfPostIncrementedValue - True if this should use the + // post-incremented version of this IV, not the preincremented version. + // This can only be set in special cases, such as the terminating setcc + // instruction for a loop and uses outside the loop that are dominated by + // the loop. + bool isUseOfPostIncrementedValue; + + BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) + : Base(IVSU.getOffset()), Inst(IVSU.getUser()), + OperandValToReplace(IVSU.getOperandValToReplace()), + Imm(se->getIntegerSCEV(0, Base->getType())), + isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} + + // Once we rewrite the code to insert the new IVs we want, update the + // operands of Inst to use the new expression 'NewBase', with 'Imm' added + // to it. + void RewriteInstructionToUseNewBase(const SCEV *NewBase, + Instruction *InsertPt, + SCEVExpander &Rewriter, Loop *L, Pass *P, + SmallVectorImpl<WeakVH> &DeadInsts, + ScalarEvolution *SE); + + Value *InsertCodeForBaseAtPosition(const SCEV *NewBase, + const Type *Ty, + SCEVExpander &Rewriter, + Instruction *IP, + ScalarEvolution *SE); + void dump() const; + }; } -/// getType - Return the type of this formula, if it has one, or null -/// otherwise. This type is meaningless except for the bit size. -const Type *Formula::getType() const { - return !BaseRegs.empty() ? BaseRegs.front()->getType() : - ScaledReg ? ScaledReg->getType() : - AM.BaseGV ? 
AM.BaseGV->getType() : - 0; +void BasedUser::dump() const { + dbgs() << " Base=" << *Base; + dbgs() << " Imm=" << *Imm; + dbgs() << " Inst: " << *Inst; } -namespace { +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase, + const Type *Ty, + SCEVExpander &Rewriter, + Instruction *IP, + ScalarEvolution *SE) { + Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); -/// ComplexitySorter - A predicate which orders Formulae by the number of -/// registers they contain. -struct ComplexitySorter { - bool operator()(const Formula &LHS, const Formula &RHS) const { - unsigned L = LHS.getNumRegs(); - unsigned R = RHS.getNumRegs(); - if (L != R) return L < R; + // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to + // re-analyze it. + const SCEV *NewValSCEV = SE->getUnknown(Base); - return LHS.getComplexity() < RHS.getComplexity(); - } -}; + // Always emit the immediate into the same block as the user. + NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); + return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); } -/// DoInitialMatch - Recurrsion helper for InitialMatch. -static void DoInitialMatch(const SCEV *S, Loop *L, - SmallVectorImpl<const SCEV *> &Good, - SmallVectorImpl<const SCEV *> &Bad, - ScalarEvolution &SE, DominatorTree &DT) { - // Collect expressions which properly dominate the loop header. - if (S->properlyDominates(L->getHeader(), &DT)) { - Good.push_back(S); - return; - } - - // Look at add operands. - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) - DoInitialMatch(*I, L, Good, Bad, SE, DT); - return; - } - // Look at addrec operands. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - if (!AR->getStart()->isZero()) { - DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT); - DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), - AR->getStepRecurrence(SE), - AR->getLoop()), - L, Good, Bad, SE, DT); - return; +// Once we rewrite the code to insert the new IVs we want, update the +// operands of Inst to use the new expression 'NewBase', with 'Imm' added +// to it. NewBasePt is the last instruction which contributes to the +// value of NewBase in the case that it's a diffferent instruction from +// the PHI that NewBase is computed from, or null otherwise. +// +void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase, + Instruction *NewBasePt, + SCEVExpander &Rewriter, Loop *L, Pass *P, + SmallVectorImpl<WeakVH> &DeadInsts, + ScalarEvolution *SE) { + if (!isa<PHINode>(Inst)) { + // By default, insert code at the user instruction. + BasicBlock::iterator InsertPt = Inst; + + // However, if the Operand is itself an instruction, the (potentially + // complex) inserted code may be shared by many users. Because of this, we + // want to emit code for the computation of the operand right before its old + // computation. This is usually safe, because we obviously used to use the + // computation when it was computed in its current block. However, in some + // cases (e.g. use of a post-incremented induction variable) the NewBase + // value will be pinned to live somewhere after the original computation. + // In this case, we have to back off. + // + // If this is a use outside the loop (which means after, since it is based + // on a loop indvar) we use the post-incremented value, so that we don't + // artificially make the preinc value live out the bottom of the loop. 
+ if (!isUseOfPostIncrementedValue && L->contains(Inst)) { + if (NewBasePt && isa<PHINode>(OperandValToReplace)) { + InsertPt = NewBasePt; + ++InsertPt; + } else if (Instruction *OpInst + = dyn_cast<Instruction>(OperandValToReplace)) { + InsertPt = OpInst; + while (isa<PHINode>(InsertPt)) ++InsertPt; + } } + Value *NewVal = InsertCodeForBaseAtPosition(NewBase, + OperandValToReplace->getType(), + Rewriter, InsertPt, SE); + // Replace the use of the operand Value with the new Phi we just created. + Inst->replaceUsesOfWith(OperandValToReplace, NewVal); + + DEBUG(dbgs() << " Replacing with "); + DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false)); + DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " + << *Imm << "\n"); + return; } - // Handle a multiplication by -1 (negation) if it didn't fold. - if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) - if (Mul->getOperand(0)->isAllOnesValue()) { - SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end()); - const SCEV *NewMul = SE.getMulExpr(Ops); - - SmallVector<const SCEV *, 4> MyGood; - SmallVector<const SCEV *, 4> MyBad; - DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT); - const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue( - SE.getEffectiveSCEVType(NewMul->getType()))); - for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(), - E = MyGood.end(); I != E; ++I) - Good.push_back(SE.getMulExpr(NegOne, *I)); - for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(), - E = MyBad.end(); I != E; ++I) - Bad.push_back(SE.getMulExpr(NegOne, *I)); - return; - } - - // Ok, we can't do anything interesting. Just stuff the whole thing into a - // register and hope for the best. - Bad.push_back(S); -} + // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm + // expression into each operand block that uses it. Note that PHI nodes can + // have multiple entries for the same predecessor. We use a map to make sure + // that a PHI node only has a single Value* for each predecessor (which also + // prevents us from inserting duplicate code in some blocks). + DenseMap<BasicBlock*, Value*> InsertedCode; + PHINode *PN = cast<PHINode>(Inst); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingValue(i) == OperandValToReplace) { + // If the original expression is outside the loop, put the replacement + // code in the same place as the original expression, + // which need not be an immediate predecessor of this PHI. This way we + // need only one copy of it even if it is referenced multiple times in + // the PHI. We don't do this when the original expression is inside the + // loop because multiple copies sometimes do useful sinking of code in + // that case(?). + Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace); + BasicBlock *PHIPred = PN->getIncomingBlock(i); + if (L->contains(OldLoc)) { + // If this is a critical edge, split the edge so that we do not insert + // the code on all predecessor/successor paths. We do this unless this + // is the canonical backedge for this loop, as this can make some + // inserted code be in an illegal position. + if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && + !isa<IndirectBrInst>(PHIPred->getTerminator()) && + (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { + + // First step, split the critical edge. + BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(), + P, false); + + // Next step: move the basic block. 
In particular, if the PHI node + // is outside of the loop, and PredTI is in the loop, we want to + // move the block to be immediately before the PHI block, not + // immediately after PredTI. + if (L->contains(PHIPred) && !L->contains(PN)) + NewBB->moveBefore(PN->getParent()); -/// InitialMatch - Incorporate loop-variant parts of S into this Formula, -/// attempting to keep all loop-invariant and loop-computable values in a -/// single base register. -void Formula::InitialMatch(const SCEV *S, Loop *L, - ScalarEvolution &SE, DominatorTree &DT) { - SmallVector<const SCEV *, 4> Good; - SmallVector<const SCEV *, 4> Bad; - DoInitialMatch(S, L, Good, Bad, SE, DT); - if (!Good.empty()) { - BaseRegs.push_back(SE.getAddExpr(Good)); - AM.HasBaseReg = true; - } - if (!Bad.empty()) { - BaseRegs.push_back(SE.getAddExpr(Bad)); - AM.HasBaseReg = true; - } -} + // Splitting the edge can reduce the number of PHI entries we have. + e = PN->getNumIncomingValues(); + PHIPred = NewBB; + i = PN->getBasicBlockIndex(PHIPred); + } + } + Value *&Code = InsertedCode[PHIPred]; + if (!Code) { + // Insert the code into the end of the predecessor block. + Instruction *InsertPt = (L->contains(OldLoc)) ? + PHIPred->getTerminator() : + OldLoc->getParent()->getTerminator(); + Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), + Rewriter, InsertPt, SE); + + DEBUG(dbgs() << " Changing PHI use to "); + DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false)); + DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " + << *Imm << "\n"); + } -void Formula::print(raw_ostream &OS) const { - bool First = true; - if (AM.BaseGV) { - if (!First) OS << " + "; else First = false; - WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false); - } - if (AM.BaseOffs != 0) { - if (!First) OS << " + "; else First = false; - OS << AM.BaseOffs; - } - for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), - E = BaseRegs.end(); I != E; ++I) { - if (!First) OS << " + "; else First = false; - OS << "reg("; - OS << **I; - OS << ")"; - } - if (AM.Scale != 0) { - if (!First) OS << " + "; else First = false; - OS << AM.Scale << "*reg("; - if (ScaledReg) - OS << *ScaledReg; - else - OS << "<unknown>"; - OS << ")"; + // Replace the use of the operand Value with the new Phi we just created. + PN->setIncomingValue(i, Code); + Rewriter.clear(); + } } -} -void Formula::dump() const { - print(errs()); errs() << '\n'; + // PHI node might have become a constant value after SplitCriticalEdge. + DeadInsts.push_back(Inst); } -/// getSDiv - Return an expression for LHS /s RHS, if it can be determined, -/// or null otherwise. If IgnoreSignificantBits is true, expressions like -/// (X * Y) /s Y are simplified to Y, ignoring that the multiplication may -/// overflow, which is useful when the result will be used in a context where -/// the most significant bits are ignored. -static const SCEV *getSDiv(c |
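
The final removed helper, getSDiv, is documented above as returning an expression for LHS /s RHS when the quotient can be determined and null otherwise, with IgnoreSignificantBits additionally allowing cancellations such as (X * Y) /s Y even though the multiplication might overflow. Below is a rough standalone model of that contract on plain 64-bit integers; exactSDiv and its exactness check are illustrative assumptions, not the SCEV implementation.

    #include <cstdint>
    #include <optional>

    // Rough model of the getSDiv contract (illustrative only; the real
    // helper walks SCEV expression trees): return LHS /s RHS when the
    // quotient is known exactly, and nothing when it cannot be determined.
    std::optional<int64_t> exactSDiv(int64_t LHS, int64_t RHS) {
      // Reject division by zero and the one overflowing case, and treat
      // a non-exact quotient as "cannot be determined".
      if (RHS == 0 || (LHS == INT64_MIN && RHS == -1) || LHS % RHS != 0)
        return std::nullopt;
      return LHS / RHS;
    }

On SCEVs, the IgnoreSignificantBits form instead matches structure, cancelling a factor of Y out of (X * Y) to produce X directly rather than performing any arithmetic.
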