Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp  4574
1 file changed, 2305 insertions, 2269 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fa820ed8e4..6456ca1eb0 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -17,6 +17,45 @@
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
+// Terminology note: this code has a lot of handling for "post-increment" or
+// "post-inc" users. This is not talking about post-increment addressing modes;
+// it is instead talking about code like this:
+//
+// %i = phi [ 0, %entry ], [ %i.next, %latch ]
+// ...
+// %i.next = add %i, 1
+// %c = icmp eq %i.next, %n
+//
+// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>; however,
+// it's useful to think about these as the same register, with some uses using
+// the value of the register before the add and some using it after. In this
+// example, the icmp is a post-increment user, since it uses %i.next, which is
+// the value of the induction variable after the increment. The other common
+// case of post-increment users is users outside the loop.
+//
+// TODO: More sophistication in the way Formulae are generated.
+//
+// TODO: Handle multiple loops at a time.
+//
+// TODO: test/CodeGen/X86/full-lsr.ll should get full LSR. The problem is
+// that {0,+,1}<%bb> is getting picked first because all 7 uses can
+// use it, and while it's a pretty good solution, it means that LSR
+// doesn't look further to find an even better solution.
+//
+// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
+// instead of a GlobalValue?
+//
+// TODO: When truncation is free, truncate ICmp users' operands to make it a
+// smaller encoding (on x86 at least).
+//
+// TODO: When a negated register is used by an add (such as in a list of
+// multiple base registers, or as the increment expression in an addrec),
+// we may not actually need both reg and (-1 * reg) in registers; the
+// negation can be implemented by using a sub instead of an add. The
+// lack of support for taking this into consideration when making
+// register pressure decisions is partly worked around by the "Special"
+// use kind.
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-reduce"
@@ -26,2025 +65,1524 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
-STATISTIC(NumReduced , "Number of IV uses strength reduced");
-STATISTIC(NumInserted, "Number of PHIs inserted");
-STATISTIC(NumVariable, "Number of PHIs with variable strides");
-STATISTIC(NumEliminated, "Number of strides eliminated");
-STATISTIC(NumShadow, "Number of Shadow IVs optimized");
-STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
-STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
-STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero");
-
-static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
- cl::init(false),
- cl::Hidden);
-
namespace {
- struct BasedUser;
+// Constant strides come first, and are in turn sorted by their absolute
+// values. If absolute values are the same, then positive strides come first.
+// e.g.
+// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
+struct StrideCompare {
+ const ScalarEvolution &SE;
+ explicit StrideCompare(const ScalarEvolution &se) : SE(se) {}
+
+ bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) const {
+ const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
+ const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
+ if (LHSC && RHSC) {
+ unsigned BitWidth = std::max(SE.getTypeSizeInBits(LHS->getType()),
+ SE.getTypeSizeInBits(RHS->getType()));
+ APInt LV = LHSC->getValue()->getValue();
+ APInt RV = RHSC->getValue()->getValue();
+ LV.sextOrTrunc(BitWidth);
+ RV.sextOrTrunc(BitWidth);
+ APInt ALV = LV.abs();
+ APInt ARV = RV.abs();
+ if (ALV == ARV) {
+ if (LV != RV)
+ return LV.sgt(RV);
+ } else {
+ return ALV.ult(ARV);
+ }
+
+ // If it's the same value but different type, sort by bit width so
+ // that we emit larger induction variables before smaller
+ // ones, letting the smaller be re-written in terms of larger ones.
+ return SE.getTypeSizeInBits(RHS->getType()) <
+ SE.getTypeSizeInBits(LHS->getType());
+ }
+ return LHSC && !RHSC;
+ }
+};
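// Editorial sketch, not part of this patch: StrideCompare is a binary
// predicate over 'const SCEV *', so it can directly order a container of
// collected strides. The helper name and the 'Strides' parameter below are
// hypothetical; 'SE' is assumed to be the pass's ScalarEvolution.
static void sortStridesExample(SmallVectorImpl<const SCEV *> &Strides,
                               ScalarEvolution &SE) {
  // Constants sort first, by absolute value, positive before negative;
  // e.g. 4, -1, X, 1, 2 becomes 1, -1, 2, 4, X.
  std::stable_sort(Strides.begin(), Strides.end(), StrideCompare(SE));
}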
-  /// IVExpr - This structure keeps track of one IV expression inserted during
-  /// StrengthReduceIVUsersOfStride. It contains the stride, the common base, as
- /// well as the PHI node and increment value created for rewrite.
- struct IVExpr {
- const SCEV *Stride;
- const SCEV *Base;
- PHINode *PHI;
+/// RegSortData - This class holds data which is used to order reuse
+/// candidates.
+class RegSortData {
+public:
+ /// Bits - This represents the set of LSRUses (by index) which reference a
+ /// particular register.
+ SmallBitVector Bits;
- IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi)
- : Stride(stride), Base(base), PHI(phi) {}
- };
+ /// MaxComplexity - This represents the greatest complexity (see the comments
+ /// on Formula::getComplexity) seen with a particular register.
+ uint32_t MaxComplexity;
-  /// IVsOfOneStride - This structure keeps track of all IV expressions inserted
-  /// during StrengthReduceIVUsersOfStride for a particular stride of the IV.
- struct IVsOfOneStride {
- std::vector<IVExpr> IVs;
+ /// Index - This holds an arbitrary value used as a last-resort tie breaker
+ /// to ensure deterministic behavior.
+ unsigned Index;
- void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) {
- IVs.push_back(IVExpr(Stride, Base, PHI));
- }
- };
+ RegSortData() {}
- class LoopStrengthReduce : public LoopPass {
- IVUsers *IU;
- ScalarEvolution *SE;
- bool Changed;
-
- /// IVsByStride - Keep track of all IVs that have been inserted for a
- /// particular stride.
- std::map<const SCEV *, IVsOfOneStride> IVsByStride;
-
- /// DeadInsts - Keep track of instructions we may have made dead, so that
- /// we can remove them after we are done working.
- SmallVector<WeakVH, 16> DeadInsts;
-
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
- const TargetLowering *TLI;
-
- public:
- static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
- LoopPass(&ID), TLI(tli) {}
-
- bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- // We split critical edges, so we change the CFG. However, we do update
- // many analyses if they are around.
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved("loops");
- AU.addPreserved("domfrontier");
- AU.addPreserved("domtree");
-
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
- AU.addRequired<IVUsers>();
- AU.addPreserved<IVUsers>();
- }
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
- private:
- void OptimizeIndvars(Loop *L);
-
- /// OptimizeLoopTermCond - Change loop terminating condition to use the
- /// postinc iv when possible.
- void OptimizeLoopTermCond(Loop *L);
-
-    /// OptimizeShadowIV - If IV is used in an int-to-float cast
-    /// inside the loop then try to eliminate the cast operation.
- void OptimizeShadowIV(Loop *L);
-
- /// OptimizeMax - Rewrite the loop's terminating condition
- /// if it uses a max computation.
- ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse);
-
- /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for
- /// deciding when to exit the loop is used only for that purpose, try to
- /// rearrange things so it counts down to a test against zero.
- bool OptimizeLoopCountIV(Loop *L);
- bool OptimizeLoopCountIVOfStride(const SCEV* &Stride,
- IVStrideUse* &CondUse, Loop *L);
-
- /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
- /// single stride of IV. All of the users may have different starting
- /// values, and this may not be the only stride.
- void StrengthReduceIVUsersOfStride(const SCEV *Stride,
- IVUsersOfOneStride &Uses,
- Loop *L);
- void StrengthReduceIVUsers(Loop *L);
-
- ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV* &CondStride,
- bool PostPass = false);
-
- bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV* &CondStride);
- bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
- const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *,
- IVExpr&, const Type*,
- const std::vector<BasedUser>& UsersToProcess);
- bool ValidScale(bool, int64_t,
- const std::vector<BasedUser>& UsersToProcess);
- bool ValidOffset(bool, int64_t, int64_t,
- const std::vector<BasedUser>& UsersToProcess);
- const SCEV *CollectIVUsers(const SCEV *Stride,
- IVUsersOfOneStride &Uses,
- Loop *L,
- bool &AllUsesAreAddresses,
- bool &AllUsesAreOutsideLoop,
- std::vector<BasedUser> &UsersToProcess);
- bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc);
- bool ShouldUseFullStrengthReductionMode(
- const std::vector<BasedUser> &UsersToProcess,
- const Loop *L,
- bool AllUsesAreAddresses,
- const SCEV *Stride);
- void PrepareToStrengthReduceFully(
- std::vector<BasedUser> &UsersToProcess,
- const SCEV *Stride,
- const SCEV *CommonExprs,
- const Loop *L,
- SCEVExpander &PreheaderRewriter);
- void PrepareToStrengthReduceFromSmallerStride(
- std::vector<BasedUser> &UsersToProcess,
- Value *CommonBaseV,
- const IVExpr &ReuseIV,
- Instruction *PreInsertPt);
- void PrepareToStrengthReduceWithNewPhi(
- std::vector<BasedUser> &UsersToProcess,
- const SCEV *Stride,
- const SCEV *CommonExprs,
- Value *CommonBaseV,
- Instruction *IVIncInsertPt,
- const Loop *L,
- SCEVExpander &PreheaderRewriter);
-
- void DeleteTriviallyDeadInstructions();
- };
}
-char LoopStrengthReduce::ID = 0;
-static RegisterPass<LoopStrengthReduce>
-X("loop-reduce", "Loop Strength Reduction");
+void RegSortData::print(raw_ostream &OS) const {
+ OS << "[NumUses=" << Bits.count()
+ << ", MaxComplexity=";
+ OS.write_hex(MaxComplexity);
+ OS << ", Index=" << Index << ']';
+}
-Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
- return new LoopStrengthReduce(TLI);
+void RegSortData::dump() const {
+ print(errs()); errs() << '\n';
}
-/// DeleteTriviallyDeadInstructions - If any of the instructions in the
-/// specified set are trivially dead, delete them and see if this makes any of
-/// their operands subsequently dead.
-void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
- while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+namespace {
- if (I == 0 || !isInstructionTriviallyDead(I))
- continue;
+/// RegCount - This is a helper class to sort a given set of registers
+/// according to associated RegSortData values.
+class RegCount {
+public:
+ const SCEV *Reg;
+ RegSortData Sort;
+
+ RegCount(const SCEV *R, const RegSortData &RSD)
+ : Reg(R), Sort(RSD) {}
+
+ // Sort by count. Returning true means the register is preferred.
+ bool operator<(const RegCount &Other) const {
+ // Sort by the number of unique uses of this register.
+ unsigned A = Sort.Bits.count();
+ unsigned B = Other.Sort.Bits.count();
+ if (A != B) return A > B;
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
+ const SCEVAddRecExpr *BR = dyn_cast<SCEVAddRecExpr>(Other.Reg);
+ // AddRecs have higher priority than other things.
+ if (!BR) return true;
+
+ // Prefer affine values.
+ if (AR->isAffine() != BR->isAffine())
+ return AR->isAffine();
+
+ const Loop *AL = AR->getLoop();
+ const Loop *BL = BR->getLoop();
+ if (AL != BL) {
+ unsigned ADepth = AL->getLoopDepth();
+ unsigned BDepth = BL->getLoopDepth();
+ // Prefer a less deeply nested addrec.
+ if (ADepth != BDepth)
+ return ADepth < BDepth;
+
+ // Different loops at the same depth; do something arbitrary.
+ BasicBlock *AH = AL->getHeader();
+ BasicBlock *BH = BL->getHeader();
+ for (Function::iterator I = AH, E = AH->getParent()->end(); I != E; ++I)
+ if (&*I == BH) return true;
+ return false;
+ }
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
- if (Instruction *U = dyn_cast<Instruction>(*OI)) {
- *OI = 0;
- if (U->use_empty())
- DeadInsts.push_back(U);
+ // Sort addrecs by stride.
+ const SCEV *AStep = AR->getOperand(1);
+ const SCEV *BStep = BR->getOperand(1);
+ if (AStep != BStep) {
+ if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStep)) {
+ const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStep);
+ if (!BC) return true;
+ // Arbitrarily prefer wider registers.
+ if (AC->getValue()->getValue().getBitWidth() !=
+ BC->getValue()->getValue().getBitWidth())
+ return AC->getValue()->getValue().getBitWidth() >
+ BC->getValue()->getValue().getBitWidth();
+ // Ignore the sign bit, assuming that striding by a negative value
+ // is just as easy as by a positive value.
+ // Prefer the addrec with the lesser absolute value stride, as it
+ // will allow uses to have simpler addressing modes.
+ return AC->getValue()->getValue().abs()
+ .ult(BC->getValue()->getValue().abs());
+ }
}
- I->eraseFromParent();
- Changed = true;
+ // Then sort by the register which will permit the simplest uses.
+ // This is a heuristic; currently we only track the most complex use as a
+ // representative.
+ if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
+ return Sort.MaxComplexity < Other.Sort.MaxComplexity;
+
+ // Then sort them by their start values.
+ const SCEV *AStart = AR->getStart();
+ const SCEV *BStart = BR->getStart();
+ if (AStart != BStart) {
+ if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStart)) {
+ const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStart);
+ if (!BC) return true;
+ // Arbitrarily prefer wider registers.
+ if (AC->getValue()->getValue().getBitWidth() !=
+ BC->getValue()->getValue().getBitWidth())
+ return AC->getValue()->getValue().getBitWidth() >
+ BC->getValue()->getValue().getBitWidth();
+ // Prefer positive over negative if the absolute values are the same.
+ if (AC->getValue()->getValue().abs() ==
+ BC->getValue()->getValue().abs())
+ return AC->getValue()->getValue().isStrictlyPositive();
+ // Prefer the addrec with the lesser absolute value start.
+ return AC->getValue()->getValue().abs()
+ .ult(BC->getValue()->getValue().abs());
+ }
+ }
+ } else {
+ // AddRecs have higher priority than other things.
+ if (isa<SCEVAddRecExpr>(Other.Reg)) return false;
+ // Sort by the register which will permit the simplest uses.
+ // This is a heuristic; currently we only track the most complex use as a
+ // representative.
+ if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
+ return Sort.MaxComplexity < Other.Sort.MaxComplexity;
+ }
+
+ // Tie-breaker: the arbitrary index, to ensure a reliable ordering.
+ return Sort.Index < Other.Sort.Index;
}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
}
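// Editorial sketch, not part of this patch: because RegCount::operator<
// returns true when the left register is preferred, a plain sort yields the
// registers in most-preferred-first order. The helper name and 'RegOrder'
// container are hypothetical.
static void sortRegCountsExample(std::vector<RegCount> &RegOrder) {
  // After this, RegOrder.front() is the register with the most uses, with
  // ties broken by the addrec/complexity/start-value heuristics above.
  std::sort(RegOrder.begin(), RegOrder.end());
}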
-/// isAddressUse - Returns true if the specified instruction is using the
-/// specified value as an address.
-static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
- bool isAddress = isa<LoadInst>(Inst);
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->getOperand(1) == OperandVal)
- isAddress = true;
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Addressing modes can also be folded into prefetches and a variety
- // of intrinsics.
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::prefetch:
- case Intrinsic::x86_sse2_loadu_dq:
- case Intrinsic::x86_sse2_loadu_pd:
- case Intrinsic::x86_sse_loadu_ps:
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
- if (II->getOperand(1) == OperandVal)
- isAddress = true;
- break;
- }
- }
- return isAddress;
+void RegCount::print(raw_ostream &OS) const {
+ OS << *Reg << ':';
+ Sort.print(OS);
}
-/// getAccessType - Return the type of the memory being accessed.
-static const Type *getAccessType(const Instruction *Inst) {
- const Type *AccessTy = Inst->getType();
- if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
- AccessTy = SI->getOperand(0)->getType();
- else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Addressing modes can also be folded into prefetches and a variety
- // of intrinsics.
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
- AccessTy = II->getOperand(1)->getType();
- break;
- }
- }
- return AccessTy;
+void RegCount::dump() const {
+ print(errs()); errs() << '\n';
}
namespace {
- /// BasedUser - For a particular base value, keep information about how we've
- /// partitioned the expression so far.
- struct BasedUser {
- /// Base - The Base value for the PHI node that needs to be inserted for
- /// this use. As the use is processed, information gets moved from this
- /// field to the Imm field (below). BasedUser values are sorted by this
- /// field.
- const SCEV *Base;
-
- /// Inst - The instruction using the induction variable.
- Instruction *Inst;
-
- /// OperandValToReplace - The operand value of Inst to replace with the
- /// EmittedBase.
- Value *OperandValToReplace;
-
- /// Imm - The immediate value that should be added to the base immediately
- /// before Inst, because it will be folded into the imm field of the
- /// instruction. This is also sometimes used for loop-variant values that
- /// must be added inside the loop.
- const SCEV *Imm;
-
- /// Phi - The induction variable that performs the striding that
- /// should be used for this user.
- PHINode *Phi;
-
- // isUseOfPostIncrementedValue - True if this should use the
- // post-incremented version of this IV, not the preincremented version.
- // This can only be set in special cases, such as the terminating setcc
- // instruction for a loop and uses outside the loop that are dominated by
- // the loop.
- bool isUseOfPostIncrementedValue;
-
- BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
- : Base(IVSU.getOffset()), Inst(IVSU.getUser()),
- OperandValToReplace(IVSU.getOperandValToReplace()),
- Imm(se->getIntegerSCEV(0, Base->getType())),
- isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
-
- // Once we rewrite the code to insert the new IVs we want, update the
- // operands of Inst to use the new expression 'NewBase', with 'Imm' added
- // to it.
- void RewriteInstructionToUseNewBase(const SCEV *NewBase,
- Instruction *InsertPt,
- SCEVExpander &Rewriter, Loop *L, Pass *P,
- SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution *SE);
-
- Value *InsertCodeForBaseAtPosition(const SCEV *NewBase,
- const Type *Ty,
- SCEVExpander &Rewriter,
- Instruction *IP,
- ScalarEvolution *SE);
- void dump() const;
- };
-}
-void BasedUser::dump() const {
- dbgs() << " Base=" << *Base;
- dbgs() << " Imm=" << *Imm;
- dbgs() << " Inst: " << *Inst;
-}
+/// Formula - This class holds information that describes a formula for
+/// satisfying a use. It may include broken-out immediates and scaled registers.
+struct Formula {
+ /// AM - This is used to represent complex addressing, as well as other kinds
+ /// of interesting uses.
+ TargetLowering::AddrMode AM;
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase,
- const Type *Ty,
- SCEVExpander &Rewriter,
- Instruction *IP,
- ScalarEvolution *SE) {
- Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP);
+ /// BaseRegs - The list of "base" registers for this use. When this is
+ /// non-empty, AM.HasBaseReg should be set to true.
+ SmallVector<const SCEV *, 2> BaseRegs;
- // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to
- // re-analyze it.
- const SCEV *NewValSCEV = SE->getUnknown(Base);
+ /// ScaledReg - The 'scaled' register for this use. This should be non-null
+ /// when AM.Scale is not zero.
+ const SCEV *ScaledReg;
- // Always emit the immediate into the same block as the user.
- NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
+ Formula() : ScaledReg(0) {}
- return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
-}
+ unsigned getNumRegs() const;
+ uint32_t getComplexity() const;
+ const Type *getType() const;
+ void InitialMatch(const SCEV *S, Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
-// Once we rewrite the code to insert the new IVs we want, update the
-// operands of Inst to use the new expression 'NewBase', with 'Imm' added
-// to it. NewBasePt is the last instruction which contributes to the
-// value of NewBase in the case that it's a different instruction from
-// the PHI that NewBase is computed from, or null otherwise.
-//
-void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase,
- Instruction *NewBasePt,
- SCEVExpander &Rewriter, Loop *L, Pass *P,
- SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution *SE) {
- if (!isa<PHINode>(Inst)) {
- // By default, insert code at the user instruction.
- BasicBlock::iterator InsertPt = Inst;
-
- // However, if the Operand is itself an instruction, the (potentially
- // complex) inserted code may be shared by many users. Because of this, we
- // want to emit code for the computation of the operand right before its old
- // computation. This is usually safe, because we obviously used to use the
- // computation when it was computed in its current block. However, in some
- // cases (e.g. use of a post-incremented induction variable) the NewBase
- // value will be pinned to live somewhere after the original computation.
- // In this case, we have to back off.
- //
- // If this is a use outside the loop (which means after, since it is based
- // on a loop indvar) we use the post-incremented value, so that we don't
- // artificially make the preinc value live out the bottom of the loop.
- if (!isUseOfPostIncrementedValue && L->contains(Inst)) {
- if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
- InsertPt = NewBasePt;
- ++InsertPt;
- } else if (Instruction *OpInst
- = dyn_cast<Instruction>(OperandValToReplace)) {
- InsertPt = OpInst;
- while (isa<PHINode>(InsertPt)) ++InsertPt;
- }
- }
- Value *NewVal = InsertCodeForBaseAtPosition(NewBase,
- OperandValToReplace->getType(),
- Rewriter, InsertPt, SE);
- // Replace the use of the operand Value with the new Phi we just created.
- Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
-
- DEBUG(dbgs() << " Replacing with ");
- DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false));
- DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM "
- << *Imm << "\n");
- return;
+ /// referencesReg - Test if this formula references the given register.
+ bool referencesReg(const SCEV *S) const {
+ return S == ScaledReg ||
+ std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
}
- // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm
- // expression into each operand block that uses it. Note that PHI nodes can
- // have multiple entries for the same predecessor. We use a map to make sure
- // that a PHI node only has a single Value* for each predecessor (which also
- // prevents us from inserting duplicate code in some blocks).
- DenseMap<BasicBlock*, Value*> InsertedCode;
- PHINode *PN = cast<PHINode>(Inst);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- if (PN->getIncomingValue(i) == OperandValToReplace) {
- // If the original expression is outside the loop, put the replacement
- // code in the same place as the original expression,
- // which need not be an immediate predecessor of this PHI. This way we
- // need only one copy of it even if it is referenced multiple times in
- // the PHI. We don't do this when the original expression is inside the
- // loop because multiple copies sometimes do useful sinking of code in
- // that case(?).
- Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
- BasicBlock *PHIPred = PN->getIncomingBlock(i);
- if (L->contains(OldLoc)) {
- // If this is a critical edge, split the edge so that we do not insert
- // the code on all predecessor/successor paths. We do this unless this
- // is the canonical backedge for this loop, as this can make some
- // inserted code be in an illegal position.
- if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
- !isa<IndirectBrInst>(PHIPred->getTerminator()) &&
- (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
-
- // First step, split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(),
- P, false);
-
- // Next step: move the basic block. In particular, if the PHI node
- // is outside of the loop, and PredTI is in the loop, we want to
- // move the block to be immediately before the PHI block, not
- // immediately after PredTI.
- if (L->contains(PHIPred) && !L->contains(PN))
- NewBB->moveBefore(PN->getParent());
+ bool operator==(const Formula &Other) const {
+ return BaseRegs == Other.BaseRegs &&
+ ScaledReg == Other.ScaledReg &&
+ AM.Scale == Other.AM.Scale &&
+ AM.BaseOffs == Other.AM.BaseOffs &&
+ AM.BaseGV == Other.AM.BaseGV;
+ }
- // Splitting the edge can reduce the number of PHI entries we have.
- e = PN->getNumIncomingValues();
- PHIPred = NewBB;
- i = PN->getBasicBlockIndex(PHIPred);
- }
- }
- Value *&Code = InsertedCode[PHIPred];
- if (!Code) {
- // Insert the code into the end of the predecessor block.
- Instruction *InsertPt = (L->contains(OldLoc)) ?
- PHIPred->getTerminator() :
- OldLoc->getParent()->getTerminator();
- Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
- Rewriter, InsertPt, SE);
-
- DEBUG(dbgs() << " Changing PHI use to ");
- DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false));
- DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM "
- << *Imm << "\n");
- }
+  // This sorts at least partially based on host pointer values, which are
+  // not deterministic, so it is only usable for uniquification.
+ bool operator<(const Formula &Other) const {
+ if (BaseRegs != Other.BaseRegs)
+ return BaseRegs < Other.BaseRegs;
+ if (ScaledReg != Other.ScaledReg)
+ return ScaledReg < Other.ScaledReg;
+ if (AM.Scale != Other.AM.Scale)
+ return AM.Scale < Other.AM.Scale;
+ if (AM.BaseOffs != Other.AM.BaseOffs)
+ return AM.BaseOffs < Other.AM.BaseOffs;
+ if (AM.BaseGV != Other.AM.BaseGV)
+ return AM.BaseGV < Other.AM.BaseGV;
+ return false;
+ }
- // Replace the use of the operand Value with the new Phi we just created.
- PN->setIncomingValue(i, Code);
- Rewriter.clear();
- }
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// getNumRegs - Return the total number of register operands used by this
+/// formula.
+unsigned Formula::getNumRegs() const {
+ return !!ScaledReg + BaseRegs.size();
+}
+
+/// getComplexity - Return an oversimplified value indicating the complexity
+/// of this formula. This is used as a tie-breaker in choosing register
+/// preferences.
+uint32_t Formula::getComplexity() const {
+ // Encode the information in a uint32_t so that comparing with operator<
+ // will be interesting.
+ return
+ // Most significant, the number of registers. This saturates because we
+ // need the bits, and because beyond a few hundred it doesn't really matter.
+ (std::min(getNumRegs(), (1u<<15)-1) << 17) |
+ // Having multiple base regs is worse than having a base reg and a scale.
+ ((BaseRegs.size() > 1) << 16) |
+ // Scale absolute value.
+ ((AM.Scale != 0 ? (Log2_64(abs64(AM.Scale)) + 1) : 0u) << 9) |
+ // Scale sign, which is less significant than the absolute value.
+ ((AM.Scale < 0) << 8) |
+ // Offset absolute value.
+ ((AM.BaseOffs != 0 ? (Log2_64(abs64(AM.BaseOffs)) + 1) : 0u) << 1) |
+ // If a GV is present, treat it like a maximal offset.
+ ((AM.BaseGV ? ((1u<<7)-1) : 0) << 1) |
+ // Offset sign, which is less significant than the absolute offset.
+ ((AM.BaseOffs < 0) << 0);
+}
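// Editorial note, not part of this patch: as a worked example of the
// encoding above, a formula with one base register, a scaled register with
// AM.Scale = 4, and AM.BaseOffs = -8 (no BaseGV) yields:
//   getNumRegs() = 2          -> 2 << 17            = 0x40000
//   multiple base regs? no    -> 0 << 16            = 0
//   |Scale| = 4               -> (Log2(4)+1) << 9   = 3 << 9 = 0x600
//   Scale sign: positive      -> 0 << 8             = 0
//   |BaseOffs| = 8            -> (Log2(8)+1) << 1   = 4 << 1 = 0x8
//   no BaseGV                 -> 0
//   BaseOffs sign: negative   -> 1 << 0             = 0x1
// for a total of 0x40609. A formula with fewer registers or a smaller
// offset compares lower, i.e. simpler.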
+
+/// getType - Return the type of this formula, if it has one, or null
+/// otherwise. This type is meaningless except for the bit size.
+const Type *Formula::getType() const {
+ return !BaseRegs.empty() ? BaseRegs.front()->getType() :
+ ScaledReg ? ScaledReg->getType() :
+ AM.BaseGV ? AM.BaseGV->getType() :
+ 0;
+}
+
+namespace {
+
+/// ComplexitySorter - A predicate which orders Formulae by the number of
+/// registers they contain.
+struct ComplexitySorter {
+ bool operator()(const Formula &LHS, const Formula &RHS) const {
+ unsigned L = LHS.getNumRegs();
+ unsigned R = RHS.getNumRegs();
+ if (L != R) return L < R;
+
+ return LHS.getComplexity() < RHS.getComplexity();
}
+};
- // PHI node might have become a constant value after SplitCriticalEdge.
- DeadInsts.push_back(Inst);
}
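// Editorial sketch, not part of this patch: ComplexitySorter orders
// candidate formulae simplest-first. The helper name and 'Formulae' list
// below are hypothetical.
static void sortFormulaeExample(SmallVectorImpl<Formula> &Formulae) {
  // After this, Formulae.front() needs the fewest registers; ties are
  // broken by Formula::getComplexity().
  std::sort(Formulae.begin(), Formulae.end(), ComplexitySorter());
}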
+/// DoInitialMatch - Recursion helper for InitialMatch.
+static void DoInitialMatch(const SCEV *S, Loop *L,
+ SmallVectorImpl<const SCEV *> &Good,
+ SmallVectorImpl<const SCEV *> &Bad,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ // Collect expressions which properly dominate the loop header.
+ if (S->properlyDominates(L->getHeader(), &DT)) {
+ Good.push_back(S);
+ return;
+ }
-/// fitsInAddressMode - Return true if V can be subsumed within an addressing
-/// mode, and does not need to be put in a register first.
-static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy,
- const TargetLowering *TLI, bool HasBaseReg) {
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
- int64_t VC = SC->getValue()->getSExtValue();
- if (TLI) {
- TargetLowering::AddrMode AM;
- AM.BaseOffs = VC;
- AM.HasBaseReg = HasBaseReg;
- return TLI->isLegalAddressingMode(AM, AccessTy);
- } else {
- // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field.
- return (VC > -(1 << 16) && VC < (1 << 16)-1);
+ // Look at add operands.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ DoInitialMatch(*I, L, Good, Bad, SE, DT);
+ return;
+ }
+
+ // Look at addrec operands.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (!AR->getStart()->isZero()) {
+ DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
+ DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
+ AR->getStepRecurrence(SE),
+ AR->getLoop()),
+ L, Good, Bad, SE, DT);
+ return;
}
}
- if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
- if (GlobalValue *GV = dyn_cast<GlobalValue>(SU->getValue())) {
- if (TLI) {
- TargetLowering::AddrMode AM;
- AM.BaseGV = GV;
- AM.HasBaseReg = HasBaseReg;
- return TLI->isLegalAddressingMode(AM, AccessTy);
- } else {
- // Default: assume global addresses are not legal.
- }
+ // Handle a multiplication by -1 (negation) if it didn't fold.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
+ if (Mul->getOperand(0)->isAllOnesValue()) {
+ SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *NewMul = SE.getMulExpr(Ops);
+
+ SmallVector<const SCEV *, 4> MyGood;
+ SmallVector<const SCEV *, 4> MyBad;
+ DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
+ const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
+ SE.getEffectiveSCEVType(NewMul->getType())));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
+ E = MyGood.end(); I != E; ++I)
+ Good.push_back(SE.getMulExpr(NegOne, *I));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
+ E = MyBad.end(); I != E; ++I)
+ Bad.push_back(SE.getMulExpr(NegOne, *I));
+ return;
}
- return false;
+ // Ok, we can't do anything interesting. Just stuff the whole thing into a
+ // register and hope for the best.
+ Bad.push_back(S);
}
-/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are
-/// loop varying to the Imm operand.
-static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
- Loop *L, ScalarEvolution *SE) {
- if (Val->isLoopInvariant(L)) return; // Nothing to do.
-
- if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- SmallVector<const SCEV *, 4> NewOps;
- NewOps.reserve(SAE->getNumOperands());
-
- for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
- if (!SAE->getOperand(i)->isLoopInvariant(L)) {
- // If this is a loop-variant expression, it must stay in