author    Dan Gohman <gohman@apple.com>    2010-01-22 00:46:49 +0000
committer Dan Gohman <gohman@apple.com>    2010-01-22 00:46:49 +0000
commit    7979b72febb73f7bb1d1ed095a68f210822b2e7c
tree      d74dd6f92611e69526c8684793e34a6a3513c0cc
parent    3ec68f97ead4a2bc339b1b9012ca4cb2c4dd706a
Revert LoopStrengthReduce.cpp to pre-r94061 for now.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94123 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpander.h | 3
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 4623
-rw-r--r--  test/CodeGen/ARM/arm-negative-stride.ll | 26
-rw-r--r--  test/CodeGen/ARM/lsr-code-insertion.ll | 4
-rw-r--r--  test/CodeGen/ARM/remat.ll | 3
-rw-r--r--  test/CodeGen/Thumb2/lsr-deficiency.ll | 18
-rw-r--r--  test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 12
-rw-r--r--  test/CodeGen/X86/2006-05-11-InstrSched.ll | 4
-rw-r--r--  test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll | 2
-rw-r--r--  test/CodeGen/X86/iv-users-in-other-loops.ll | 8
-rw-r--r--  test/CodeGen/X86/loop-strength-reduce4.ll | 18
-rw-r--r--  test/CodeGen/X86/loop-strength-reduce8.ll | 8
-rw-r--r--  test/CodeGen/X86/lsr-reuse.ll | 159
-rw-r--r--  test/CodeGen/X86/masked-iv-safe.ll | 7
-rw-r--r--  test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll | 5
-rw-r--r--  test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll | 7
-rw-r--r--  test/Transforms/LoopStrengthReduce/count-to-zero.ll | 2
17 files changed, 2308 insertions, 2601 deletions
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 5bec8e445a..01df503a5e 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -27,7 +27,10 @@ namespace llvm {
/// and destroy it when finished to allow the release of the associated
/// memory.
class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
+ public:
ScalarEvolution &SE;
+
+ private:
std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
InsertedExpressions;
std::set<Value*> InsertedValues;
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index acc66246c3..fa820ed8e4 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -17,45 +17,6 @@
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
-// Terminology note: this code has a lot of handling for "post-increment" or
-// "post-inc" users. This is not talking about post-increment addressing modes;
-// it is instead talking about code like this:
-//
-// %i = phi [ 0, %entry ], [ %i.next, %latch ]
-// ...
-// %i.next = add %i, 1
-// %c = icmp eq %i.next, %n
-//
-// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
-// it's useful to think about these as the same register, with some uses using
-// the value of the register before the add and some using it after. In this
-// example, the icmp is a post-increment user, since it uses %i.next, which is
-// the value of the induction variable after the increment. The other common
-// case of post-increment users is users outside the loop.
-//
-// TODO: More sophistication in the way Formulae are generated.
-//
-// TODO: Handle multiple loops at a time.
-//
-// TODO: test/CodeGen/X86/full-lsr.ll should get full lsr. The problem is
-// that {0,+,1}<%bb> is getting picked first because all 7 uses can
-// use it, and while it's a pretty good solution, it means that LSR
-// doesn't look further to find an even better solution.
-//
-// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
-// instead of a GlobalValue?
-//
-// TODO: When truncation is free, truncate ICmp users' operands to make it a
-// smaller encoding (on x86 at least).
-//
-// TODO: When a negated register is used by an add (such as in a list of
-// multiple base registers, or as the increment expression in an addrec),
-// we may not actually need both reg and (-1 * reg) in registers; the
-// negation can be implemented by using a sub instead of an add. The
-// lack of support for taking this into consideration when making
-// register pressure decisions is partly worked around by the "Special"
-// use kind.
-//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-reduce"
@@ -65,1549 +26,2025 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/IVUsers.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
-namespace {
-
-// Constant strides come first which in turns are sorted by their absolute
-// values. If absolute values are the same, then positive strides comes first.
-// e.g.
-// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
-struct StrideCompare {
- const ScalarEvolution &SE;
- explicit StrideCompare(const ScalarEvolution &se) : SE(se) {}
-
- bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) const {
- const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
- const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
- if (LHSC && RHSC) {
- unsigned BitWidth = std::max(SE.getTypeSizeInBits(LHS->getType()),
- SE.getTypeSizeInBits(RHS->getType()));
- APInt LV = LHSC->getValue()->getValue();
- APInt RV = RHSC->getValue()->getValue();
- LV.sextOrTrunc(BitWidth);
- RV.sextOrTrunc(BitWidth);
- APInt ALV = LV.abs();
- APInt ARV = RV.abs();
- if (ALV == ARV) {
- if (LV != RV)
- return LV.sgt(RV);
- } else {
- return ALV.ult(ARV);
- }
-
- // If it's the same value but different type, sort by bit width so
- // that we emit larger induction variables before smaller
- // ones, letting the smaller be re-written in terms of larger ones.
- return SE.getTypeSizeInBits(RHS->getType()) <
- SE.getTypeSizeInBits(LHS->getType());
- }
- return LHSC && !RHSC;
- }
-};
-
-/// RegSortData - This class holds data which is used to order reuse
-/// candidates.
-class RegSortData {
-public:
- /// Bits - This represents the set of LSRUses (by index) which reference a
- /// particular register.
- SmallBitVector Bits;
-
- /// MaxComplexity - This represents the greatest complexity (see the comments
- /// on Formula::getComplexity) seen with a particular register.
- uint32_t MaxComplexity;
-
- /// Index - This holds an arbitrary value used as a last-resort tie breaker
- /// to ensure deterministic behavior.
- unsigned Index;
-
- RegSortData() {}
-
- void print(raw_ostream &OS) const;
- void dump() const;
-};
+STATISTIC(NumReduced , "Number of IV uses strength reduced");
+STATISTIC(NumInserted, "Number of PHIs inserted");
+STATISTIC(NumVariable, "Number of PHIs with variable strides");
+STATISTIC(NumEliminated, "Number of strides eliminated");
+STATISTIC(NumShadow, "Number of Shadow IVs optimized");
+STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
+STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
+STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero");
-}
-
-void RegSortData::print(raw_ostream &OS) const {
- OS << "[NumUses=" << Bits.count()
- << ", MaxComplexity=";
- OS.write_hex(MaxComplexity);
- OS << ", Index=" << Index << ']';
-}
-
-void RegSortData::dump() const {
- print(errs()); errs() << '\n';
-}
+static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
+ cl::init(false),
+ cl::Hidden);
namespace {
-/// RegCount - This is a helper class to sort a given set of registers
-/// according to associated RegSortData values.
-class RegCount {
-public:
- const SCEV *Reg;
- RegSortData Sort;
-
- RegCount(const SCEV *R, const RegSortData &RSD)
- : Reg(R), Sort(RSD) {}
-
- // Sort by count. Returning true means the register is preferred.
- bool operator<(const RegCount &Other) const {
- // Sort by the number of unique uses of this register.
- unsigned A = Sort.Bits.count();
- unsigned B = Other.Sort.Bits.count();
- if (A != B) return A > B;
-
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
- const SCEVAddRecExpr *BR = dyn_cast<SCEVAddRecExpr>(Other.Reg);
- // AddRecs have higher priority than other things.
- if (!BR) return true;
-
- // Prefer affine values.
- if (AR->isAffine() != BR->isAffine())
- return AR->isAffine();
-
- const Loop *AL = AR->getLoop();
- const Loop *BL = BR->getLoop();
- if (AL != BL) {
- unsigned ADepth = AL->getLoopDepth();
- unsigned BDepth = BL->getLoopDepth();
- // Prefer a less deeply nested addrec.
- if (ADepth != BDepth)
- return ADepth < BDepth;
-
- // Different loops at the same depth; do something arbitrary.
- BasicBlock *AH = AL->getHeader();
- BasicBlock *BH = BL->getHeader();
- for (Function::iterator I = AH, E = AH->getParent()->end(); I != E; ++I)
- if (&*I == BH) return true;
- return false;
- }
+ struct BasedUser;
- // Sort addrecs by stride.
- const SCEV *AStep = AR->getOperand(1);
- const SCEV *BStep = BR->getOperand(1);
- if (AStep != BStep) {
- if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStep)) {
- const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStep);
- if (!BC) return true;
- // Arbitrarily prefer wider registers.
- if (AC->getValue()->getValue().getBitWidth() !=
- BC->getValue()->getValue().getBitWidth())
- return AC->getValue()->getValue().getBitWidth() >
- BC->getValue()->getValue().getBitWidth();
- // Ignore the sign bit, assuming that striding by a negative value
- // is just as easy as by a positive value.
- // Prefer the addrec with the lesser absolute value stride, as it
- // will allow uses to have simpler addressing modes.
- return AC->getValue()->getValue().abs()
- .ult(BC->getValue()->getValue().abs());
- }
- }
+ /// IVInfo - This structure keeps track of one IV expression inserted during
+ /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
+ /// well as the PHI node and increment value created for rewrite.
+ struct IVExpr {
+ const SCEV *Stride;
+ const SCEV *Base;
+ PHINode *PHI;
- // Then sort by the register which will permit the simplest uses.
- // This is a heuristic; currently we only track the most complex use as a
- // representative.
- if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
- return Sort.MaxComplexity < Other.Sort.MaxComplexity;
-
- // Then sort them by their start values.
- const SCEV *AStart = AR->getStart();
- const SCEV *BStart = BR->getStart();
- if (AStart != BStart) {
- if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStart)) {
- const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStart);
- if (!BC) return true;
- // Arbitrarily prefer wider registers.
- if (AC->getValue()->getValue().getBitWidth() !=
- BC->getValue()->getValue().getBitWidth())
- return AC->getValue()->getValue().getBitWidth() >
- BC->getValue()->getValue().getBitWidth();
- // Prefer positive over negative if the absolute values are the same.
- if (AC->getValue()->getValue().abs() ==
- BC->getValue()->getValue().abs())
- return AC->getValue()->getValue().isStrictlyPositive();
- // Prefer the addrec with the lesser absolute value start.
- return AC->getValue()->getValue().abs()
- .ult(BC->getValue()->getValue().abs());
- }
- }
- } else {
- // AddRecs have higher priority than other things.
- if (isa<SCEVAddRecExpr>(Other.Reg)) return false;
- // Sort by the register which will permit the simplest uses.
- // This is a heuristic; currently we only track the most complex use as a
- // representative.
- if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
- return Sort.MaxComplexity < Other.Sort.MaxComplexity;
- }
+ IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi)
+ : Stride(stride), Base(base), PHI(phi) {}
+ };
+ /// IVsOfOneStride - This structure keeps track of all IV expressions inserted
+ /// during StrengthReduceStridedIVUsers for a particular stride of the IV.
+ struct IVsOfOneStride {
+ std::vector<IVExpr> IVs;
- // Tie-breaker: the arbitrary index, to ensure a reliable ordering.
- return Sort.Index < Other.Sort.Index;
- }
+ void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) {
+ IVs.push_back(IVExpr(Stride, Base, PHI));
+ }
+ };
- void print(raw_ostream &OS) const;
- void dump() const;
-};
+ class LoopStrengthReduce : public LoopPass {
+ IVUsers *IU;
+ ScalarEvolution *SE;
+ bool Changed;
+
+ /// IVsByStride - Keep track of all IVs that have been inserted for a
+ /// particular stride.
+ std::map<const SCEV *, IVsOfOneStride> IVsByStride;
+
+ /// DeadInsts - Keep track of instructions we may have made dead, so that
+ /// we can remove them after we are done working.
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *TLI;
+
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
+ LoopPass(&ID), TLI(tli) {}
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // We split critical edges, so we change the CFG. However, we do update
+ // many analyses if they are around.
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved("loops");
+ AU.addPreserved("domfrontier");
+ AU.addPreserved("domtree");
+
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<IVUsers>();
+ }
+ private:
+ void OptimizeIndvars(Loop *L);
+
+ /// OptimizeLoopTermCond - Change loop terminating condition to use the
+ /// postinc iv when possible.
+ void OptimizeLoopTermCond(Loop *L);
+
+ /// OptimizeShadowIV - If IV is used in an int-to-float cast
+ /// inside the loop then try to eliminate the cast operation.
+ void OptimizeShadowIV(Loop *L);
+
+ /// OptimizeMax - Rewrite the loop's terminating condition
+ /// if it uses a max computation.
+ ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse);
+
+ /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for
+ /// deciding when to exit the loop is used only for that purpose, try to
+ /// rearrange things so it counts down to a test against zero.
+ bool OptimizeLoopCountIV(Loop *L);
+ bool OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse, Loop *L);
+
+ /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
+ /// single stride of IV. All of the users may have different starting
+ /// values, and this may not be the only stride.
+ void StrengthReduceIVUsersOfStride(const SCEV *Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L);
+ void StrengthReduceIVUsers(Loop *L);
+
+ ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass = false);
+
+ bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
+ const SCEV* &CondStride);
+ bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
+ const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *,
+ IVExpr&, const Type*,
+ const std::vector<BasedUser>& UsersToProcess);
+ bool ValidScale(bool, int64_t,
+ const std::vector<BasedUser>& UsersToProcess);
+ bool ValidOffset(bool, int64_t, int64_t,
+ const std::vector<BasedUser>& UsersToProcess);
+ const SCEV *CollectIVUsers(const SCEV *Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L,
+ bool &AllUsesAreAddresses,
+ bool &AllUsesAreOutsideLoop,
+ std::vector<BasedUser> &UsersToProcess);
+ bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc);
+ bool ShouldUseFullStrengthReductionMode(
+ const std::vector<BasedUser> &UsersToProcess,
+ const Loop *L,
+ bool AllUsesAreAddresses,
+ const SCEV *Stride);
+ void PrepareToStrengthReduceFully(
+ std::vector<BasedUser> &UsersToProcess,
+ const SCEV *Stride,
+ const SCEV *CommonExprs,
+ const Loop *L,
+ SCEVExpander &PreheaderRewriter);
+ void PrepareToStrengthReduceFromSmallerStride(
+ std::vector<BasedUser> &UsersToProcess,
+ Value *CommonBaseV,
+ const IVExpr &ReuseIV,
+ Instruction *PreInsertPt);
+ void PrepareToStrengthReduceWithNewPhi(
+ std::vector<BasedUser> &UsersToProcess,
+ const SCEV *Stride,
+ const SCEV *CommonExprs,
+ Value *CommonBaseV,
+ Instruction *IVIncInsertPt,
+ const Loop *L,
+ SCEVExpander &PreheaderRewriter);
+
+ void DeleteTriviallyDeadInstructions();
+ };
}
-void RegCount::print(raw_ostream &OS) const {
- OS << *Reg << ':';
- Sort.print(OS);
-}
+char LoopStrengthReduce::ID = 0;
+static RegisterPass<LoopStrengthReduce>
+X("loop-reduce", "Loop Strength Reduction");
-void RegCount::dump() const {
- print(errs()); errs() << '\n';
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+ return new LoopStrengthReduce(TLI);
}
-namespace {
-
-/// Formula - This class holds information that describes a formula for
-/// satisfying a use. It may include broken-out immediates and scaled registers.
-struct Formula {
- /// AM - This is used to represent complex addressing, as well as other kinds
- /// of interesting uses.
- TargetLowering::AddrMode AM;
-
- /// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty, AM.HasBaseReg should be set to true.
- SmallVector<const SCEV *, 2> BaseRegs;
-
- /// ScaledReg - The 'scaled' register for this use. This should be non-null
- /// when AM.Scale is not zero.
- const SCEV *ScaledReg;
-
- Formula() : ScaledReg(0) {}
-
- unsigned getNumRegs() const;
- uint32_t getComplexity() const;
- const Type *getType() const;
+/// DeleteTriviallyDeadInstructions - If any of the instructions in the
+/// specified set are trivially dead, delete them and see if this makes any of
+/// their operands subsequently dead.
+void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
+ while (!DeadInsts.empty()) {
+ Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
- void InitialMatch(const SCEV *S, Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ if (I == 0 || !isInstructionTriviallyDead(I))
+ continue;
- /// referencesReg - Test if this formula references the given register.
- bool referencesReg(const SCEV *S) const {
- return S == ScaledReg ||
- std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
- }
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ *OI = 0;
+ if (U->use_empty())
+ DeadInsts.push_back(U);
+ }
- bool operator==(const Formula &Other) const {
- return BaseRegs == Other.BaseRegs &&
- ScaledReg == Other.ScaledReg &&
- AM.Scale == Other.AM.Scale &&
- AM.BaseOffs == Other.AM.BaseOffs &&
- AM.BaseGV == Other.AM.BaseGV;
+ I->eraseFromParent();
+ Changed = true;
}
+}
- // This sorts at least partially based on host pointer values which are
- // not deterministic, so it is only usable for uniqification.
- bool operator<(const Formula &Other) const {
- if (BaseRegs != Other.BaseRegs)
- return BaseRegs < Other.BaseRegs;
- if (ScaledReg != Other.ScaledReg)
- return ScaledReg < Other.ScaledReg;
- if (AM.Scale != Other.AM.Scale)
- return AM.Scale < Other.AM.Scale;
- if (AM.BaseOffs != Other.AM.BaseOffs)
- return AM.BaseOffs < Other.AM.BaseOffs;
- if (AM.BaseGV != Other.AM.BaseGV)
- return AM.BaseGV < Other.AM.BaseGV;
- return false;
+/// isAddressUse - Returns true if the specified instruction is using the
+/// specified value as an address.
+static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+ bool isAddress = isa<LoadInst>(Inst);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(1) == OperandVal)
+ isAddress = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::prefetch:
+ case Intrinsic::x86_sse2_loadu_dq:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ if (II->getOperand(1) == OperandVal)
+ isAddress = true;
+ break;
+ }
}
-
- void print(raw_ostream &OS) const;
- void dump() const;
-};
-
+ return isAddress;
}
-/// getNumRegs - Return the total number of register operands used by this
-/// formula. This does not include register uses implied by non-constant
-/// addrec strides.
-unsigned Formula::getNumRegs() const {
- return !!ScaledReg + BaseRegs.size();
+/// getAccessType - Return the type of the memory being accessed.
+static const Type *getAccessType(const Instruction *Inst) {
+ const Type *AccessTy = Inst->getType();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ AccessTy = SI->getOperand(0)->getType();
+ else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ AccessTy = II->getOperand(1)->getType();
+ break;
+ }
+ }
+ return AccessTy;
}
-/// getComplexity - Return an oversimplified value indicating the complexity
-/// of this formula. This is used as a tie-breaker in choosing register
-/// preferences.
-uint32_t Formula::getComplexity() const {
- // Encode the information in a uint32_t so that comparing with operator<
- // will be interesting.
- return
- // Most significant, the number of registers. This saturates because we
- // need the bits, and because beyond a few hundred it doesn't really matter.
- (std::min(getNumRegs(), (1u<<15)-1) << 17) |
- // Having multiple base regs is worse than having a base reg and a scale.
- ((BaseRegs.size() > 1) << 16) |
- // Scale absolute value.
- ((AM.Scale != 0 ? (Log2_64(abs64(AM.Scale)) + 1) : 0u) << 9) |
- // Scale sign, which is less significant than the absolute value.
- ((AM.Scale < 0) << 8) |
- // Offset absolute value.
- ((AM.BaseOffs != 0 ? (Log2_64(abs64(AM.BaseOffs)) + 1) : 0u) << 1) |
- // If a GV is present, treat it like a maximal offset.
- ((AM.BaseGV ? ((1u<<7)-1) : 0) << 1) |
- // Offset sign, which is less significant than the absolute offset.
- ((AM.BaseOffs < 0) << 0);
+namespace {
+ /// BasedUser - For a particular base value, keep information about how we've
+ /// partitioned the expression so far.
+ struct BasedUser {
+ /// Base - The Base value for the PHI node that needs to be inserted for
+ /// this use. As the use is processed, information gets moved from this
+ /// field to the Imm field (below). BasedUser values are sorted by this
+ /// field.
+ const SCEV *Base;
+
+ /// Inst - The instruction using the induction variable.
+ Instruction *Inst;
+
+ /// OperandValToReplace - The operand value of Inst to replace with the
+ /// EmittedBase.
+ Value *OperandValToReplace;
+
+ /// Imm - The immediate value that should be added to the base immediately
+ /// before Inst, because it will be folded into the imm field of the
+ /// instruction. This is also sometimes used for loop-variant values that
+ /// must be added inside the loop.
+ const SCEV *Imm;
+
+ /// Phi - The induction variable that performs the striding that
+ /// should be used for this user.
+ PHINode *Phi;
+
+ // isUseOfPostIncrementedValue - True if this should use the
+ // post-incremented version of this IV, not the preincremented version.
+ // This can only be set in special cases, such as the terminating setcc
+ // instruction for a loop and uses outside the loop that are dominated by
+ // the loop.
+ bool isUseOfPostIncrementedValue;
+
+ BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
+ : Base(IVSU.getOffset()), Inst(IVSU.getUser()),
+ OperandValToReplace(IVSU.getOperandValToReplace()),
+ Imm(se->getIntegerSCEV(0, Base->getType())),
+ isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
+
+ // Once we rewrite the code to insert the new IVs we want, update the
+ // operands of Inst to use the new expression 'NewBase', with 'Imm' added
+ // to it.
+ void RewriteInstructionToUseNewBase(const SCEV *NewBase,
+ Instruction *InsertPt,
+ SCEVExpander &Rewriter, Loop *L, Pass *P,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ ScalarEvolution *SE);
+
+ Value *InsertCodeForBaseAtPosition(const SCEV *NewBase,
+ const Type *Ty,
+ SCEVExpander &Rewriter,
+ Instruction *IP,
+ ScalarEvolution *SE);
+ void dump() const;
+ };
}
-/// getType - Return the type of this formula, if it has one, or null
-/// otherwise. This type is meaningless except for the bit size.
-const Type *Formula::getType() const {
- return !BaseRegs.empty() ? BaseRegs.front()->getType() :
- ScaledReg ? ScaledReg->getType() :
- AM.BaseGV ? AM.BaseGV->getType() :
- 0;
+void BasedUser::dump() const {
+ dbgs() << " Base=" << *Base;
+ dbgs() << " Imm=" << *Imm;
+ dbgs() << " Inst: " << *Inst;
}
-namespace {
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase,
+ const Type *Ty,
+ SCEVExpander &Rewriter,
+ Instruction *IP,
+ ScalarEvolution *SE) {
+ Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP);
-/// ComplexitySorter - A predicate which orders Formulae by the number of
-/// registers they contain.
-struct ComplexitySorter {
- bool operator()(const Formula &LHS, const Formula &RHS) const {
- unsigned L = LHS.getNumRegs();
- unsigned R = RHS.getNumRegs();
- if (L != R) return L < R;
+ // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to
+ // re-analyze it.
+ const SCEV *NewValSCEV = SE->getUnknown(Base);
- return LHS.getComplexity() < RHS.getComplexity();
- }
-};
+ // Always emit the immediate into the same block as the user.
+ NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
+ return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
}
-/// DoInitialMatch - Recursion helper for InitialMatch.
-static void DoInitialMatch(const SCEV *S, Loop *L,
- SmallVectorImpl<const SCEV *> &Good,
- SmallVectorImpl<const SCEV *> &Bad,
- ScalarEvolution &SE, DominatorTree &DT) {
- // Collect expressions which properly dominate the loop header.
- if (S->properlyDominates(L->getHeader(), &DT)) {
- Good.push_back(S);
- return;
- }
-
- // Look at add operands.
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- DoInitialMatch(*I, L, Good, Bad, SE, DT);
- return;
- }
- // Look at addrec operands.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- if (!AR->getStart()->isZero()) {
- DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
- DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
- AR->getStepRecurrence(SE),
- AR->getLoop()),
- L, Good, Bad, SE, DT);
- return;
+// Once we rewrite the code to insert the new IVs we want, update the
+// operands of Inst to use the new expression 'NewBase', with 'Imm' added
+// to it. NewBasePt is the last instruction which contributes to the
+// value of NewBase in the case that it's a different instruction from
+// the PHI that NewBase is computed from, or null otherwise.
+//
+void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase,
+ Instruction *NewBasePt,
+ SCEVExpander &Rewriter, Loop *L, Pass *P,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ ScalarEvolution *SE) {
+ if (!isa<PHINode>(Inst)) {
+ // By default, insert code at the user instruction.
+ BasicBlock::iterator InsertPt = Inst;
+
+ // However, if the Operand is itself an instruction, the (potentially
+ // complex) inserted code may be shared by many users. Because of this, we
+ // want to emit code for the computation of the operand right before its old
+ // computation. This is usually safe, because we obviously used to use the
+ // computation when it was computed in its current block. However, in some
+ // cases (e.g. use of a post-incremented induction variable) the NewBase
+ // value will be pinned to live somewhere after the original computation.
+ // In this case, we have to back off.
+ //
+ // If this is a use outside the loop (which means after, since it is based
+ // on a loop indvar) we use the post-incremented value, so that we don't
+ // artificially make the preinc value live out the bottom of the loop.
+ if (!isUseOfPostIncrementedValue && L->contains(Inst)) {
+ if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
+ InsertPt = NewBasePt;
+ ++InsertPt;
+ } else if (Instruction *OpInst
+ = dyn_cast<Instruction>(OperandValToReplace)) {
+ InsertPt = OpInst;
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+ }
}
+ Value *NewVal = InsertCodeForBaseAtPosition(NewBase,
+ OperandValToReplace->getType(),
+ Rewriter, InsertPt, SE);
+ // Replace the use of the operand Value with the new Phi we just created.
+ Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
+
+ DEBUG(dbgs() << " Replacing with ");
+ DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false));
+ DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM "
+ << *Imm << "\n");
+ return;
}
- // Handle a multiplication by -1 (negation) if it didn't fold.
- if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
- if (Mul->getOperand(0)->isAllOnesValue()) {
- SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
- const SCEV *NewMul = SE.getMulExpr(Ops);
-
- SmallVector<const SCEV *, 4> MyGood;
- SmallVector<const SCEV *, 4> MyBad;
- DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
- const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
- SE.getE