aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Gohman <gohman@apple.com>2010-01-21 22:42:49 +0000
committerDan Gohman <gohman@apple.com>2010-01-21 22:42:49 +0000
commit8b0ade3eb8281f9b332d5764cfe5c4ed3b2b32d8 (patch)
treebb2fd6686c3f2ba2b08dac5c10775f7936ed43b4
parent80ffc965f513bed2252316af8530c14c36c2e295 (diff)
Prune the search for candidate formulae if the number of register
operands exceeds the number of registers used in the initial solution, as that wouldn't lead to a profitable solution anyway. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94107 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp99
1 files changed, 67 insertions, 32 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b7a733c91f..a5b4fa4bcb 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1120,6 +1120,8 @@ public:
const SmallVector<LSRUse, 16> &Uses, const Loop *L,
ScalarEvolution &SE);
+ unsigned getNumRegs() const { return NumRegs; }
+
bool operator<(const Score &Other) const;
void print_details(raw_ostream &OS, const SCEV *Reg,
@@ -1383,6 +1385,11 @@ class LSRInstance {
/// arbitrary index value to each register as a sort tie breaker.
unsigned CurrentArbitraryRegIndex;
+ /// MaxNumRegs - To help prune the search for solutions, identify the number
+ /// of registers needed by the initial solution. No formula should require
+ /// more than this.
+ unsigned MaxNumRegs;
+
/// Factors - Interesting factors between use strides.
SmallSetVector<int64_t, 4> Factors;
@@ -1412,6 +1419,8 @@ class LSRInstance {
void CountRegister(const SCEV *Reg, uint32_t Complexity, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
+ bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+
void GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
Formula Base);
void GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
@@ -1913,6 +1922,23 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
CountRegister(*I, Complexity, LUIdx);
}
+/// InsertFormula - If the given formula has not yet been inserted, add it
+/// to the list, and return true. Return false otherwise.
+bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ // If a formula by itself would require more registers than the base solution,
+ // discard it and stop searching from it, as it won't be profitable. This is
+ // actually more permissive than it could be, because it doesn't include
+ // registers used by non-constant strides in F.
+ if (F.getNumRegs() > MaxNumRegs)
+ return false;
+
+ if (!LU.InsertFormula(F))
+ return false;
+
+ CountRegisters(LU.Formulae.back(), LUIdx);
+ return true;
+}
+
/// GenerateSymbolicOffsetReuse - Generate reuse formulae using symbolic
/// offsets.
void LSRInstance::GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
@@ -1930,8 +1956,7 @@ void LSRInstance::GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
continue;
F.BaseRegs[i] = G;
- if (LU.InsertFormula(F))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, F);
}
}
@@ -1981,8 +2006,7 @@ void LSRInstance::GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
}
// If we make it here and it's legal, add it.
- if (LU.InsertFormula(F))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, F);
next:;
}
}
@@ -2006,11 +2030,9 @@ LSRInstance::GenerateFormulaeFromReplacedBaseReg(
E = AddOps.end(); I != E; ++I)
F.BaseRegs.push_back(*I);
F.AM.HasBaseReg = !F.BaseRegs.empty();
- if (LU.InsertFormula(F)) {
- CountRegisters(LU.Formulae.back(), LUIdx);
- // Recurse.
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like it.
GenerateReassociationReuse(LU, LUIdx, LU.Formulae.back());
- }
}
/// GenerateReassociationReuse - Split out subexpressions from adds and
@@ -2095,8 +2117,7 @@ void LSRInstance::GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
}
if (Ops.size() > 1) {
F.BaseRegs.push_back(SE.getAddExpr(Ops));
- if (LU.InsertFormula(F))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, F);
}
}
@@ -2144,8 +2165,7 @@ void LSRInstance::GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
std::swap(NewF.BaseRegs[i], NewF.BaseRegs.back());
NewF.BaseRegs.pop_back();
NewF.AM.HasBaseReg = !NewF.BaseRegs.empty();
- if (LU.InsertFormula(NewF))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, NewF);
}
}
}
@@ -2174,8 +2194,7 @@ void LSRInstance::GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J)
*J = SE.getAnyExtendExpr(*J, SrcTy);
- if (LU.InsertFormula(F))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, F);
}
}
}
@@ -2300,8 +2319,7 @@ void LSRInstance::GenerateConstantOffsetReuse() {
const SCEV *Diff = SE.getAddExpr(NegImmS, NewF.ScaledReg);
if (Diff->isZero()) continue;
NewF.ScaledReg = Diff;
- if (LU.InsertFormula(NewF))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, NewF);
}
// Use the immediate in a base register.
for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
@@ -2320,8 +2338,7 @@ void LSRInstance::GenerateConstantOffsetReuse() {
-(uint64_t)NewF.AM.BaseOffs)))
continue;
NewF.BaseRegs[N] = Diff;
- if (LU.InsertFormula(NewF))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, NewF);
}
}
}
@@ -2358,8 +2375,19 @@ LSRInstance::GenerateAllReuseFormulae() {
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::GenerateLoopInvariantRegisterUses() {
- for (size_t i = 0, e = RegSequence.size(); i != e; ++i)
- if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(RegSequence[i])) {
+ SmallVector<const SCEV *, 8> Worklist(RegSequence.begin(), RegSequence.end());
+
+ while (!Worklist.empty()) {
+ const SCEV *S = Worklist.pop_back_val();
+
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
+ Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
+ else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ Worklist.push_back(C->getOperand());
+ else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ Worklist.push_back(D->getLHS());
+ Worklist.push_back(D->getRHS());
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
const Value *V = U->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V))
if (L->contains(Inst)) continue;
@@ -2400,6 +2428,7 @@ LSRInstance::GenerateLoopInvariantRegisterUses() {
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
}
}
+ }
}
#ifndef NDEBUG
@@ -2424,7 +2453,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
- TLI(tli), L(l), Changed(false), CurrentArbitraryRegIndex(0) {
+ TLI(tli), L(l), Changed(false), CurrentArbitraryRegIndex(0), MaxNumRegs(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm()) return;
@@ -2538,15 +2567,24 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
if (Types.size() == 1)
Types.clear();
- // Now use the reuse data to generate a bunch of interesting ways
- // to formulate the values needed for the uses.
- GenerateAllReuseFormulae();
-
// If there are any uses of registers that we're tracking that have escaped
// IVUsers' attention, add trivial uses for them, so that the register
// voting process takes the into consideration.
GenerateLoopInvariantRegisterUses();
+ // Start by assuming we'll assign each use its own register. This is
+ // sometimes called "full" strength reduction, or "superhero" mode.
+ // Sometimes this is the best solution, but if there are opportunities for
+ // reuse we may find a better solution.
+ Score CurScore;
+ CurScore.RateInitial(Uses, L, SE);
+
+ MaxNumRegs = CurScore.getNumRegs();
+
+ // Now use the reuse data to generate a bunch of interesting ways
+ // to formulate the values needed for the uses.
+ GenerateAllReuseFormulae();
+
// Sort the formulae. TODO: This is redundantly sorted below.
for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), E = Uses.end();
I != E; ++I) {
@@ -2559,13 +2597,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// next step is to start looking at register reuse possibilities.
DEBUG(print(dbgs()); dbgs() << '\n');
- // Start by assuming we'll assign each use its own register. This is
- // sometimes called "full" strength reduction, or "superhero" mode.
- // Sometimes this is the best solution, but if there are opportunities for
- // reuse we may find a better solution.
- Score CurScore;
- CurScore.RateInitial(Uses, L, SE);
-
// Create a sorted list of registers with those with the most uses appearing
// earlier in the list. We'll visit them first, as they're the most likely
// to represent profitable reuse opportunities.
@@ -2669,6 +2700,10 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
}
void LSRInstance::print(raw_ostream &OS) const {
+ if (MaxNumRegs != 0)
+ OS << "LSR is considering " << MaxNumRegs << " to be the maximum "
+ "number of registers needed.\n";
+
OS << "LSR has identified the following interesting factors and types: ";
bool First = true;