author     Alexander Kornienko <alexfh@google.com>  2013-03-26 02:28:59 +0000
committer  Alexander Kornienko <alexfh@google.com>  2013-03-26 02:28:59 +0000
commit     d934545ae6a00aa8a8179a93d11cbd93a5240849 (patch)
tree       ab44db08aa63a8f94a3e09d6491c4156c624af96 /lib/Transforms/Scalar
parent     868d4470cdfa9472353ea2a49a6c456ddae9c95b (diff)
parent     c204410d6bc435e7cb8ea768759a54135e8e92b5 (diff)
Updating branches/google/testing to r177703 (testing)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@177985 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                 |  10
-rw-r--r--  lib/Transforms/Scalar/GlobalMerge.cpp         |  82
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp      |  39
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp        |  54
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp  |   3
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp                | 675
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp    | 703
7 files changed, 518 insertions, 1048 deletions
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c04b447f1c..129af8d45d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
-static void patchReplacementInstruction(Value *Repl, Instruction *I) {
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
@@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) {
}
}
-static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) {
- patchReplacementInstruction(Repl, I);
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+ patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
}
@@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) {
}
// Remove it!
- patchAndReplaceAllUsesWith(AvailableVal, L);
+ patchAndReplaceAllUsesWith(L, AvailableVal);
if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
@@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) {
}
// Remove it!
- patchAndReplaceAllUsesWith(repl, I);
+ patchAndReplaceAllUsesWith(I, repl);
if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 1601a8d646..5d02c68a7a 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -53,6 +53,7 @@
#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -64,10 +65,16 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
@@ -78,6 +85,23 @@ namespace {
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
+ /// \brief Check if the given variable has been identified as must keep
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variable marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variable marked as "used"
+ void collectUsedGlobalVariables(Module &M);
+
+ /// Keep track of the GlobalVariable that must not be merged away
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
public:
static char ID; // Pass identification, replacement for typeid.
explicit GlobalMerge(const TargetLowering *tli = 0)
@@ -87,6 +111,7 @@ namespace {
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function &F);
+ virtual bool doFinalization(Module &M);
const char *getPassName() const {
return "Merge internal globals";
@@ -169,6 +194,43 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
return true;
}
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+ // Follow the invoke link to find the landing pad instruction
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
@@ -176,6 +238,7 @@ bool GlobalMerge::doInitialization(Module &M) {
const DataLayout *TD = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
+ setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
@@ -200,6 +263,10 @@ bool GlobalMerge::doInitialization(Module &M) {
I->getName().startswith(".llvm."))
continue;
+ // Ignore all "required" globals:
+ if (isMustKeepGlobalVariable(I))
+ continue;
+
if (TD->getTypeAllocSize(Ty) < MaxOffset) {
if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
.isBSSLocal())
@@ -221,11 +288,11 @@ bool GlobalMerge::doInitialization(Module &M) {
if (I->second.size() > 1)
Changed |= doMerge(I->second, M, false, I->first);
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
- // them in the future.
- // if (ConstGlobals.size() > 1)
- // Changed |= doMerge(ConstGlobals, M, true);
+ if (EnableGlobalMergeOnConst)
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, true, I->first);
return Changed;
}
@@ -234,6 +301,11 @@ bool GlobalMerge::runOnFunction(Function &F) {
return false;
}
+bool GlobalMerge::doFinalization(Module &M) {
+ MustKeepGlobalVariables.clear();
+ return false;
+}
+
Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
return new GlobalMerge(tli);
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 97fff7e782..8e76c78f5a 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -535,6 +535,45 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
if (!SE->isLoopInvariant(ExitValue, L))
continue;
+ // Computing the value outside of the loop brings no benefit if:
+ // - it is definitely used inside the loop in a way which cannot be
+ // optimized away.
+ // - no use outside of the loop can take advantage of hoisting the
+ // computation out of the loop
+ if (ExitValue->getSCEVType()>=scMulExpr) {
+ unsigned NumHardInternalUses = 0;
+ unsigned NumSoftExternalUses = 0;
+ unsigned NumUses = 0;
+ for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end();
+ IB!=IE && NumUses<=6 ; ++IB) {
+ Instruction *UseInstr = cast<Instruction>(*IB);
+ unsigned Opc = UseInstr->getOpcode();
+ NumUses++;
+ if (L->contains(UseInstr)) {
+ if (Opc == Instruction::Call || Opc == Instruction::Ret)
+ NumHardInternalUses++;
+ } else {
+ if (Opc == Instruction::PHI) {
+ // Do not count the Phi as a use. LCSSA may have inserted
+ // plenty of trivial ones.
+ NumUses--;
+ for (Value::use_iterator PB=UseInstr->use_begin(),
+ PE=UseInstr->use_end();
+ PB!=PE && NumUses<=6 ; ++PB, ++NumUses) {
+ unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
+ if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ continue;
+ }
+ if (Opc != Instruction::Call && Opc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ }
+ if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
+ continue;
+ }
+
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 9c67e327e2..0b62050b17 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -34,13 +34,9 @@ namespace {
}
// Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop* L, LPPassManager& LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
- bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks,
- bool &Changed, BasicBlock *Preheader);
-
- virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
@@ -53,6 +49,12 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
}
+
+ private:
+ bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
+ bool &Changed, BasicBlock *Preheader);
+
};
}
@@ -67,18 +69,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
-Pass* llvm::createLoopDeletionPass() {
+Pass *llvm::createLoopDeletionPass() {
return new LoopDeletion();
}
-/// IsLoopDead - Determined if a loop is dead. This assumes that we've already
+/// isLoopDead - Determined if a loop is dead. This assumes that we've already
/// checked for unique exit and exiting blocks, and that the code is in LCSSA
/// form.
-bool LoopDeletion::IsLoopDead(Loop* L,
- SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks,
+bool LoopDeletion::isLoopDead(Loop *L,
+ SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
bool &Changed, BasicBlock *Preheader) {
- BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock *exitBlock = exitBlocks[0];
// Make sure that all PHI entries coming from the loop are loop invariant.
// Because the code is in LCSSA form, any values used outside of the loop
@@ -86,19 +88,19 @@ bool LoopDeletion::IsLoopDead(Loop* L,
// sufficient to guarantee that no loop-variant values are used outside
// of the loop.
BasicBlock::iterator BI = exitBlock->begin();
- while (PHINode* P = dyn_cast<PHINode>(BI)) {
- Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
// Make sure all exiting blocks produce the same incoming value for the exit
// block. If there are different incoming values for different exiting
// blocks, then it is impossible to statically determine which value should
// be used.
- for (unsigned i = 1; i < exitingBlocks.size(); ++i) {
+ for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) {
if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
return false;
}
- if (Instruction* I = dyn_cast<Instruction>(incoming))
+ if (Instruction *I = dyn_cast<Instruction>(incoming))
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
@@ -127,10 +129,10 @@ bool LoopDeletion::IsLoopDead(Loop* L,
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
-bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
- BasicBlock* preheader = L->getLoopPreheader();
+ BasicBlock *preheader = L->getLoopPreheader();
if (!preheader)
return false;
@@ -158,19 +160,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Finally, we have to check that the loop really is dead.
bool Changed = false;
- if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
return Changed;
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
- ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
- BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock *exitBlock = exitBlocks[0];
// Because we're deleting a large chunk of code at once, the sequence in which
// we remove things is very important to avoid invalidation issues. Don't
@@ -182,14 +184,14 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
SE.forgetLoop(L);
// Connect the preheader directly to the exit block.
- TerminatorInst* TI = preheader->getTerminator();
+ TerminatorInst *TI = preheader->getTerminator();
TI->replaceUsesOfWith(L->getHeader(), exitBlock);
// Rewrite phis in the exit block to get their inputs from
// the preheader instead of the exiting block.
- BasicBlock* exitingBlock = exitingBlocks[0];
+ BasicBlock *exitingBlock = exitingBlocks[0];
BasicBlock::iterator BI = exitBlock->begin();
- while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
int j = P->getBasicBlockIndex(exitingBlock);
assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
P->setIncomingBlock(j, preheader);
@@ -200,7 +202,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
- DominatorTree& DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTree>();
SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
@@ -230,7 +232,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Finally, the blocks from loopinfo. This has to happen late because
// otherwise our loop iterators won't work.
- LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+ LoopInfo &loopInfo = getAnalysis<LoopInfo>();
SmallPtrSet<BasicBlock*, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4e4cb86464..9562cf8d5d 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -895,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
}
if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
- if (isLoser())
+ if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
@@ -2716,6 +2716,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// by LSR.
const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
Value *IVSrc = 0;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 810a553c74..25306c2681 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -57,11 +57,15 @@
using namespace llvm;
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
-STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
-STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
+STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
+STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions");
+STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
+STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
-STATISTIC(NumDeleted, "Number of instructions deleted");
-STATISTIC(NumVectorized, "Number of vectorized aggregates");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
/// Hidden option to force the pass to not use DomTree and mem2reg, instead
/// forming SSA values through the SSAUpdater infrastructure.
@@ -69,112 +73,167 @@ static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
namespace {
-/// \brief Alloca partitioning representation.
-///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
+/// \brief A custom IRBuilder inserter which prefixes all names if they are
+/// preserved.
+template <bool preserveNames = true>
+class IRBuilderPrefixedInserter :
+ public IRBuilderDefaultInserter<preserveNames> {
+ std::string Prefix;
+
public:
- /// \brief A common base class for representing a half-open byte range.
- struct ByteRange {
- /// \brief The beginning offset of the range.
- uint64_t BeginOffset;
+ void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
- /// \brief The ending offset, not included in the range.
- uint64_t EndOffset;
+protected:
+ void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<preserveNames>::InsertHelper(
+ I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
+ }
+};
- ByteRange() : BeginOffset(), EndOffset() {}
- ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
- : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+// Specialization for not preserving the name is trivial.
+template <>
+class IRBuilderPrefixedInserter<false> :
+ public IRBuilderDefaultInserter<false> {
+public:
+ void SetNamePrefix(const Twine &P) {}
+};
- /// \brief Support for ordering ranges.
- ///
- /// This provides an ordering over ranges such that start offsets are
- /// always increasing, and within equal start offsets, the end offsets are
- /// decreasing. Thus the spanning range comes first in a cluster with the
- /// same start position.
- bool operator<(const ByteRange &RHS) const {
- if (BeginOffset < RHS.BeginOffset) return true;
- if (BeginOffset > RHS.BeginOffset) return false;
- if (EndOffset > RHS.EndOffset) return true;
- return false;
- }
+/// \brief Provide a typedef for IRBuilder that drops names in release builds.
+#ifndef NDEBUG
+typedef llvm::IRBuilder<true, ConstantFolder,
+ IRBuilderPrefixedInserter<true> > IRBuilderTy;
+#else
+typedef llvm::IRBuilder<false, ConstantFolder,
+ IRBuilderPrefixedInserter<false> > IRBuilderTy;
+#endif
+}
- /// \brief Support comparison with a single offset to allow binary searches.
- friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
- return LHS.BeginOffset < RHSOffset;
- }
+namespace {
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const ByteRange &RHS) {
- return LHSOffset < RHS.BeginOffset;
- }
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
- bool operator==(const ByteRange &RHS) const {
- return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
- }
- bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
- };
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
- /// \brief A partition of an alloca.
+ /// \brief Support for ordering ranges.
///
- /// This structure represents a contiguous partition of the alloca. These are
- /// formed by examining the uses of the alloca. During formation, they may
- /// overlap but once an AllocaPartitioning is built, the Partitions within it
- /// are all disjoint.
- struct Partition : public ByteRange {
- /// \brief Whether this partition is splittable into smaller partitions.
- ///
- /// We flag partitions as splittable when they are formed entirely due to
- /// accesses by trivially splittable operations such as memset and memcpy.
- bool IsSplittable;
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
- /// \brief Test whether a partition has been marked as dead.
- bool isDead() const {
- if (BeginOffset == UINT64_MAX) {
- assert(EndOffset == UINT64_MAX);
- return true;
- }
- return false;
- }
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
+
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
- /// \brief Kill a partition.
- /// This is accomplished by setting both its beginning and end offset to
- /// the maximum possible value.
- void kill() {
- assert(!isDead() && "He's Dead, Jim!");
- BeginOffset = EndOffset = UINT64_MAX;
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
+
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
+ ///
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
}
+ return false;
+ }
- Partition() : ByteRange(), IsSplittable() {}
- Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
- : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
- };
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
+
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
+
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. They
+/// mark the range of bytes which are referenced by a particular instruction,
+/// and includes a handle to the user itself and the pointer value in use.
+/// The bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentionally overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+ /// \brief Combined storage for both the Use* and split state.
+ PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
+
+public:
+ PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+ bool IsSplit)
+ : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
- /// \brief A particular use of a partition of the alloca.
+ /// \brief The use in question. Provides access to both user and used value.
///
- /// This structure is used to associate uses of a partition with it. They
- /// mark the range of bytes which are referenced by a particular instruction,
- /// and includes a handle to the user itself and the pointer value in use.
- /// The bounds of these uses are determined by intersecting the bounds of the
- /// memory use itself with a particular partition. As a consequence there is
- /// intentionally overlap between various uses of the same partition.
- struct PartitionUse : public ByteRange {
- /// \brief The use in question. Provides access to both user and used value.
- ///
- /// Note that this may be null if the partition use is *dead*, that is, it
- /// should be ignored.
- Use *U;
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
- PartitionUse() : ByteRange(), U() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
- : ByteRange(BeginOffset, EndOffset), U(U) {}
- };
+ /// \brief Set the use for this partition use range.
+ void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+
+ /// \brief Whether this use is split across multiple partitions.
+ bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
+
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
/// \brief Construct a partitioning of a particular alloca.
///
/// Construction does most of the work for partitioning the alloca. This
@@ -456,10 +515,10 @@ private:
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
- // NOTE! This may appear superficially to be something we could ignore
- // entirely, but that is not so! There may be PHI-node uses where some
- // instructions are dead but not others. We can't completely ignore the
- // PHI node, and so have to record at least the information here.
+ // This may appear superficially to be something we could ignore entirely,
+ // but that is not so! There may be widened loads or PHI-node uses where
+ // some instructions are dead but not others. We can't completely ignore
+ // them, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -474,33 +533,17 @@ private:
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
- bool IsVolatile) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse. We also try to handle cases which might run the
- // risk of overflow.
- // FIXME: We should instead consider the pointer to have escaped if this
- // function is being instrumented for addressing bugs or race conditions.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
- << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
- << " which extends past the end of the " << AllocSize
- << " byte alloca:\n"
- << " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- return;
- }
-
+ uint64_t Size, bool IsVolatile) {
// We allow splitting of loads and stores where the type is an integer type
- // and which cover the entire alloca. Such integer loads and stores
- // often require decomposition into fine grained loads and stores.
- bool IsSplittable = false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+ // and cover the entire alloca. This prevents us from splitting
+ // over-eagerly.
+ // FIXME: In the great blue eventually, we should eagerly split all integer
+ // loads and stores, and then have a separate step that merges adjacent
+ // alloca partitions into a single partition suitable for integer widening.
+ // Or we should skip the merge step and rely on GVN and other passes to
+ // merge adjacent loads and stores that survive mem2reg.
+ bool IsSplittable =
+ Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
insertUse(I, Offset, Size, IsSplittable);
}
@@ -512,7 +555,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
@@ -522,9 +566,28 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of of the allocation, it's behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse. We also try to handle cases which might run the
+ // risk of overflow.
+ // FIXME: We should instead consider the pointer to have escaped if this
+ // function is being instrumented for addressing bugs or race conditions.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
+ << " which extends past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << SI << "\n");
+ return;
+ }
+
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+ handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
@@ -795,13 +858,14 @@ private:
EndOffset = AllocSize;
// NB: This only works if we have zero overlapping partitions.
- iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
- if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
- B = llvm::prior(B);