-rw-r--r--  include/llvm/InitializePasses.h              1
-rw-r--r--  include/llvm/Transforms/Scalar.h             6
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp   14
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt         1
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp            2628
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp             1
-rw-r--r--  test/Transforms/SROA/basictest.ll          741
-rw-r--r--  test/Transforms/SROA/lit.local.cfg           1
-rw-r--r--  test/Transforms/SROA/phi-and-select.ll     325
-rw-r--r--  test/Transforms/SROA/vector-promotion.ll   191
10 files changed, 3907 insertions(+), 2 deletions(-)
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index c1c833d949..84c2b65f97 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -221,6 +221,7 @@ void initializeRegionOnlyViewerPass(PassRegistry&);
void initializeRegionPrinterPass(PassRegistry&);
void initializeRegionViewerPass(PassRegistry&);
void initializeSCCPPass(PassRegistry&);
+void initializeSROAPass(PassRegistry&);
void initializeSROA_DTPass(PassRegistry&);
void initializeSROA_SSAUpPass(PassRegistry&);
void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 3dce6fe37f..18114ce293 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -70,6 +70,12 @@ FunctionPass *createAggressiveDCEPass();
//===----------------------------------------------------------------------===//
//
+// SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
+//
+FunctionPass *createSROAPass();
+
+//===----------------------------------------------------------------------===//
+//
// ScalarReplAggregates - Break up alloca's of aggregates into multiple allocas
// if possible.
//
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 43b4ab5efa..66d979cf2a 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -40,6 +40,10 @@ UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
+static cl::opt<bool> UseNewSROA("use-new-sroa",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable the new, experimental SROA pass"));
+
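+// A minimal usage sketch (illustrative, not part of the original patch): the
+// new pass can also be scheduled directly instead of relying on the
+// UseNewSROA flag, e.g. when a tool builds its own pipeline:
+//
+//   FunctionPassManager FPM(&M);   // M is some llvm::Module
+//   FPM.add(createSROAPass());
+//   FPM.doInitialization();
+//   FPM.run(F);                    // F is a function in M
+//   FPM.doFinalization();
+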
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -100,7 +104,10 @@ void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
addInitialAliasAnalysisPasses(FPM);
FPM.add(createCFGSimplificationPass());
- FPM.add(createScalarReplAggregatesPass());
+ if (UseNewSROA)
+ FPM.add(createSROAPass());
+ else
+ FPM.add(createScalarReplAggregatesPass());
FPM.add(createEarlyCSEPass());
FPM.add(createLowerExpectIntrinsicPass());
}
@@ -265,7 +272,10 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createInstructionCombiningPass());
PM.add(createJumpThreadingPass());
// Break up allocas
- PM.add(createScalarReplAggregatesPass());
+ if (UseNewSROA)
+ PM.add(createSROAPass());
+ else
+ PM.add(createScalarReplAggregatesPass());
// Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createFunctionAttrsPass()); // Add nocapture.
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index a01e0661b1..b3fc6e338c 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -25,6 +25,7 @@ add_llvm_library(LLVMScalarOpts
Reassociate.cpp
Reg2Mem.cpp
SCCP.cpp
+ SROA.cpp
Scalar.cpp
ScalarReplAggregates.cpp
SimplifyCFGPass.cpp
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
new file mode 100644
index 0000000000..9ecefa6199
--- /dev/null
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -0,0 +1,2628 @@
+//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This transformation implements the well-known scalar replacement of
+/// aggregates transformation. It tries to identify promotable elements of an
+/// aggregate alloca, and promote them to registers. It will also try to
+/// convert uses of an element (or set of elements) of an alloca into a vector
+/// or bitfield-style integer scalar if appropriate.
+///
+/// It works to do this with minimal slicing of the alloca so that regions
+/// which are merely transferred in and out of external memory remain unchanged
+/// and are not decomposed to scalar code.
+///
+/// Because this also performs alloca promotion, it can be thought of as also
+/// serving the purpose of SSA formation. The algorithm iterates on the
+/// function until all opportunities for promotion have been realized.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sroa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
+STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
+
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
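+///
+/// For example (an illustration, not from the original comment): for an
+/// alloca such as
+///   %a = alloca { i32, i32 }
+/// where the first i32 is only loaded and stored directly and the second is
+/// only ever memcpy'd to and from external memory, the partitioning would
+/// ideally describe two disjoint 4-byte partitions, the first a candidate
+/// for promotion to an SSA value and the second for a smaller alloca.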
+class AllocaPartitioning {
+public:
+ /// \brief A common base class for representing a half-open byte range.
+ struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
+
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
+
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+
+ /// \brief Support for ordering ranges.
+ ///
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+    /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
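+
+    // Illustrative example (not in the original comment): under this
+    // ordering the ranges [0,16), [0,8), and [8,16) sort as
+    //   [0,16) < [0,8) < [8,16)
+    // so a range spanning a whole cluster comes before the ranges it covers.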
+
+ /// \brief Support comparison with a single offset to allow binary searches.
+ bool operator<(uint64_t RHSOffset) const {
+ return BeginOffset < RHSOffset;
+ }
+
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+ };
+
+ /// \brief A partition of an alloca.
+ ///
+ /// This structure represents a contiguous partition of the alloca. These are
+ /// formed by examining the uses of the alloca. During formation, they may
+ /// overlap but once an AllocaPartitioning is built, the Partitions within it
+ /// are all disjoint.
+ struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
+ ///
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially split operations such as memset and memcpy.
+ ///
+ /// FIXME: At some point we should consider loads and stores of FCAs to be
+ /// splittable and eagerly split them into scalar values.
+ bool IsSplittable;
+
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+ };
+
+ /// \brief A particular use of a partition of the alloca.
+ ///
+ /// This structure is used to associate uses of a partition with it. They
+ /// mark the range of bytes which are referenced by a particular instruction,
+  /// and include a handle to the user itself and the pointer value in use.
+ /// The bounds of these uses are determined by intersecting the bounds of the
+ /// memory use itself with a particular partition. As a consequence there is
+  /// intentional overlap between the various uses of the same partition.
+ struct PartitionUse : public ByteRange {
+ /// \brief The user of this range of the alloca.
+ AssertingVH<Instruction> User;
+
+ /// \brief The particular pointer value derived from this alloca in use.
+ AssertingVH<Instruction> Ptr;
+
+ PartitionUse() : ByteRange(), User(), Ptr() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset,
+ Instruction *User, Instruction *Ptr)
+ : ByteRange(BeginOffset, EndOffset), User(User), Ptr(Ptr) {}
+ };
+
+ /// \brief Construct a partitioning of a particular alloca.
+ ///
+ /// Construction does most of the work for partitioning the alloca. This
+ /// performs the necessary walks of users and builds a partitioning from it.
+ AllocaPartitioning(const TargetData &TD, AllocaInst &AI);
+
+ /// \brief Test whether a pointer to the allocation escapes our analysis.
+ ///
+ /// If this is true, the partitioning is never fully built and should be
+ /// ignored.
+ bool isEscaped() const { return PointerEscapingInstr; }
+
+ /// \brief Support for iterating over the partitions.
+ /// @{
+ typedef SmallVectorImpl<Partition>::iterator iterator;
+ iterator begin() { return Partitions.begin(); }
+ iterator end() { return Partitions.end(); }
+
+ typedef SmallVectorImpl<Partition>::const_iterator const_iterator;
+ const_iterator begin() const { return Partitions.begin(); }
+ const_iterator end() const { return Partitions.end(); }
+ /// @}
+
+ /// \brief Support for iterating over and manipulating a particular
+ /// partition's uses.
+ ///
+ /// The iteration support provided for uses is more limited, but also
+ /// includes some manipulation routines to support rewriting the uses of
+ /// partitions during SROA.
+ /// @{
+ typedef SmallVectorImpl<PartitionUse>::iterator use_iterator;
+ use_iterator use_begin(unsigned Idx) { return Uses[Idx].begin(); }
+ use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
+ use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
+ use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }
+ void use_insert(unsigned Idx, use_iterator UI, const PartitionUse &U) {
+ Uses[Idx].insert(UI, U);
+ }
+ void use_insert(const_iterator I, use_iterator UI, const PartitionUse &U) {
+ Uses[I - begin()].insert(UI, U);
+ }
+ void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); }
+ void use_erase(const_iterator I, use_iterator UI) {
+ Uses[I - begin()].erase(UI);
+ }
+
+ typedef SmallVectorImpl<PartitionUse>::const_iterator const_use_iterator;
+ const_use_iterator use_begin(unsigned Idx) const { return Uses[Idx].begin(); }
+ const_use_iterator use_begin(const_iterator I) const {
+ return Uses[I - begin()].begin();
+ }
+ const_use_iterator use_end(unsigned Idx) const { return Uses[Idx].end(); }
+ const_use_iterator use_end(const_iterator I) const {
+ return Uses[I - begin()].end();
+ }
+ /// @}
+
+ /// \brief Allow iterating the dead users for this alloca.
+ ///
+ /// These are instructions which will never actually use the alloca as they
+ /// are outside the allocated range. They are safe to replace with undef and
+ /// delete.
+ /// @{
+ typedef SmallVectorImpl<Instruction *>::const_iterator dead_user_iterator;
+ dead_user_iterator dead_user_begin() const { return DeadUsers.begin(); }
+ dead_user_iterator dead_user_end() const { return DeadUsers.end(); }
+ /// @}
+
+ /// \brief Allow iterating the dead operands referring to this alloca.
+ ///
+  /// These are operands which cannot actually be used to refer to the
+ /// alloca as they are outside its range and the user doesn't correct for
+ /// that. These mostly consist of PHI node inputs and the like which we just
+ /// need to replace with undef.
+ /// @{
+ typedef SmallVectorImpl<Use *>::const_iterator dead_op_iterator;
+ dead_op_iterator dead_op_begin() const { return DeadOperands.begin(); }
+ dead_op_iterator dead_op_end() const { return DeadOperands.end(); }
+ /// @}
+
+  /// \brief MemTransferInst auxiliary data.
+  ///
+  /// This struct provides some auxiliary data about memory transfer
+ /// intrinsics such as memcpy and memmove. These intrinsics can use two
+ /// different ranges within the same alloca, and provide other challenges to
+ /// correctly represent. We stash extra data to help us untangle this
+ /// after the partitioning is complete.
+ struct MemTransferOffsets {
+ uint64_t DestBegin, DestEnd;
+ uint64_t SourceBegin, SourceEnd;
+ bool IsSplittable;
+ };
+ MemTransferOffsets getMemTransferOffsets(MemTransferInst &II) const {
+ return MemTransferInstData.lookup(&II);
+ }
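+
+  // Worked example (illustrative, not from the original commit): for a
+  // 32-byte alloca, a memcpy of 8 constant bytes from offset 0 to offset 16
+  // of that same alloca is recorded as SourceBegin=0, SourceEnd=8,
+  // DestBegin=16, DestEnd=24. IsSplittable is initially true because the
+  // length is constant, but is cleared by the partition builder below when
+  // it notices the intrinsic names the alloca as both source and destination.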
+
+ /// \brief Map from a PHI or select operand back to a partition.
+ ///
+ /// When manipulating PHI nodes or selects, they can use more than one
+ /// partition of an alloca. We store a special mapping to allow finding the
+ /// partition referenced by each of these operands, if any.
+ iterator findPartitionForPHIOrSelectOperand(Instruction &I, Value *Op) {
+ SmallDenseMap<std::pair<Instruction *, Value *>,
+ std::pair<unsigned, unsigned> >::const_iterator MapIt
+ = PHIOrSelectOpMap.find(std::make_pair(&I, Op));
+ if (MapIt == PHIOrSelectOpMap.end())
+ return end();
+
+ return begin() + MapIt->second.first;
+ }
+
+ /// \brief Map from a PHI or select operand back to the specific use of
+ /// a partition.
+ ///
+ /// Similar to mapping these operands back to the partitions, this maps
+ /// directly to the use structure of that partition.
+ use_iterator findPartitionUseForPHIOrSelectOperand(Instruction &I,
+ Value *Op) {
+ SmallDenseMap<std::pair<Instruction *, Value *>,
+ std::pair<unsigned, unsigned> >::const_iterator MapIt
+ = PHIOrSelectOpMap.find(std::make_pair(&I, Op));
+ assert(MapIt != PHIOrSelectOpMap.end());
+ return Uses[MapIt->second.first].begin() + MapIt->second.second;
+ }
+
+ /// \brief Compute a common type among the uses of a particular partition.
+ ///
+  /// This routine walks all of the uses of a particular partition and tries
+ /// to find a common type between them. Untyped operations such as memset and
+ /// memcpy are ignored.
+ Type *getCommonType(iterator I) const;
+
+ void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
+ void printUsers(raw_ostream &OS, const_iterator I,
+ StringRef Indent = " ") const;
+ void print(raw_ostream &OS) const;
+ void dump(const_iterator I) const LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED;
+ void dump() const LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED;
+
+private:
+ template <typename DerivedT, typename RetT = void> class BuilderBase;
+ class PartitionBuilder;
+ friend class AllocaPartitioning::PartitionBuilder;
+ class UseBuilder;
+ friend class AllocaPartitioning::UseBuilder;
+
+ /// \brief Handle to alloca instruction to simplify method interfaces.
+ AllocaInst &AI;
+
+ /// \brief The instruction responsible for this alloca having no partitioning.
+ ///
+ /// When an instruction (potentially) escapes the pointer to the alloca, we
+ /// store a pointer to that here and abort trying to partition the alloca.
+ /// This will be null if the alloca is partitioned successfully.
+ Instruction *PointerEscapingInstr;
+
+ /// \brief The partitions of the alloca.
+ ///
+ /// We store a vector of the partitions over the alloca here. This vector is
+ /// sorted by increasing begin offset, and then by decreasing end offset. See
+ /// the Partition inner class for more details. Initially there are overlaps,
+  /// but during construction we form a disjoint sequence toward the end.
+ SmallVector<Partition, 8> Partitions;
+
+ /// \brief The uses of the partitions.
+ ///
+ /// This is essentially a mapping from each partition to a list of uses of
+ /// that partition. The mapping is done with a Uses vector that has the exact
+ /// same number of entries as the partition vector. Each entry is itself
+ /// a vector of the uses.
+ SmallVector<SmallVector<PartitionUse, 2>, 8> Uses;
+
+ /// \brief Instructions which will become dead if we rewrite the alloca.
+ ///
+ /// Note that these are not separated by partition. This is because we expect
+ /// a partitioned alloca to be completely rewritten or not rewritten at all.
+ /// If rewritten, all these instructions can simply be removed and replaced
+ /// with undef as they come from outside of the allocated space.
+ SmallVector<Instruction *, 8> DeadUsers;
+
+ /// \brief Operands which will become dead if we rewrite the alloca.
+ ///
+ /// These are operands that in their particular use can be replaced with
+ /// undef when we rewrite the alloca. These show up in out-of-bounds inputs
+ /// to PHI nodes and the like. They aren't entirely dead (there might be
+  /// a GEP back into the bounds using it elsewhere), and neither is the PHI,
+  /// but we
+ /// want to swap this particular input for undef to simplify the use lists of
+ /// the alloca.
+ SmallVector<Use *, 8> DeadOperands;
+
+  /// \brief The underlying storage for auxiliary memcpy and memmove info.
+ SmallDenseMap<MemTransferInst *, MemTransferOffsets, 4> MemTransferInstData;
+
+  /// \brief A side data structure used when building up the partitions and uses.
+ ///
+ /// This mapping is only really used during the initial building of the
+ /// partitioning so that we can retain information about PHI and select nodes
+ /// processed.
+ SmallDenseMap<Instruction *, std::pair<uint64_t, bool> > PHIOrSelectSizes;
+
+ /// \brief Auxiliary information for particular PHI or select operands.
+ SmallDenseMap<std::pair<Instruction *, Value *>,
+ std::pair<unsigned, unsigned>, 4> PHIOrSelectOpMap;
+
+ /// \brief A utility routine called from the constructor.
+ ///
+  /// This does what it says on the tin. It is the core of the alloca partition
+ /// splitting and merging. After it is called we have the desired disjoint
+ /// collection of partitions.
+ void splitAndMergePartitions();
+};
+}
+
+template <typename DerivedT, typename RetT>
+class AllocaPartitioning::BuilderBase
+ : public InstVisitor<DerivedT, RetT> {
+public:
+ BuilderBase(const TargetData &TD, AllocaInst &AI, AllocaPartitioning &P)
+ : TD(TD),
+ AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
+ P(P) {
+ enqueueUsers(AI, 0);
+ }
+
+protected:
+ const TargetData &TD;
+ const uint64_t AllocSize;
+ AllocaPartitioning &P;
+
+ struct OffsetUse {
+ Use *U;
+ uint64_t Offset;
+ };
+ SmallVector<OffsetUse, 8> Queue;
+
+ // The active offset and use while visiting.
+ Use *U;
+ uint64_t Offset;
+
+ void enqueueUsers(Instruction &I, uint64_t UserOffset) {
+ SmallPtrSet<User *, 8> UserSet;
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI) {
+ if (!UserSet.insert(*UI))
+ continue;
+
+ OffsetUse OU = { &UI.getUse(), UserOffset };
+ Queue.push_back(OU);
+ }
+ }
+
+ bool computeConstantGEPOffset(GetElementPtrInst &GEPI, uint64_t &GEPOffset) {
+ GEPOffset = Offset;
+ for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ return false;
+ if (OpC->isZero())
+ continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD.getStructLayout(STy);
+ GEPOffset += SL->getElementOffset(ElementIdx);
+ continue;
+ }
+
+ GEPOffset
+ += OpC->getZExtValue() * TD.getTypeAllocSize(GTI.getIndexedType());
+ }
+ return true;
+ }
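+
+  // Worked example (illustrative, not part of the original code): for
+  //   %p = getelementptr inbounds {i32, [4 x i16]}* %a, i32 0, i32 1, i32 2
+  // starting from Offset == 0, the leading zero index adds nothing, the
+  // struct index 1 adds the field offset 4, and the array index 2 adds
+  // 2 * sizeof(i16) == 4, so GEPOffset becomes 8. Any non-constant index
+  // makes this return false, and the partition builder then treats the GEP
+  // as escaping.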
+
+ Value *foldSelectInst(SelectInst &SI) {
+ // If the condition being selected on is a constant or the same value is
+ // being selected between, fold the select. Yes this does (rarely) happen
+ // early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
+ return SI.getOperand(1+CI->isZero());
+ if (SI.getOperand(1) == SI.getOperand(2)) {
+ assert(*U == SI.getOperand(1));
+ return SI.getOperand(1);
+ }
+ return 0;
+ }
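+
+  // E.g. (illustration): "select i1 true, i8* %p, i8* %q" folds to %p, and
+  // "select i1 %c, i8* %p, i8* %p" folds to %p regardless of %c.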
+};
+
+/// \brief Builder for the alloca partitioning.
+///
+/// This class builds an alloca partitioning by recursively visiting the uses
+/// of an alloca and splitting the partitions for each load and store at each
+/// offset.
+class AllocaPartitioning::PartitionBuilder
+ : public BuilderBase<PartitionBuilder, bool> {
+ friend class InstVisitor<PartitionBuilder, bool>;
+
+ SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap;
+
+public:
+ PartitionBuilder(const TargetData &TD, AllocaInst &AI, AllocaPartitioning &P)
+ : BuilderBase(TD, AI, P) {}
+
+ /// \brief Run the builder over the allocation.
+ bool operator()() {
+ // Note that we have to re-evaluate size on each trip through the loop as
+ // the queue grows at the tail.
+ for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
+ U = Queue[Idx].U;
+ Offset = Queue[Idx].Offset;
+ if (!visit(cast<Instruction>(U->getUser())))
+ return false;
+ }
+ return true;
+ }
+
+private:
+ bool markAsEscaping(Instruction &I) {
+ P.PointerEscapingInstr = &I;
+ return false;
+ }
+
+ void insertUse(Instruction &I, uint64_t Size, bool IsSplittable = false) {
+ uint64_t BeginOffset = Offset, EndOffset = Offset + Size;
+
+ // Completely skip uses which start outside of the allocation.
+ if (BeginOffset >= AllocSize) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
+ << " which starts past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << I << "\n");
+ return;
+ }
+
+ // Clamp the size to the allocation.
+ if (EndOffset > AllocSize) {
+ DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
+ << " to remain within the " << AllocSize << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << I << "\n");
+ EndOffset = AllocSize;
+ }
+
+ // See if we can just add a user onto the last slot currently occupied.
+ if (!P.Partitions.empty() &&
+ P.Partitions.back().BeginOffset == BeginOffset &&
+ P.Partitions.back().EndOffset == EndOffset) {
+ P.Partitions.back().IsSplittable &= IsSplittable;
+ return;
+ }
+
+ Partition New(BeginOffset, EndOffset, IsSplittable);
+ P.Partitions.push_back(New);
+ }
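+
+  // For instance (an illustration, not in the original comment): if a memset
+  // covering [0,8) of the alloca is visited and then an i64 store over the
+  // same [0,8) range is visited immediately after, the second insertUse call
+  // finds an identical partition at the back of the vector and just clears
+  // its splittability instead of pushing a duplicate Partition.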
+
+ bool handleLoadOrStore(Type *Ty, Instruction &I) {
+ uint64_t Size = TD.getTypeStoreSize(Ty);
+
+ // If this memory access can be shown to *statically* extend outside the
+    // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse. We also try to handle cases which might run the
+ // risk of overflow.
+ // FIXME: We should instead consider the pointer to have escaped if this
+ // function is being instrumented for addressing bugs or race conditions.
+ if (Offset >= AllocSize || Size > AllocSize || Offset + Size > AllocSize) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
+ << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
+ << " which extends past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << I << "\n");
+ return true;
+ }
+
+ insertUse(I, Size);
+ return true;
+ }
+
+ bool visitBitCastInst(BitCastInst &BC) {
+ enqueueUsers(BC, Offset);
+ return true;
+ }
+
+ bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ //unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ //assert(IntPtrWidth == Offset.getBitWidth());
+ uint64_t GEPOffset;
+ if (!computeConstantGEPOffset(GEPI, GEPOffset))
+ return markAsEscaping(GEPI);
+
+ enqueueUsers(GEPI, GEPOffset);
+ return true;
+ }
+
+ bool visitLoadInst(LoadInst &LI) {
+ return handleLoadOrStore(LI.getType(), LI);
+ }
+
+ bool visitStoreInst(StoreInst &SI) {
+ if (SI.getOperand(0) == *U)
+ return markAsEscaping(SI);
+
+ return handleLoadOrStore(SI.getOperand(0)->getType(), SI);
+ }
+
+
+ bool visitMemSetInst(MemSetInst &II) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
+ insertUse(II, Length ? Length->getZExtValue() : AllocSize - Offset, Length);
+ return true;
+ }
+
+ bool visitMemTransferInst(MemTransferInst &II) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
+ uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
+ if (!Size)
+ // Zero-length mem transfer intrinsics can be ignored entirely.
+ return true;
+
+ MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+
+ // Only intrinsics with a constant length can be split.
+ Offsets.IsSplittable = Length;
+
+ if (*U != II.getRawDest()) {
+ assert(*U == II.getRawSource());
+ Offsets.SourceBegin = Offset;
+ Offsets.SourceEnd = Offset + Size;
+ } else {
+ Offsets.DestBegin = Offset;
+ Offsets.DestEnd = Offset + Size;
+ }
+
+ insertUse(II, Size, Offsets.IsSplittable);
+ unsigned NewIdx = P.Partitions.size() - 1;
+
+ SmallDenseMap<Instruction *, unsigned>::const_iterator PMI;
+ bool Inserted = false;
+ llvm::tie(PMI, Inserted)
+ = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx));
+ if (!Inserted && Offsets.IsSplittable) {
+ // We've found a memory transfer intrinsic which refers to the alloca as
+ // both a source and dest. We refuse to split these to simplify splitting
+ // logic. If possible, SROA will still split them into separate allocas
+ // and then re-analyze.
+ Offsets.IsSplittable = false;
+ P.Partitions[PMI->second].IsSplittable = false;
+ P.Partitions[NewIdx].IsSplittable = false;
+ }
+
+ return true;
+ }
+
+  // Disable SROA for any intrinsics except the lifetime markers.
+ bool visitIntrinsicInst(IntrinsicInst &II) {
+ if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
+ II.getIntrinsicID() == Intrinsic::lifetime_end) {
+ ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
+ uint64_t Size = std::min(AllocSize - Offset, Length->getLimitedValue());
+ insertUse(II, Size, true);
+ return true;
+ }
+
+ return markAsEscaping(II);
+ }
+
+ Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
+ // We consider any PHI or select that results in a direct load or store of
+ // the same offset to be a viable use for partitioning purposes. These uses
+ // are considered unsplittable and the size is the maximum loaded or stored
+ // size.
+ SmallPtrSet<Instruction *, 4> Visited;
+ SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
+ Visited.insert(Root);
+ Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+ do {
+ Instruction *I, *UsedI;
+ llvm::tie(UsedI, I) = Uses.pop_back_val();
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Size = std::max(Size, TD.getTypeStoreSize(LI->getType()));
+ continue;
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ Value *Op = SI->getOperand(0);
+ if (Op == UsedI)
+ return SI;
+ Size = std::max(Size, TD.getTypeStoreSize(Op->getType()));
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ if (!GEP->hasAllZeroIndices())
+ return GEP;
+ } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
+ !isa<SelectInst>(I)) {
+ return I;
+ }
+
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI)
+ if (Visited.insert(cast<Instruction>(*UI)))
+ Uses.push_back(std::make_pair(I, cast<Instruction>(*UI)));
+ } while (!Uses.empty());
+
+ return 0;
+ }
+
+ bool visitPHINode(PHINode &PN) {
+ // See if we already have computed info on this node.
+ std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN];
+ if (PHIInfo.first) {
+ PHIInfo.second = true;
+ insertUse(PN, PHIInfo.first);
+ return true;
+ }
+
+ // Check for an unsafe use of the PHI node.
+ if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
+ return markAsEscaping(*EscapingI);
+
+ insertUse(PN, PHIInfo.first);
+ return true;
+ }
+
+ bool visitSelectInst(SelectInst &SI) {
+ if (Value *Result = foldSelectInst(SI)) {
+ if (Result == *U)
+ // If the result of the constant fold will be the pointer, recurse
+ // through the select as if we had RAUW'ed it.
+ enqueueUsers(SI, Offset);
+
+ return true;
+ }
+
+ // See if we already have computed info on this node.
+ std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI];
+ if (SelectInfo.first) {
+ SelectInfo.second = true;
+ insertUse(SI, SelectInfo.first);
+ return true;
+ }
+
+    // Check for an unsafe use of the select.
+ if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
+ return markAsEscaping(*EscapingI);
+
+ insertUse(SI, SelectInfo.first);
+ return true;
+ }
+
+ /// \brief Disable SROA entirely if there are unhandled users of the alloca.
+ bool visitInstruction(Instruction &I) { return markAsEscaping(I); }
+};
+
+
+/// \brief Use adder for the alloca partitioning.
+///
+/// This class adds each use of an alloca to all of the partitions which that
+/// use overlaps. For splittable partitions, this can end up doing essentially
+/// a linear
+/// walk of the partitions, but the number of steps remains bounded by the
+/// total result instruction size:
+/// - The number of partitions is a result of the number of unsplittable
+/// instructions using the alloca.
+/// - The number of users of each partition is at worst the total number of
+/// splittable instructions using the alloca.
+/// Thus we will produce N * M instructions in the end, where N is the number
+/// of unsplittable uses and M is the number of splittable ones. This visitor does
+/// the exact same number of updates to the partitioning.
+///
+/// In the more common case, this visitor will leverage the fact that the
+/// partition space is pre-sorted, and do a logarithmic search for the
+/// partition needed, making the total visit a classical ((N + M) * log(N))
+/// complexity operation.
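+///
+/// As a rough illustration, an alloca with 3 disjoint unsplittable stores
+/// and 2 constant-length memcpys spanning all of them yields 3 partitions,
+/// each of which records a use for both memcpys, i.e. on the order of
+/// N * M = 6 splittable use entries in addition to the unsplittable ones.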
+class AllocaPartitioning::UseBuilder : public BuilderBase<UseBuilder> {
+ friend class InstVisitor<UseBuilder>;
+
+ /// \brief Set to de-duplicate dead instructions found in the use walk.
+ SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
+
+public:
+ UseBuilder(const TargetData &TD, AllocaInst &AI, AllocaPartitioning &P)
+ : BuilderBase(TD, AI, P) {}
+
+ /// \brief Run the builder over the allocation.
+ void operator()() {
+ // Note that we have to re-evaluate size on each trip through the loop as
+ // the queue grows at the tail.
+ for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
+ U = Queue[Idx].U;
+ Offset = Queue[Idx].Offset;
+ this->visit(cast<Instruction>(U->getUser()));
+ }
+ }
+
+private:
+ void markAsDead(Instruction &I) {
+ if (VisitedDeadInsts.insert(&I))
+ P.DeadUsers.push_back(&I);
+ }
+
+ void insertUse(uint64_t Size, Instruction &User) {
+ uint64_t BeginOffset = Offset, EndOffset = Offset + Size;
+
+ // If the use extends outside of the allocation, record it as a dead use
+ // for elimination later.
+ if (BeginOffset >= AllocSize || Size == 0)
+ return markAsDead(User);
+
+ // Bound the use by the size of the allocation.
+ if (EndOffset > AllocSize)
+ EndOffset = AllocSize;
+
+ // NB: This only works if we have zero overlapping partitions.
+ iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
+ B = llvm::prior(B);
+ for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
+ ++I) {
+ PartitionUse NewUse(std::max(I->BeginOffset, BeginOffset),
+ std::min(I->EndOffset, EndOffset),
+ &User, cast<Instruction>(*U));
+ P.Uses[I - P.begin()].push_back(NewUse);
+ if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
+ P.PHIOrSelectOpMap[std::make_pair(&User, U->get())]
+ = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1);
+ }
+ }
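+
+  // Illustrative walk-through (not part of the original commit): if the
+  // partitions are [0,8), [8,12), and [12,16) and a splittable memcpy use
+  // covers [0,16), the lower_bound search lands on [0,8) and the loop then
+  // records three PartitionUses clamped to [0,8), [8,12), and [12,16),
+  // one in each partition's use list.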
+
+ void handleLoadOrStore(Type *Ty, Instruction &I) {
+ uint64_t Size = TD.getTypeStoreSize(Ty);
+
+ // If this memory access can be shown to *statically* extend outside the
+    // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset >= AllocSize || Size > AllocSize || Offset + Size > AllocSize)
+ return markAsDead(I);
+
+ insertUse(Size, I);
+ }
+
+ void visitBitCastInst(BitCastInst &BC) {
+ if (BC.use_empty())
+ return markAsDead(BC);
+
+