-rw-r--r--   include/llvm/ADT/DenseMap.h        |   10
-rw-r--r--   include/llvm/ADT/SparseBitVector.h |   39
-rw-r--r--   lib/Analysis/IPA/Andersens.cpp     | 1017
3 files changed, 961 insertions, 105 deletions
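The ADT changes exist to support the new Andersens code: DenseMap gains a resize() for pre-sizing (used by Set2PEClass.resize() in HU() below), and grow() now takes a minimum bucket count. A minimal usage sketch, with hypothetical names (prepopulate, Cache, Count), exercising only the API this patch adds:

    #include "llvm/ADT/DenseMap.h"
    using namespace llvm;

    void prepopulate(unsigned Count) {
      DenseMap<unsigned, unsigned> Cache;
      // One reallocation up front instead of repeated doublings while
      // inserting; resize() only grows, it never shrinks.
      Cache.resize(Count);
      for (unsigned i = 0; i < Count; ++i)
        Cache[i] = i * 2;
    }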
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 0b9cddf092..78d8f83cbb 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -99,6 +99,9 @@ public:
   bool empty() const { return NumEntries == 0; }
   unsigned size() const { return NumEntries; }
+
+  /// Grow the densemap so that it has at least Size buckets. Does not shrink.
+  void resize(size_t Size) { grow(Size); }
 
   void clear() {
     // If the capacity of the array is huge, and the # elements used is small,
@@ -228,7 +231,7 @@ private:
       // causing infinite loops in lookup.
       if (NumEntries*4 >= NumBuckets*3 ||
           NumBuckets-(NumEntries+NumTombstones) < NumBuckets/8) {
-        this->grow();
+        this->grow(NumBuckets * 2);
         LookupBucketFor(Key, TheBucket);
       }
       ++NumEntries;
@@ -310,12 +313,13 @@ private:
       new (&Buckets[i].first) KeyT(EmptyKey);
   }
 
-  void grow() {
+  void grow(unsigned AtLeast) {
     unsigned OldNumBuckets = NumBuckets;
     BucketT *OldBuckets = Buckets;
 
     // Double the number of buckets.
-    NumBuckets <<= 1;
+    while (NumBuckets <= AtLeast)
+      NumBuckets <<= 1;
     NumTombstones = 0;
     Buckets = reinterpret_cast<BucketT*>(new char[sizeof(BucketT)*NumBuckets]);
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index 6462688673..97439706c5 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -75,7 +75,6 @@ private:
   }
 
   friend struct ilist_traits<SparseBitVectorElement<ElementSize> >;
-
 public:
   explicit SparseBitVectorElement(unsigned Idx) {
     ElementIndex = Idx;
@@ -287,6 +286,14 @@ public:
     }
     BecameZero = allzero;
   }
+  // Get a hash value for this element.
+  uint64_t getHashValue() const {
+    uint64_t HashVal = 0;
+    for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) {
+      HashVal ^= Bits[i];
+    }
+    return HashVal;
+  }
 };
 
 template <unsigned ElementSize = 128>
@@ -544,22 +551,20 @@ public:
     return false;
   }
 
-  bool operator!=(const SparseBitVector &RHS) {
+  bool operator!=(const SparseBitVector &RHS) const {
     return !(*this == RHS);
   }
 
-  bool operator==(const SparseBitVector &RHS) {
+  bool operator==(const SparseBitVector &RHS) const {
     ElementListConstIter Iter1 = Elements.begin();
     ElementListConstIter Iter2 = RHS.Elements.begin();
 
-    while (Iter2 != RHS.Elements.end()) {
-      if (Iter1->index() != Iter2->index()
-          || *Iter1 != *Iter2)
+    for (; Iter1 != Elements.end() && Iter2 != RHS.Elements.end();
+         ++Iter1, ++Iter2) {
+      if (*Iter1 != *Iter2)
         return false;
-      ++Iter1;
-      ++Iter2;
     }
-    return Iter1 == Elements.end();
+    return Iter1 == Elements.end() && Iter2 == RHS.Elements.end();
   }
 
   // Union our bitmap with the RHS and return true if we changed.
@@ -789,6 +794,17 @@ public:
     return iterator(this, ~0);
   }
 
+  // Get a hash value for this bitmap.
+  uint64_t getHashValue() const {
+    uint64_t HashVal = 0;
+    for (ElementListConstIter Iter = Elements.begin();
+         Iter != Elements.end();
+         ++Iter) {
+      HashVal ^= Iter->index();
+      HashVal ^= Iter->getHashValue();
+    }
+    return HashVal;
+  }
 };
 
 // Convenience functions to allow Or and And without dereferencing in the user
@@ -828,9 +844,10 @@ void dump(const SparseBitVector<ElementSize> &LHS, llvm::OStream &out) {
   for (bi = LHS.begin(); bi != LHS.end(); ++bi) {
     out << *bi << " ";
   }
-  out << "\n";
+  out << " ]\n";
 }
-
 }
+
+
 #endif
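The bitmap getHashValue() XORs together element indices and bit words, so it is cheap, order-independent, and consistent with the now const-qualified operator==: equal sets always hash equal, while distinct sets may collide. A minimal sketch with a hypothetical helper, using only the APIs added above:

    #include "llvm/ADT/SparseBitVector.h"
    using namespace llvm;

    // Compare two points-to sets, rejecting cheaply on hash mismatch first.
    bool sameSet(const SparseBitVector<> &A, const SparseBitVector<> &B) {
      if (A.getHashValue() != B.getHashValue())
        return false;               // definitely different
      return A == B;                // hash collision or genuinely equal
    }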
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index 9a1ff569b8..653ffdded8 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -16,7 +16,8 @@
 // This algorithm is implemented as four stages:
 //   1. Object identification.
 //   2. Inclusion constraint identification.
-//   3. Inclusion constraint solving.
+//   3. Offline constraint graph optimization.
+//   4. Inclusion constraint solving.
 //
 // The object identification stage identifies all of the memory objects in the
 // program, which includes globals, heap allocated objects, and stack allocated
@@ -29,20 +30,25 @@
 // B can point to.  Constraints can handle copies, loads, and stores, and
 // address taking.
 //
+// The offline constraint graph optimization portion includes offline variable
+// substitution algorithms intended to compute pointer and location
+// equivalences.  Pointer equivalences are those pointers that will have the
+// same points-to sets, and location equivalences are those variables that
+// always appear together in points-to sets.
+//
 // The inclusion constraint solving phase iteratively propagates the inclusion
 // constraints until a fixed point is reached.  This is an O(N^3) algorithm.
 //
 // Function constraints are handled as if they were structs with X fields.
 // Thus, an access to argument X of function Y is an access to node index
 // getNode(Y) + X.  This representation allows handling of indirect calls
-// without any issues.  To wit, an indirect call Y(a,b) is equivalence to
+// without any issues.  To wit, an indirect call Y(a,b) is equivalent to
 // *(Y + 1) = a, *(Y + 2) = b.
 // The return node for a function is always located at getNode(F) +
 // CallReturnPos.  The arguments start at getNode(F) + CallFirstArgPos.
 //
 // Future Improvements:
-//   Offline variable substitution, offline detection of online
-//   cycles.  Use of BDD's.
+//   Offline detection of online cycles.  Use of BDD's.
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "anders-aa"
@@ -59,6 +65,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
 #include <algorithm>
 #include <set>
 #include <list>
@@ -66,18 +73,42 @@
 #include <vector>
 using namespace llvm;
 
-STATISTIC(NumIters , "Number of iterations to reach convergence");
-STATISTIC(NumConstraints , "Number of constraints");
-STATISTIC(NumNodes , "Number of nodes");
-STATISTIC(NumUnified , "Number of variables unified");
+STATISTIC(NumIters      , "Number of iterations to reach convergence");
+STATISTIC(NumConstraints, "Number of constraints");
+STATISTIC(NumNodes      , "Number of nodes");
+STATISTIC(NumUnified    , "Number of variables unified");
 
 namespace {
   const unsigned SelfRep = (unsigned)-1;
   const unsigned Unvisited = (unsigned)-1;
   // Position of the function return node relative to the function node.
-  const unsigned CallReturnPos = 2;
+  const unsigned CallReturnPos = 1;
 
   // Position of the function call node relative to the function node.
-  const unsigned CallFirstArgPos = 3;
+  const unsigned CallFirstArgPos = 2;
+
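Both constants drop by one because the separate function-object node goes away (ObjectNodes[F] is no longer allocated; see the IdentifyObjects and CollectConstraints hunks below). An editorial summary of the resulting per-function layout, following the header comment's convention:

    // getNode(F) + 0                     the function pointer F itself
    // getNode(F) + CallReturnPos         the value F returns
    // getNode(F) + CallFirstArgPos + i   F's i'th pointer argument
    //
    // so an indirect call r = Y(a, b) stores a into *(Y + CallFirstArgPos),
    // b into *(Y + CallFirstArgPos + 1), and reads r from *(Y + CallReturnPos).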
+  struct BitmapKeyInfo {
+    static inline SparseBitVector<> *getEmptyKey() {
+      return reinterpret_cast<SparseBitVector<> *>(-1);
+    }
+    static inline SparseBitVector<> *getTombstoneKey() {
+      return reinterpret_cast<SparseBitVector<> *>(-2);
+    }
+    static unsigned getHashValue(const SparseBitVector<> *bitmap) {
+      return bitmap->getHashValue();
+    }
+    static bool isEqual(const SparseBitVector<> *LHS,
+                        const SparseBitVector<> *RHS) {
+      if (LHS == RHS)
+        return true;
+      else if (LHS == getEmptyKey() || RHS == getEmptyKey()
+               || LHS == getTombstoneKey() || RHS == getTombstoneKey())
+        return false;
+
+      return *LHS == *RHS;
+    }
+
+    static bool isPod() { return true; }
+  };
 
   class VISIBILITY_HIDDEN Andersens : public ModulePass, public AliasAnalysis,
                                       private InstVisitor<Andersens> {
@@ -89,7 +120,7 @@ namespace {
     /// 'store' for statements like "*A = B", and AddressOf for statements like
     /// A = alloca;  The Offset is applied as *(A + K) = B for stores,
     /// A = *(B + K) for loads, and A = B + K for copies.  It is
-    /// illegal on addressof constraints (Because it is statically
+    /// illegal on addressof constraints (because it is statically
     /// resolvable to A = &C where C = B + K)
 
     struct Constraint {
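An editorial illustration of the constraint language this struct encodes; the node names are hypothetical, and the (Type, Dest, Src) argument order matches the AddressOf constraint built in CollectConstraints below:

    //   p = alloca i32        ->  Constraint(AddressOf, p, p_obj)  // p = &p_obj
    //   v = load i32* p       ->  Constraint(Load,  v, p)          // v = *p
    //   store i32 v, i32* q   ->  Constraint(Store, q, v)          // *q = v
    //   r = bitcast i32* p    ->  Constraint(Copy,  r, p)          // r = p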
@@ -105,29 +136,53 @@ namespace {
       }
     };
 
-    // Node class - This class is used to represent a node
-    // in the constraint graph.  Due to various optimizations,
-    // not always the case that there is a mapping from a Node to a
-    // Value.  In particular, we add artificial
-    // Node's that represent the set of pointed-to variables
-    // shared for each location equivalent Node.
+    // Node class - This class is used to represent a node in the constraint
+    // graph.  Due to various optimizations, it is not always the case that
+    // there is a mapping from a Node to a Value.  In particular, we add
+    // artificial Nodes that represent the set of pointed-to variables shared
+    // for each location equivalent Node.
     struct Node {
       Value *Val;
       SparseBitVector<> *Edges;
       SparseBitVector<> *PointsTo;
       SparseBitVector<> *OldPointsTo;
       bool Changed;
       std::list<Constraint> Constraints;
-      // Nodes in cycles (or in equivalence classes) are united
-      // together using a standard union-find representation with path
-      // compression.  NodeRep gives the index into GraphNodes
-      // representative for this one.
-      unsigned NodeRep; public:
-
-      Node() : Val(0), Edges(0), PointsTo(0), OldPointsTo(0), Changed(false),
-               NodeRep(SelfRep) {
-      }
+      // Pointer and location equivalence labels
+      unsigned PointerEquivLabel;
+      unsigned LocationEquivLabel;
+      // Predecessor edges, both real and implicit
+      SparseBitVector<> *PredEdges;
+      SparseBitVector<> *ImplicitPredEdges;
+      // Set of nodes that point to us, only used for location equivalence.
+      SparseBitVector<> *PointedToBy;
+      // Number of incoming edges, used during variable substitution to free
+      // the points-to sets early
+      unsigned NumInEdges;
+      // True if our points-to set is in the Set2PEClass map
+      bool StoredInHash;
+      // True if our node has no indirect constraints (Complex or otherwise)
+      bool Direct;
+      // True if the node is address taken, *or* it is part of a group of
+      // nodes that must be kept together.  This is set to true for functions
+      // and their arg nodes, which must be kept at the same position relative
+      // to their base function node.
+      bool AddressTaken;
+
+      // Nodes in cycles (or in equivalence classes) are united together using
+      // a standard union-find representation with path compression.  NodeRep
+      // gives the index into GraphNodes for the representative Node.
+      unsigned NodeRep;
+    public:
+
+      Node(bool direct = true) :
+        Val(0), Edges(0), PointsTo(0), OldPointsTo(0), Changed(false),
+        PointerEquivLabel(0), LocationEquivLabel(0), PredEdges(0),
+        ImplicitPredEdges(0), PointedToBy(0), NumInEdges(0),
+        StoredInHash(false), Direct(direct), AddressTaken(false),
+        NodeRep(SelfRep) { }
 
       Node *setValue(Value *V) {
         assert(Val == 0 && "Value already set for this node!");
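NodeRep encodes the union-find forest: SelfRep marks a representative, any other value is the index of a parent node. A sketch of the lookup this supports, using the file's Node and SelfRep; the pass's own FindNode (used in HU() below) plays this role, so the standalone helper here is illustrative only:

    // Follow NodeRep links to the representative, compressing the path.
    unsigned findRep(std::vector<Node> &GraphNodes, unsigned NodeIndex) {
      unsigned Rep = NodeIndex;
      while (GraphNodes[Rep].NodeRep != SelfRep)
        Rep = GraphNodes[Rep].NodeRep;
      while (GraphNodes[NodeIndex].NodeRep != SelfRep) {
        unsigned Next = GraphNodes[NodeIndex].NodeRep;
        GraphNodes[NodeIndex].NodeRep = Rep;   // point directly at the root
        NodeIndex = Next;
      }
      return Rep;
    }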
@@ -163,28 +218,28 @@ namespace {
 
     /// ValueNodes - This map indicates the Node that a particular Value* is
     /// represented by.  This contains entries for all pointers.
-    std::map<Value*, unsigned> ValueNodes;
+    DenseMap<Value*, unsigned> ValueNodes;
 
     /// ObjectNodes - This map contains entries for each memory object in the
     /// program: globals, alloca's and mallocs.
-    std::map<Value*, unsigned> ObjectNodes;
+    DenseMap<Value*, unsigned> ObjectNodes;
 
     /// ReturnNodes - This map contains an entry for each function in the
     /// program that returns a value.
-    std::map<Function*, unsigned> ReturnNodes;
+    DenseMap<Function*, unsigned> ReturnNodes;
 
     /// VarargNodes - This map contains the entry used to represent all
     /// pointers passed through the varargs portion of a function call for a
     /// particular function.  An entry is not present in this map for
     /// functions that do not take variable arguments.
-    std::map<Function*, unsigned> VarargNodes;
+    DenseMap<Function*, unsigned> VarargNodes;
 
     /// Constraints - This vector contains a list of all of the constraints
     /// identified by the program.
     std::vector<Constraint> Constraints;
 
-    // Map from graph node to maximum K value that is allowed (For functions,
+    // Map from graph node to maximum K value that is allowed (for functions,
     // this is equivalent to the number of arguments + CallFirstArgPos)
     std::map<unsigned, unsigned> MaxK;
 
@@ -193,9 +248,10 @@ namespace {
     enum {
       UniversalSet = 0,
       NullPtr = 1,
-      NullObject = 2
+      NullObject = 2,
+      NumberSpecialNodes
     };
-    // Stack for Tarjans
+    // Stack for Tarjan's
     std::stack<unsigned> SCCStack;
     // Topological Index -> Graph node
     std::vector<unsigned> Topo2Node;
@@ -209,6 +265,34 @@ namespace {
     unsigned DFSNumber;
     unsigned RPONumber;
 
+    // Offline variable substitution related things
+
+    // Temporary rep storage, used because we can't collapse SCC's in the
+    // predecessor graph by uniting the variables permanently; we can only do
+    // so for the successor graph.
+    std::vector<unsigned> VSSCCRep;
+    // Mapping from node to whether we have visited it during SCC finding yet.
+    std::vector<bool> Node2Visited;
+    // During variable substitution, we create unknowns to represent the
+    // unknown value that is a dereference of a variable.  These nodes are
+    // known as "ref" nodes (since they represent the value of dereferences).
+    unsigned FirstRefNode;
+    // During HVN, we represent address taken nodes as if they were unknown
+    // (since HVN, unlike HU, does not evaluate unions).
+    unsigned FirstAdrNode;
+    // Current pointer equivalence class number
+    unsigned PEClass;
+    // Mapping from points-to sets to equivalence classes
+    typedef DenseMap<SparseBitVector<> *, unsigned, BitmapKeyInfo> BitVectorMap;
+    BitVectorMap Set2PEClass;
+    // Mapping from pointer equivalences to the representative node.  -1 if we
+    // have no representative node for this pointer equivalence class yet.
+    std::vector<int> PEClass2Node;
+    // Mapping from pointer equivalences to representative node.  This
+    // includes pointer equivalent but not location equivalent variables.  -1
+    // if we have no representative node for this pointer equivalence class
+    // yet.
+    std::vector<int> PENLEClass2Node;
+
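How HVN and HU below use FirstRefNode and FirstAdrNode: node n stands for the variable itself, FirstRefNode + n for *n, and FirstAdrNode + n for &n. The concrete values are set up in OptimizeConstraints, which this excerpt does not show; assuming the ref and adr segments each mirror the real node range, the node space has three equal segments:

    //   [0, FirstRefNode)                 the real graph nodes n
    //   [FirstRefNode, FirstAdrNode)      ref nodes: FirstRefNode + n is *n
    //   [FirstAdrNode, ...)               adr nodes: FirstAdrNode + n is &n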
   public:
     static char ID;
     Andersens() : ModulePass((intptr_t)&ID) {}
 
@@ -217,7 +301,11 @@
       InitializeAliasAnalysis(this);
       IdentifyObjects(M);
       CollectConstraints(M);
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-constraints"
       DEBUG(PrintConstraints());
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
       SolveConstraints();
       DEBUG(PrintPointsToGraph());
 
@@ -275,7 +363,7 @@
       if (!isa<GlobalValue>(C))
         return getNodeForConstantPointer(C);
 
-      std::map<Value*, unsigned>::iterator I = ValueNodes.find(V);
+      DenseMap<Value*, unsigned>::iterator I = ValueNodes.find(V);
       if (I == ValueNodes.end()) {
 #ifndef NDEBUG
         V->dump();
@@ -288,7 +376,7 @@
     /// getObject - Return the node corresponding to the memory object for the
     /// specified global or allocation instruction.
     unsigned getObject(Value *V) {
-      std::map<Value*, unsigned>::iterator I = ObjectNodes.find(V);
+      DenseMap<Value*, unsigned>::iterator I = ObjectNodes.find(V);
       assert(I != ObjectNodes.end() &&
              "Value does not have an object in the points-to graph!");
       return I->second;
@@ -297,7 +385,7 @@
     /// getReturnNode - Return the node representing the return value for the
     /// specified function.
     unsigned getReturnNode(Function *F) {
-      std::map<Function*, unsigned>::iterator I = ReturnNodes.find(F);
+      DenseMap<Function*, unsigned>::iterator I = ReturnNodes.find(F);
       assert(I != ReturnNodes.end() && "Function does not return a value!");
       return I->second;
     }
@@ -305,7 +393,7 @@
     /// getVarargNode - Return the node representing the variable arguments
     /// formal for the specified function.
     unsigned getVarargNode(Function *F) {
-      std::map<Function*, unsigned>::iterator I = VarargNodes.find(F);
+      DenseMap<Function*, unsigned>::iterator I = VarargNodes.find(F);
       assert(I != VarargNodes.end() && "Function does not take var args!");
       return I->second;
     }
@@ -325,9 +413,18 @@
     void CollectConstraints(Module &M);
     bool AnalyzeUsesOfFunction(Value *);
     void CreateConstraintGraph();
+    void OptimizeConstraints();
+    unsigned FindEquivalentNode(unsigned, unsigned);
+    void ClumpAddressTaken();
+    void RewriteConstraints();
+    void HU();
+    void HVN();
+    void UnitePointerEquivalences();
     void SolveConstraints();
     void QueryNode(unsigned Node);
-
+    void Condense(unsigned Node);
+    void HUValNum(unsigned Node);
+    void HVNValNum(unsigned Node);
     unsigned getNodeForConstantPointer(Constant *C);
     unsigned getNodeForConstantPointerTarget(Constant *C);
     void AddGlobalInitializerConstraints(unsigned, Constant *C);
@@ -339,6 +436,8 @@
 
     void PrintNode(Node *N);
     void PrintConstraints();
+    void PrintConstraint(const Constraint &);
+    void PrintLabels();
     void PrintPointsToGraph();
 
     //===------------------------------------------------------------------===//
@@ -506,7 +605,6 @@ void Andersens::IdentifyObjects(Module &M) {
     // The function itself is a memory object.
     unsigned First = NumObjects;
     ValueNodes[F] = NumObjects++;
-    ObjectNodes[F] = NumObjects++;
     if (isa<PointerType>(F->getFunctionType()->getReturnType()))
       ReturnNodes[F] = NumObjects++;
     if (F->getFunctionType()->isVarArg())
@@ -516,10 +614,11 @@ void Andersens::IdentifyObjects(Module &M) {
     // Add nodes for all of the incoming pointer arguments.
     for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
          I != E; ++I)
-      if (isa<PointerType>(I->getType()))
-        ValueNodes[I] = NumObjects++;
+      {
+        if (isa<PointerType>(I->getType()))
+          ValueNodes[I] = NumObjects++;
+      }
     MaxK[First] = NumObjects - First;
-    MaxK[First + 1] = NumObjects - First - 1;
 
     // Scan the function body, creating a memory object for each heap/stack
     // allocation in the body of the function and a node to represent all
@@ -796,11 +895,6 @@ void Andersens::CollectConstraints(Module &M) {
   }
 
   for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
-    // Make the function address point to the function object.
-    unsigned ObjectIndex = getObject(F);
-    GraphNodes[ObjectIndex].setValue(F);
-    Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*F),
-                                     ObjectIndex));
     // Set up the return value node.
     if (isa<PointerType>(F->getFunctionType()->getReturnType()))
       GraphNodes[getReturnNode(F)].setValue(F);
@@ -1091,8 +1185,736 @@ bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const {
   return Result;
 }
 
-// Create the constraint graph used for solving points-to analysis.
-//
+void dumpToDOUT(SparseBitVector<> *bitmap) {
+  dump(*bitmap, DOUT);
+}
+
+
+/// Clump together address taken variables so that the points-to sets use up
+/// less space and can be operated on faster.
+
+void Andersens::ClumpAddressTaken() {
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-renumber"
+  std::vector<unsigned> Translate;
+  std::vector<Node> NewGraphNodes;
+
+  Translate.resize(GraphNodes.size());
+  unsigned NewPos = 0;
+
+  for (unsigned i = 0; i < Constraints.size(); ++i) {
+    Constraint &C = Constraints[i];
+    if (C.Type == Constraint::AddressOf) {
+      GraphNodes[C.Src].AddressTaken = true;
+    }
+  }
+  for (unsigned i = 0; i < NumberSpecialNodes; ++i) {
+    unsigned Pos = NewPos++;
+    Translate[i] = Pos;
+    NewGraphNodes.push_back(GraphNodes[i]);
+    DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+  }
+
+  // I believe this ends up being faster than making two vectors and splicing
+  // them.
+  for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
+    if (GraphNodes[i].AddressTaken) {
+      unsigned Pos = NewPos++;
+      Translate[i] = Pos;
+      NewGraphNodes.push_back(GraphNodes[i]);
+      DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+    }
+  }
+
+  for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
+    if (!GraphNodes[i].AddressTaken) {
+      unsigned Pos = NewPos++;
+      Translate[i] = Pos;
+      NewGraphNodes.push_back(GraphNodes[i]);
+      DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+    }
+  }
+
+  for (DenseMap<Value*, unsigned>::iterator Iter = ValueNodes.begin();
+       Iter != ValueNodes.end();
+       ++Iter)
+    Iter->second = Translate[Iter->second];
+
+  for (DenseMap<Value*, unsigned>::iterator Iter = ObjectNodes.begin();
+       Iter != ObjectNodes.end();
+       ++Iter)
+    Iter->second = Translate[Iter->second];
+
+  for (DenseMap<Function*, unsigned>::iterator Iter = ReturnNodes.begin();
+       Iter != ReturnNodes.end();
+       ++Iter)
+    Iter->second = Translate[Iter->second];
+
+  for (DenseMap<Function*, unsigned>::iterator Iter = VarargNodes.begin();
+       Iter != VarargNodes.end();
+       ++Iter)
+    Iter->second = Translate[Iter->second];
+
+  for (unsigned i = 0; i < Constraints.size(); ++i) {
+    Constraint &C = Constraints[i];
+    C.Src = Translate[C.Src];
+    C.Dest = Translate[C.Dest];
+  }
+
+  GraphNodes.swap(NewGraphNodes);
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+}
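The effect of the renumbering, as an editorial before/after picture (letters are hypothetical nodes, * marks address-taken, U/NP/NO are the special nodes):

    //   before:  U NP NO  a  b*  c  d*  e
    //   after:   U NP NO  b*  d*  a  c  e
    //
    // Points-to bits can only name address-taken nodes, so after clumping
    // every points-to set lives in one small, dense range of bit indices,
    // which keeps the SparseBitVectors short and fast to union.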
+
+/// The technique used here is described in "Exploiting Pointer and Location
+/// Equivalence to Optimize Pointer Analysis," in the 14th International
+/// Static Analysis Symposium (SAS), August 2007.  It is known as the "HVN"
+/// algorithm, and is equivalent to value numbering the collapsed constraint
+/// graph without evaluating unions.  This is used as a pre-pass to HU in
+/// order to resolve first order pointer dereferences and speed up/reduce
+/// memory usage of HU.  Running both is equivalent to HRU without the
+/// iteration.
+/// HVN in more detail:
+/// Imagine the set of constraints was simply straight line code with no loops
+/// (we eliminate cycles, so there are no loops), such as:
+/// E = &D
+/// E = &C
+/// E = F
+/// F = G
+/// G = F
+/// Applying value numbering to this code tells us:
+/// G == F == E
+///
+/// For HVN, this is as far as it goes.  We assign new value numbers to every
+/// "address node" and every "reference node".
+/// To get the optimal result for this, we use a DFS + SCC (since all nodes in
+/// a cycle must have the same value number, because the = operation is really
+/// inclusion, not overwrite), and value number the nodes we receive points-to
+/// sets from before we value our own node.
+/// The advantage of HU over HVN is that HU considers the inclusion property,
+/// so that if you have
+/// E = &D
+/// E = &C
+/// E = F
+/// F = G
+/// F = &D
+/// G = F
+/// HU will determine that G == F == E.  HVN will not, because it cannot prove
+/// that the points-to information ends up being the same because they all
+/// receive &D from E anyway.
+
+void Andersens::HVN() {
+  DOUT << "Beginning HVN\n";
+  // Build a predecessor graph.  This is like our constraint graph with the
+  // edges going in the opposite direction, and there are edges for all the
+  // constraints, instead of just copy constraints.  We also build implicit
+  // edges for constraints that are implied but not explicit.  I.e., for the
+  // constraint a = &b, we add the implicit edge *a = b.  This helps us
+  // capture more cycles.
+  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+    Constraint &C = Constraints[i];
+    if (C.Type == Constraint::AddressOf) {
+      GraphNodes[C.Src].AddressTaken = true;
+      GraphNodes[C.Src].Direct = false;
+
+      // Dest = &src edge
+      unsigned AdrNode = C.Src + FirstAdrNode;
+      if (!GraphNodes[C.Dest].PredEdges)
+        GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
+      GraphNodes[C.Dest].PredEdges->set(AdrNode);
+
+      // *Dest = src edge
+      unsigned RefNode = C.Dest + FirstRefNode;
+      if (!GraphNodes[RefNode].ImplicitPredEdges)
+        GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
+      GraphNodes[RefNode].ImplicitPredEdges->set(C.Src);
+    } else if (C.Type == Constraint::Load) {
+      if (C.Offset == 0) {
+        // dest = *src edge
+        if (!GraphNodes[C.Dest].PredEdges)
+          GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
+        GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode);
+      } else {
+        GraphNodes[C.Dest].Direct = false;
+      }
+    } else if (C.Type == Constraint::Store) {
+      if (C.Offset == 0) {
+        // *dest = src edge
+        unsigned RefNode = C.Dest + FirstRefNode;
+        if (!GraphNodes[RefNode].PredEdges)
+          GraphNodes[RefNode].PredEdges = new SparseBitVector<>;
+        GraphNodes[RefNode].PredEdges->set(C.Src);
+      }
+    } else {
+      // Dest = Src edge and *Dest = *Src edge
+      if (!GraphNodes[C.Dest].PredEdges)
+        GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
+      GraphNodes[C.Dest].PredEdges->set(C.Src);
+      unsigned RefNode = C.Dest + FirstRefNode;
+      if (!GraphNodes[RefNode].ImplicitPredEdges)
+        GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
+      GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode);
+    }
+  }
+  PEClass = 1;
+  // Do SCC finding first to condense our predecessor graph
+  DFSNumber = 0;
+  Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
+  Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
+  Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
+
+  for (unsigned i = 0; i < FirstRefNode; ++i) {
+    unsigned Node = VSSCCRep[i];
+    if (!Node2Visited[Node])
+      HVNValNum(Node);
+  }
+  for (BitVectorMap::iterator Iter = Set2PEClass.begin();
+       Iter != Set2PEClass.end();
+       ++Iter)
+    delete Iter->first;
+  Set2PEClass.clear();
+  Node2DFS.clear();
+  Node2Deleted.clear();
+  Node2Visited.clear();
+  DOUT << "Finished HVN\n";
+}
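A condensed summary of the predecessor edges the loop above builds, writing ref(x) for x + FirstRefNode and adr(x) for x + FirstAdrNode:

    //   a = &b :  pred(a) += adr(b);   implicit pred(ref(a)) += b
    //   a = *b :  pred(a) += ref(b)             (only when Offset == 0)
    //   *a = b :  pred(ref(a)) += b             (only when Offset == 0)
    //   a = b  :  pred(a) += b;        implicit pred(ref(a)) += ref(b)
    //
    // A load with Offset != 0 adds no edge and marks the destination as not
    // Direct; a store with Offset != 0 adds no edge at all.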
+
+/// This is the workhorse of HVN value numbering.  We combine SCC finding at
+/// the same time because it's easy.
+void Andersens::HVNValNum(unsigned NodeIndex) {
+  unsigned MyDFS = DFSNumber++;
+  Node *N = &GraphNodes[NodeIndex];
+  Node2Visited[NodeIndex] = true;
+  Node2DFS[NodeIndex] = MyDFS;
+
+  // First process all our explicit edges
+  if (N->PredEdges)
+    for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+         Iter != N->PredEdges->end();
+         ++Iter) {
+      unsigned j = VSSCCRep[*Iter];
+      if (!Node2Deleted[j]) {
+        if (!Node2Visited[j])
+          HVNValNum(j);
+        if (Node2DFS[NodeIndex] > Node2DFS[j])
+          Node2DFS[NodeIndex] = Node2DFS[j];
+      }
+    }
+
+  // Now process all the implicit edges
+  if (N->ImplicitPredEdges)
+    for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin();
+         Iter != N->ImplicitPredEdges->end();
+         ++Iter) {
+      unsigned j = VSSCCRep[*Iter];
+      if (!Node2Deleted[j]) {
+        if (!Node2Visited[j])
+          HVNValNum(j);
+        if (Node2DFS[NodeIndex] > Node2DFS[j])
+          Node2DFS[NodeIndex] = Node2DFS[j];
+      }
+    }
+
+  // See if we found any cycles
+  if (MyDFS == Node2DFS[NodeIndex]) {
+    while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
+      unsigned CycleNodeIndex = SCCStack.top();
+      Node *CycleNode = &GraphNodes[CycleNodeIndex];
+      VSSCCRep[CycleNodeIndex] = NodeIndex;
+      // Unify the nodes
+      N->Direct &= CycleNode->Direct;
+
+      if (CycleNode->PredEdges) {
+        if (!N->PredEdges)
+          N->PredEdges = new SparseBitVector<>;
+        *(N->PredEdges) |= CycleNode->PredEdges;
+        delete CycleNode->PredEdges;
+        CycleNode->PredEdges = NULL;
+      }
+      if (CycleNode->ImplicitPredEdges) {
+        if (!N->ImplicitPredEdges)
+          N->ImplicitPredEdges = new SparseBitVector<>;
+        *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges;
+        delete CycleNode->ImplicitPredEdges;
+        CycleNode->ImplicitPredEdges = NULL;
+      }
+
+      SCCStack.pop();
+    }
+
+    Node2Deleted[NodeIndex] = true;
+
+    if (!N->Direct) {
+      GraphNodes[NodeIndex].PointerEquivLabel = PEClass++;
+      return;
+    }
+
+    // Collect labels of successor nodes
+    bool AllSame = true;
+    unsigned First = ~0;
+    SparseBitVector<> *Labels = new SparseBitVector<>;
+    bool Used = false;
+
+    if (N->PredEdges)
+      for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+           Iter != N->PredEdges->end();
+           ++Iter) {
+        unsigned j = VSSCCRep[*Iter];
+        unsigned Label = GraphNodes[j].PointerEquivLabel;
+        // Ignore labels that are equal to us or non-pointers
+        if (j == NodeIndex || Label == 0)
+          continue;
+        if (First == (unsigned)~0)
+          First = Label;
+        else if (First != Label)
+          AllSame = false;
+        Labels->set(Label);
+      }
+
+    // We either have a non-pointer, a copy of an existing node, or a new
+    // node.  Assign the appropriate pointer equivalence label.
+    if (Labels->empty()) {
+      GraphNodes[NodeIndex].PointerEquivLabel = 0;
+    } else if (AllSame) {
+      GraphNodes[NodeIndex].PointerEquivLabel = First;
+    } else {
+      GraphNodes[NodeIndex].PointerEquivLabel = Set2PEClass[Labels];
+      if (GraphNodes[NodeIndex].PointerEquivLabel == 0) {
+        unsigned EquivClass = PEClass++;
+        Set2PEClass[Labels] = EquivClass;
+        GraphNodes[NodeIndex].PointerEquivLabel = EquivClass;
+        Used = true;
+      }
+    }
+    if (!Used)
+      delete Labels;
+  } else {
+    SCCStack.push(NodeIndex);
+  }
+}
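The three outcomes of the labeling step above, illustrated with hypothetical labels:

    //   no incoming pointer labels      ->  label 0: provably a non-pointer
    //   every pred carries label 7      ->  label 7: same points-to set as
    //                                       its predecessors (AllSame case)
    //   preds carry labels {3, 7}       ->  a fresh label, memoized under the
    //                                       set {3, 7} in Set2PEClass
    //
    // Indirect nodes (!Direct) never share: each gets a fresh label up front.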
+
+/// The technique used here is described in "Exploiting Pointer and Location
+/// Equivalence to Optimize Pointer Analysis," in the 14th International
+/// Static Analysis Symposium (SAS), August 2007.  It is known as the "HU"
+/// algorithm, and is equivalent to value numbering the collapsed constraint
+/// graph including evaluating unions.
+void Andersens::HU() {
+  DOUT << "Beginning HU\n";
+  // Build a predecessor graph.  This is like our constraint graph with the
+  // edges going in the opposite direction, and there are edges for all the
+  // constraints, instead of just copy constraints.  We also build implicit
+  // edges for constraints that are implied but not explicit.  I.e., for the
+  // constraint a = &b, we add the implicit edge *a = b.  This helps us
+  // capture more cycles.
+  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+    Constraint &C = Constraints[i];
+    if (C.Type == Constraint::AddressOf) {
+      GraphNodes[C.Src].AddressTaken = true;
+      GraphNodes[C.Src].Direct = false;
+
+      GraphNodes[C.Dest].PointsTo->set(C.Src);
+      // *Dest = src edge
+      unsigned RefNode = C.Dest + FirstRefNode;
+      if (!GraphNodes[RefNode].ImplicitPredEdges)
+        GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
+      GraphNodes[RefNode].ImplicitPredEdges->set(C.Src);
+      GraphNodes[C.Src].PointedToBy->set(C.Dest);
+    } else if (C.Type == Constraint::Load) {
+      if (C.Offset == 0) {
+        // dest = *src edge
+        if (!GraphNodes[C.Dest].PredEdges)
+          GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
+        GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode);
+      } else {
+        GraphNodes[C.Dest].Direct = false;
+      }
+    } else if (C.Type == Constraint::Store) {
+      if (C.Offset == 0) {
+        // *dest = src edge
+        unsigned RefNode = C.Dest + FirstRefNode;
+        if (!GraphNodes[RefNode].PredEdges)
+          GraphNodes[RefNode].PredEdges = new SparseBitVector<>;
+        GraphNodes[RefNode].PredEdges->set(C.Src);
+      }
+    } else {
+      // Dest = Src edge and *Dest = *Src edge
+      if (!GraphNodes[C.Dest].PredEdges)
+        GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
+      GraphNodes[C.Dest].PredEdges->set(C.Src);
+      unsigned RefNode = C.Dest + FirstRefNode;
+      if (!GraphNodes[RefNode].ImplicitPredEdges)
+        GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
+      GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode);
+    }
+  }
+  PEClass = 1;
+  // Do SCC finding first to condense our predecessor graph
+  DFSNumber = 0;
+  Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
+  Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
+  Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
+
+  for (unsigned i = 0; i < FirstRefNode; ++i) {
+    if (FindNode(i) == i) {
+      unsigned Node = VSSCCRep[i];
+      if (!Node2Visited[Node])
+        Condense(Node);
+    }
+  }
+
+  // Reset tables for actual labeling
+  Node2DFS.clear();
+  Node2Visited.clear();
+  Node2Deleted.clear();
+  // Pre-grow our densemap so that we don't get really bad behavior
+  Set2PEClass.resize(GraphNodes.size());
+
+  // Visit the condensed graph and generate pointer equivalence labels.
+  Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
+  for (unsigned i = 0; i < FirstRefNode; ++i) {
+    if (FindNode(i) == i) {
+      unsigned Node = VSSCCRep[i];
+      if (!Node2Visited[Node])
+        HUValNum(Node);
+    }
+  }
+  // PEClass nodes will be deleted by the deleting of N->PointsTo in our
+  // caller.
+  Set2PEClass.clear();
+  DOUT << "Finished HU\n";
+}
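HUValNum (not shown in this excerpt) labels nodes by their offline points-to sets rather than by their predecessors' labels, which is where BitmapKeyInfo and the pre-grown Set2PEClass pay off. A sketch of the lookup it presumably performs; the helper below is hypothetical:

    // Identical offline points-to sets share one pointer equivalence label.
    unsigned labelFor(SparseBitVector<> *PointsTo,
                      DenseMap<SparseBitVector<> *, unsigned,
                               BitmapKeyInfo> &Set2PEClass,
                      unsigned &PEClass) {
      unsigned &Label = Set2PEClass[PointsTo];  // hashes the set's contents
      if (Label == 0)                           // first set with this content
        Label = PEClass++;
      return Label;
    }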
+
+
+/// Implementation of the standard Tarjan SCC algorithm, as modified by
+/// Nuutila.
+void Andersens::Condense(unsigned NodeIndex) {
+  unsigned MyDFS = DFSNumber++;
+  Node *N = &GraphNodes[NodeIndex];
+  Node2Visited[NodeIndex] = true;
+  Node2DFS[NodeIndex] = MyDFS;
+
+  // First process all our explicit edges
+  if (N->PredEdges)
+    for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+         Iter != N->PredEdges->end();
+         ++Iter) {
+      unsigned j = VSSCCRep[*Iter];
+      if (!Node2Deleted[j]) {
+        if (!Node2Visited[j])
+          Condense(j);
+        if (Node2DFS[NodeIndex] > Node2DFS[j])
+          Node2DFS[NodeIndex] = Node2DFS[j];
+      }
+    }
+
+  // Now process all the implicit edges
+  if (N->ImplicitPredEdges)
+    for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin();
+         Iter != N->ImplicitPredEdges->end();
+         ++Iter) {
+      unsigned j = VSSCCRep[*Iter];
+      if (!Node2Deleted[j]) {
+        if (!Node2Visited[j])
+          Condense(j);
+        if (Node2DFS[NodeIndex] > Node2DFS[j])
+          Node2DFS[NodeIndex] = Node2DFS[j];
+      }
+    }
+
+  // See if we found any cycles
+  if (MyDFS == Node2DFS[NodeIndex]) {
+    while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
+      unsigned CycleNodeIndex = SCCStack.top();
+      Node *CycleNode = &GraphNodes[CycleNodeIndex];
+      VSSCCRep[CycleNodeIndex] = NodeIndex;
+      // Unify the nodes
+      N->Direct &= CycleNode->Direct;
+
+      *(N->PointsTo) |= CycleNode->PointsTo;
+      delete CycleNode->PointsTo;
+      CycleNode->PointsTo = NULL;
+      if (CycleNode->PredEdges) {
+        if (!N->PredEdges)
+          N->PredEdges = new SparseBitVector<>;
+        *(N->PredEdges) |= CycleNode->PredEdges;