diff options
author | Daniel Berlin <dberlin@dberlin.org> | 2007-09-16 21:45:02 +0000 |
---|---|---|
committer | Daniel Berlin <dberlin@dberlin.org> | 2007-09-16 21:45:02 +0000 |
commit | aad158891c1881a81cbb8bf296d31aab91ffeb2b (patch) | |
tree | f17c2665cc5bdfef3e962f07b490355d3e736061 | |
parent | bd626b885f92a2f4a583e4c1710be8cc543962c1 (diff) |
Rewrite of andersen's to be about 100x faster, cleaner, and begin to support field sensitivity
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42016 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Analysis/IPA/Andersens.cpp | 1011 |
1 files changed, 687 insertions, 324 deletions
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp index 4c0d2468e7..fed246091d 100644 --- a/lib/Analysis/IPA/Andersens.cpp +++ b/lib/Analysis/IPA/Andersens.cpp @@ -7,14 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This file defines a very simple implementation of Andersen's interprocedural -// alias analysis. This implementation does not include any of the fancy -// features that make Andersen's reasonably efficient (like cycle elimination or -// variable substitution), but it should be useful for getting precision -// numbers and can be extended in the future. +// This file defines an implementation of Andersen's interprocedural alias +// analysis // // In pointer analysis terms, this is a subset-based, flow-insensitive, -// field-insensitive, and context-insensitive algorithm pointer algorithm. +// field-sensitive, and context-insensitive algorithm pointer algorithm. // // This algorithm is implemented as three stages: // 1. Object identification. @@ -29,24 +26,23 @@ // in the program by scanning the program, looking for pointer assignments and // other statements that effect the points-to graph. For a statement like "A = // B", this statement is processed to indicate that A can point to anything that -// B can point to. Constraints can handle copies, loads, and stores. +// B can point to. Constraints can handle copies, loads, and stores, and +// address taking. // // The inclusion constraint solving phase iteratively propagates the inclusion // constraints until a fixed point is reached. This is an O(N^3) algorithm. // -// In the initial pass, all indirect function calls are completely ignored. As -// the analysis discovers new targets of function pointers, it iteratively -// resolves a precise (and conservative) call graph. Also related, this -// analysis initially assumes that all internal functions have known incoming -// pointers. If we find that an internal function's address escapes outside of -// the program, we update this assumption. +// Function constraints are handled as if they were structs with X fields. +// Thus, an access to argument X of function Y is an access to node index +// getNode(Y) + X. This representation allows handling of indirect calls +// without any issues. To wit, an indirect call Y(a,b) is equivalence to +// *(Y + 1) = a, *(Y + 2) = b. +// The return node for a function is always located at getNode(F) + +// CallReturnPos. The arguments start at getNode(F) + CallArgPos. // // Future Improvements: -// This implementation of Andersen's algorithm is extremely slow. To make it -// scale reasonably well, the inclusion constraints could be sorted (easy), -// offline variable substitution would be a huge win (straight-forward), and -// online cycle elimination (trickier) might help as well. -// +// Offline variable substitution, offline detection of online +// cycles. Use of BDD's. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "anders-aa" @@ -62,31 +58,77 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SparseBitVector.h" #include <algorithm> #include <set> -using namespace llvm; +#include <list> +#include <stack> +#include <vector> +using namespace llvm; STATISTIC(NumIters , "Number of iterations to reach convergence"); STATISTIC(NumConstraints , "Number of constraints"); STATISTIC(NumNodes , "Number of nodes"); -STATISTIC(NumEscapingFunctions, "Number of internal functions that escape"); -STATISTIC(NumIndirectCallees , "Number of indirect callees found"); +STATISTIC(NumUnified , "Number of variables unified"); namespace { + const unsigned SelfRep = (unsigned)-1; + const unsigned Unvisited = (unsigned)-1; + // Position of the function return node relative to the function node. + const unsigned CallReturnPos = 2; + // Position of the function call node relative to the function node. + const unsigned CallFirstArgPos = 3; + class VISIBILITY_HIDDEN Andersens : public ModulePass, public AliasAnalysis, private InstVisitor<Andersens> { - public: - static char ID; // Class identification, replacement for typeinfo - Andersens() : ModulePass((intptr_t)&ID) {} - private: - /// Node class - This class is used to represent a memory object in the - /// program, and is the primitive used to build the points-to graph. - class Node { - std::vector<Node*> Pointees; - Value *Val; - public: - static const unsigned ID; // Pass identification, replacement for typeid - Node() : Val(0) {} + class Node; + + /// Constraint - Objects of this structure are used to represent the various + /// constraints identified by the algorithm. The constraints are 'copy', + /// for statements like "A = B", 'load' for statements like "A = *B", + /// 'store' for statements like "*A = B", and AddressOf for statements like + /// A = alloca; The Offset is applied as *(A + K) = B for stores, + /// A = *(B + K) for loads, and A = B + K for copies. It is + /// illegal on addressof constraints (Because it is statically + /// resolvable to A = &C where C = B + K) + + struct Constraint { + enum ConstraintType { Copy, Load, Store, AddressOf } Type; + unsigned Dest; + unsigned Src; + unsigned Offset; + + Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0) + : Type(Ty), Dest(D), Src(S), Offset(O) { + assert(Offset == 0 || Ty != AddressOf && + "Offset is illegal on addressof constraints"); + } + }; + + // Node class - This class is used to represent a node + // in the constraint graph. Due to various optimizations, + // not always the case that there is a mapping from a Node to a + // Value. In particular, we add artificial + // Node's that represent the set of pointed-to variables + // shared for each location equivalent Node. + struct Node { + Value *Val; + SparseBitVector<> *Edges; + SparseBitVector<> *PointsTo; + SparseBitVector<> *OldPointsTo; + bool Changed; + std::list<Constraint> Constraints; + + // Nodes in cycles (or in equivalence classes) are united + // together using a standard union-find representation with path + // compression. NodeRep gives the index into GraphNodes + // representative for this one. + unsigned NodeRep; public: + + Node() : Val(0), Edges(0), PointsTo(0), OldPointsTo(0), Changed(false), + NodeRep(SelfRep) { + } + Node *setValue(Value *V) { assert(Val == 0 && "Value already set for this node!"); Val = V; @@ -97,21 +139,11 @@ namespace { /// Value *getValue() const { return Val; } - typedef std::vector<Node*>::const_iterator iterator; - iterator begin() const { return Pointees.begin(); } - iterator end() const { return Pointees.end(); } - /// addPointerTo - Add a pointer to the list of pointees of this node, /// returning true if this caused a new pointer to be added, or false if /// we already knew about the points-to relation. - bool addPointerTo(Node *N) { - std::vector<Node*>::iterator I = std::lower_bound(Pointees.begin(), - Pointees.end(), - N); - if (I != Pointees.end() && *I == N) - return false; - Pointees.insert(I, N); - return true; + bool addPointerTo(unsigned Node) { + return PointsTo->test_and_set(Node); } /// intersects - Return true if the points-to set of this node intersects @@ -121,12 +153,7 @@ namespace { /// intersectsIgnoring - Return true if the points-to set of this node /// intersects with the points-to set of the specified node on any nodes /// except for the specified node to ignore. - bool intersectsIgnoring(Node *N, Node *Ignoring) const; - - // Constraint application methods. - bool copyFrom(Node *N); - bool loadFrom(Node *N); - bool storeThrough(Node *N); + bool intersectsIgnoring(Node *N, unsigned) const; }; /// GraphNodes - This vector is populated as part of the object @@ -152,41 +179,14 @@ namespace { /// take variable arguments. std::map<Function*, unsigned> VarargNodes; - /// Constraint - Objects of this structure are used to represent the various - /// constraints identified by the algorithm. The constraints are 'copy', - /// for statements like "A = B", 'load' for statements like "A = *B", and - /// 'store' for statements like "*A = B". - struct Constraint { - enum ConstraintType { Copy, Load, Store } Type; - Node *Dest, *Src; - - Constraint(ConstraintType Ty, Node *D, Node *S) - : Type(Ty), Dest(D), Src(S) {} - }; /// Constraints - This vector contains a list of all of the constraints /// identified by the program. std::vector<Constraint> Constraints; - /// EscapingInternalFunctions - This set contains all of the internal - /// functions that are found to escape from the program. If the address of - /// an internal function is passed to an external function or otherwise - /// escapes from the analyzed portion of the program, we must assume that - /// any pointer arguments can alias the universal node. This set keeps - /// track of those functions we are assuming to escape so far. - std::set<Function*> EscapingInternalFunctions; - - /// IndirectCalls - This contains a list of all of the indirect call sites - /// in the program. Since the call graph is iteratively discovered, we may - /// need to add constraints to our graph as we find new targets of function - /// pointers. - std::vector<CallSite> IndirectCalls; - - /// IndirectCallees - For each call site in the indirect calls list, keep - /// track of the callees that we have discovered so far. As the analysis - /// proceeds, more callees are discovered, until the call graph finally - /// stabilizes. - std::map<CallSite, std::vector<Function*> > IndirectCallees; + // Map from graph node to maximum K value that is allowed (For functions, + // this is equivalent to the number of arguments + CallFirstArgPos) + std::map<unsigned, unsigned> MaxK; /// This enum defines the GraphNodes indices that correspond to important /// fixed sets. @@ -195,8 +195,24 @@ namespace { NullPtr = 1, NullObject = 2 }; + // Stack for Tarjans + std::stack<unsigned> SCCStack; + // Topological Index -> Graph node + std::vector<unsigned> Topo2Node; + // Graph Node -> Topological Index; + std::vector<unsigned> Node2Topo; + // Map from Graph Node to DFS number + std::vector<unsigned> Node2DFS; + // Map from Graph Node to Deleted from graph. + std::vector<bool> Node2Deleted; + // Current DFS and RPO numbers + unsigned DFSNumber; + unsigned RPONumber; public: + static char ID; + Andersens() : ModulePass((intptr_t)&ID) {} + bool runOnModule(Module &M) { InitializeAliasAnalysis(this); IdentifyObjects(M); @@ -210,7 +226,6 @@ namespace { ObjectNodes.clear(); ReturnNodes.clear(); VarargNodes.clear(); - EscapingInternalFunctions.clear(); std::vector<Constraint>().swap(Constraints); return false; } @@ -255,7 +270,7 @@ namespace { private: /// getNode - Return the node corresponding to the specified pointer scalar. /// - Node *getNode(Value *V) { + unsigned getNode(Value *V) { if (Constant *C = dyn_cast<Constant>(V)) if (!isa<GlobalValue>(C)) return getNodeForConstantPointer(C); @@ -267,47 +282,55 @@ namespace { #endif assert(0 && "Value does not have a node in the points-to graph!"); } - return &GraphNodes[I->second]; + return I->second; } /// getObject - Return the node corresponding to the memory object for the /// specified global or allocation instruction. - Node *getObject(Value *V) { + unsigned getObject(Value *V) { std::map<Value*, unsigned>::iterator I = ObjectNodes.find(V); assert(I != ObjectNodes.end() && "Value does not have an object in the points-to graph!"); - return &GraphNodes[I->second]; + return I->second; } /// getReturnNode - Return the node representing the return value for the /// specified function. - Node *getReturnNode(Function *F) { + unsigned getReturnNode(Function *F) { std::map<Function*, unsigned>::iterator I = ReturnNodes.find(F); assert(I != ReturnNodes.end() && "Function does not return a value!"); - return &GraphNodes[I->second]; + return I->second; } /// getVarargNode - Return the node representing the variable arguments /// formal for the specified function. - Node *getVarargNode(Function *F) { + unsigned getVarargNode(Function *F) { std::map<Function*, unsigned>::iterator I = VarargNodes.find(F); assert(I != VarargNodes.end() && "Function does not take var args!"); - return &GraphNodes[I->second]; + return I->second; } /// getNodeValue - Get the node for the specified LLVM value and set the /// value for it to be the specified value. - Node *getNodeValue(Value &V) { - return getNode(&V)->setValue(&V); + unsigned getNodeValue(Value &V) { + unsigned Index = getNode(&V); + GraphNodes[Index].setValue(&V); + return Index; } + unsigned UniteNodes(unsigned First, unsigned Second); + unsigned FindNode(unsigned Node); + void IdentifyObjects(Module &M); void CollectConstraints(Module &M); + bool AnalyzeUsesOfFunction(Value *); + void CreateConstraintGraph(); void SolveConstraints(); + void QueryNode(unsigned Node); - Node *getNodeForConstantPointer(Constant *C); - Node *getNodeForConstantPointerTarget(Constant *C); - void AddGlobalInitializerConstraints(Node *N, Constant *C); + unsigned getNodeForConstantPointer(Constant *C); + unsigned getNodeForConstantPointerTarget(Constant *C); + void AddGlobalInitializerConstraints(unsigned, Constant *C); void AddConstraintsForNonInternalLinkage(Function *F); void AddConstraintsForCall(CallSite CS, Function *F); @@ -337,6 +360,7 @@ namespace { void visitSelectInst(SelectInst &SI); void visitVAArg(VAArgInst &I); void visitInstruction(Instruction &I); + }; char Andersens::ID = 0; @@ -353,12 +377,12 @@ ModulePass *llvm::createAndersensPass() { return new Andersens(); } AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size) { - Node *N1 = getNode(const_cast<Value*>(V1)); - Node *N2 = getNode(const_cast<Value*>(V2)); + Node *N1 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V1)))]; + Node *N2 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V2)))]; // Check to see if the two pointers are known to not alias. They don't alias // if their points-to sets do not intersect. - if (!N1->intersectsIgnoring(N2, &GraphNodes[NullObject])) + if (!N1->intersectsIgnoring(N2, NullObject)) return NoAlias; return AliasAnalysis::alias(V1, V1Size, V2, V2Size); @@ -376,14 +400,12 @@ Andersens::getModRefInfo(CallSite CS, Value *P, unsigned Size) { // is, after all, a "research quality" implementation of Andersen's analysis. if (Function *F = CS.getCalledFunction()) if (F->isDeclaration()) { - Node *N1 = getNode(P); + Node *N1 = &GraphNodes[FindNode(getNode(P))]; - if (N1->begin() == N1->end()) - return NoModRef; // P doesn't point to anything. + if (N1->PointsTo->empty()) + return NoModRef; - // Get the first pointee. - Node *FirstPointee = *N1->begin(); - if (FirstPointee != &GraphNodes[UniversalSet]) + if (!N1->PointsTo->test(UniversalSet)) return NoModRef; // P doesn't point to the universal set. } @@ -401,30 +423,23 @@ Andersens::getModRefInfo(CallSite CS1, CallSite CS2) { /// variables or any other memory memory objects because we do not track whether /// a pointer points to the beginning of an object or a field of it. void Andersens::getMustAliases(Value *P, std::vector<Value*> &RetVals) { - Node *N = getNode(P); - Node::iterator I = N->begin(); - if (I != N->end()) { - // If there is exactly one element in the points-to set for the object... - ++I; - if (I == N->end()) { - Node *Pointee = *N->begin(); - - // If a function is the only object in the points-to set, then it must be - // the destination. Note that we can't handle global variables here, - // because we don't know if the pointer is actually pointing to a field of - // the global or to the beginning of it. - if (Value *V = Pointee->getValue()) { - if (Function *F = dyn_cast<Function>(V)) - RetVals.push_back(F); - } else { - // If the object in the points-to set is the null object, then the null - // pointer is a must alias. - if (Pointee == &GraphNodes[NullObject]) - RetVals.push_back(Constant::getNullValue(P->getType())); - } + Node *N = &GraphNodes[FindNode(getNode(P))]; + if (N->PointsTo->count() == 1) { + Node *Pointee = &GraphNodes[N->PointsTo->find_first()]; + // If a function is the only object in the points-to set, then it must be + // the destination. Note that we can't handle global variables here, + // because we don't know if the pointer is actually pointing to a field of + // the global or to the beginning of it. + if (Value *V = Pointee->getValue()) { + if (Function *F = dyn_cast<Function>(V)) + RetVals.push_back(F); + } else { + // If the object in the points-to set is the null object, then the null + // pointer is a must alias. + if (Pointee == &GraphNodes[NullObject]) + RetVals.push_back(Constant::getNullValue(P->getType())); } } - AliasAnalysis::getMustAliases(P, RetVals); } @@ -434,14 +449,20 @@ void Andersens::getMustAliases(Value *P, std::vector<Value*> &RetVals) { /// return true. /// bool Andersens::pointsToConstantMemory(const Value *P) { - Node *N = getNode((Value*)P); - for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I) { - if (Value *V = (*I)->getValue()) { + Node *N = &GraphNodes[FindNode(getNode((Value*)P))]; + unsigned i; + + for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); + bi != N->PointsTo->end(); + ++bi) { + i = *bi; + Node *Pointee = &GraphNodes[i]; + if (Value *V = Pointee->getValue()) { if (!isa<GlobalValue>(V) || (isa<GlobalVariable>(V) && !cast<GlobalVariable>(V)->isConstant())) return AliasAnalysis::pointsToConstantMemory(P); } else { - if (*I != &GraphNodes[NullObject]) + if (i != NullObject) return AliasAnalysis::pointsToConstantMemory(P); } } @@ -483,6 +504,7 @@ void Andersens::IdentifyObjects(Module &M) { // Add nodes for all of the functions and the instructions inside of them. for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { // The function itself is a memory object. + unsigned First = NumObjects; ValueNodes[F] = NumObjects++; ObjectNodes[F] = NumObjects++; if (isa<PointerType>(F->getFunctionType()->getReturnType())) @@ -490,11 +512,14 @@ void Andersens::IdentifyObjects(Module &M) { if (F->getFunctionType()->isVarArg()) VarargNodes[F] = NumObjects++; + // Add nodes for all of the incoming pointer arguments. for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) if (isa<PointerType>(I->getType())) ValueNodes[I] = NumObjects++; + MaxK[First] = NumObjects - First; + MaxK[First + 1] = NumObjects - First - 1; // Scan the function body, creating a memory object for each heap/stack // allocation in the body of the function and a node to represent all @@ -521,11 +546,11 @@ void Andersens::IdentifyObjects(Module &M) { /// getNodeForConstantPointer - Return the node corresponding to the constant /// pointer itself. -Andersens::Node *Andersens::getNodeForConstantPointer(Constant *C) { +unsigned Andersens::getNodeForConstantPointer(Constant *C) { assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); if (isa<ConstantPointerNull>(C) || isa<UndefValue>(C)) - return &GraphNodes[NullPtr]; + return NullPtr; else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) return getNode(GV); else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { @@ -533,7 +558,7 @@ Andersens::Node *Andersens::getNodeForConstantPointer(Constant *C) { case Instruction::GetElementPtr: return getNodeForConstantPointer(CE->getOperand(0)); case Instruction::IntToPtr: - return &GraphNodes[UniversalSet]; + return UniversalSet; case Instruction::BitCast: return getNodeForConstantPointer(CE->getOperand(0)); default: @@ -548,11 +573,11 @@ Andersens::Node *Andersens::getNodeForConstantPointer(Constant *C) { /// getNodeForConstantPointerTarget - Return the node POINTED TO by the /// specified constant pointer. -Andersens::Node *Andersens::getNodeForConstantPointerTarget(Constant *C) { +unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) { assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); if (isa<ConstantPointerNull>(C)) - return &GraphNodes[NullObject]; + return NullObject; else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) return getObject(GV); else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { @@ -560,7 +585,7 @@ Andersens::Node *Andersens::getNodeForConstantPointerTarget(Constant *C) { case Instruction::GetElementPtr: return getNodeForConstantPointerTarget(CE->getOperand(0)); case Instruction::IntToPtr: - return &GraphNodes[UniversalSet]; + return UniversalSet; case Instruction::BitCast: return getNodeForConstantPointerTarget(CE->getOperand(0)); default: @@ -575,19 +600,22 @@ Andersens::Node *Andersens::getNodeForConstantPointerTarget(Constant *C) { /// AddGlobalInitializerConstraints - Add inclusion constraints for the memory /// object N, which contains values indicated by C. -void Andersens::AddGlobalInitializerConstraints(Node *N, Constant *C) { +void Andersens::AddGlobalInitializerConstraints(unsigned NodeIndex, + Constant *C) { if (C->getType()->isFirstClassType()) { if (isa<PointerType>(C->getType())) - N->copyFrom(getNodeForConstantPointer(C)); - + Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, + getNodeForConstantPointer(C))); } else if (C->isNullValue()) { - N->addPointerTo(&GraphNodes[NullObject]); + Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, + NullObject)); return; } else if (!isa<UndefValue>(C)) { // If this is an array or struct, include constraints for each element. assert(isa<ConstantArray>(C) || isa<ConstantStruct>(C)); for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) - AddGlobalInitializerConstraints(N, cast<Constant>(C->getOperand(i))); + AddGlobalInitializerConstraints(NodeIndex, + cast<Constant>(C->getOperand(i))); } } @@ -600,7 +628,7 @@ void Andersens::AddConstraintsForNonInternalLinkage(Function *F) { // If this is an argument of an externally accessible function, the // incoming pointer might point to anything. Constraints.push_back(Constraint(Constraint::Copy, getNode(I), - &GraphNodes[UniversalSet])); + UniversalSet)); } /// AddConstraintsForCall - If this is a call to a "known" function, add the @@ -653,14 +681,20 @@ bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) { // These functions do induce points-to edges. - if (F->getName() == "llvm.memcpy.i32" || F->getName() == "llvm.memcpy.i64" || + if (F->getName() == "llvm.memcpy.i32" || F->getName() == "llvm.memcpy.i64" || F->getName() == "llvm.memmove.i32" ||F->getName() == "llvm.memmove.i64" || F->getName() == "memmove") { - // Note: this is a poor approximation, this says Dest = Src, instead of - // *Dest = *Src. - Constraints.push_back(Constraint(Constraint::Copy, - getNode(CS.getArgument(0)), - getNode(CS.getArgument(1)))); + + // *Dest = *Src, which requires an artificial graph node to represent the + // constraint. It is broken up into *Dest = temp, temp = *Src + unsigned FirstArg = getNode(CS.getArgument(0)); + unsigned SecondArg = getNode(CS.getArgument(1)); + unsigned TempArg = GraphNodes.size(); + GraphNodes.push_back(Node()); + Constraints.push_back(Constraint(Constraint::Store, + FirstArg, TempArg)); + Constraints.push_back(Constraint(Constraint::Load, + TempArg, SecondArg)); return true; } @@ -679,49 +713,99 @@ bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) { +/// AnalyzeUsesOfFunction - Look at all of the users of the specified function. +/// If this is used by anything complex (i.e., the address escapes), return +/// true. +bool Andersens::AnalyzeUsesOfFunction(Value *V) { + + if (!isa<PointerType>(V->getType())) return true; + + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (dyn_cast<LoadInst>(*UI)) { + return false; + } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + if (V == SI->getOperand(1)) { + return false; + } else if (SI->getOperand(1)) { + return true; // Storing the pointer + } + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { + if (AnalyzeUsesOfFunction(GEP)) return true; + } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i) + if (CI->getOperand(i) == V) return true; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i) + if (II->getOperand(i) == V) return true; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { + if (CE->getOpcode() == Instruction::GetElementPtr || + CE->getOpcode() == Instruction::BitCast) { + if (AnalyzeUsesOfFunction(CE)) + return true; + } else { + return true; + } + } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) { + if (!isa<ConstantPointerNull>(ICI->getOperand(1))) + return true; // Allow comparison against null. + } else if (dyn_cast<FreeInst>(*UI)) { + return false; + } else { + return true; + } + return false; +} + /// CollectConstraints - This stage scans the program, adding a constraint to /// the Constraints list for each instruction in the program that induces a /// constraint, and setting up the initial points-to graph. /// void Andersens::CollectConstraints(Module &M) { // First, the universal set points to itself. - GraphNodes[UniversalSet].addPointerTo(&GraphNodes[UniversalSet]); - //Constraints.push_back(Constraint(Constraint::Load, &GraphNodes[UniversalSet], - // &GraphNodes[UniversalSet])); - Constraints.push_back(Constraint(Constraint::Store, &GraphNodes[UniversalSet], - &GraphNodes[UniversalSet])); + Constraints.push_back(Constraint(Constraint::AddressOf, UniversalSet, + UniversalSet)); + Constraints.push_back(Constraint(Constraint::Store, UniversalSet, + UniversalSet)); // Next, the null pointer points to the null object. - GraphNodes[NullPtr].addPointerTo(&GraphNodes[NullObject]); + Constraints.push_back(Constraint(Constraint::AddressOf, NullPtr, NullObject)); // Next, add any constraints on global variables and their initializers. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { // Associate the address of the global object as pointing to the memory for // the global: &G = <G memory> - Node *Object = getObject(I); + unsigned ObjectIndex = getObject(I); + Node *Object = &GraphNodes[ObjectIndex]; Object->setValue(I); - getNodeValue(*I)->addPointerTo(Object); + Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I), + ObjectIndex)); if (I->hasInitializer()) { - AddGlobalInitializerConstraints(Object, I->getInitializer()); + AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer()); } else { // If it doesn't have an initializer (i.e. it's defined in another // translation unit), it points to the universal set. - Constraints.push_back(Constraint(Constraint::Copy, Object, - &GraphNodes[UniversalSet])); + Constraints.push_back(Constraint(Constraint::Copy, ObjectIndex, + UniversalSet)); } } for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { // Make the function address point to the function object. - getNodeValue(*F)->addPointerTo(getObject(F)->setValue(F)); - + unsigned ObjectIndex = getObject(F); + GraphNodes[ObjectIndex].setValue(F); + Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*F), + ObjectIndex)); // Set up the return value node. if (isa<PointerType>(F->getFunctionType()->getReturnType())) - getReturnNode(F)->setValue(F); + GraphNodes[getReturnNode(F)].setValue(F); if (F->getFunctionType()->isVarArg()) - getVarargNode(F)->setValue(F); + GraphNodes[getVarargNode(F)].setValue(F); // Set up incoming argument nodes. for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); @@ -729,7 +813,10 @@ void Andersens::CollectConstraints(Module &M) { if (isa<PointerType>(I->getType())) getNodeValue(*I); - if (!F->hasInternalLinkage()) + // At some point we should just add constraints for the escaping functions + // at solve time, but this slows down solving. For now, we simply mark + // address taken functions as escaping and treat them as external. + if (!F->hasInternalLinkage() || AnalyzeUsesOfFunction(F)) AddConstraintsForNonInternalLinkage(F); if (!F->isDeclaration()) { @@ -742,7 +829,7 @@ void Andersens::CollectConstraints(Module &M) { if (isa<PointerType>(F->getFunctionType()->getReturnType())) Constraints.push_back(Constraint(Constraint::Copy, getReturnNode(F), - &GraphNodes[UniversalSet])); + UniversalSet)); // Any pointers that are passed into the function have the universal set // stored into them. @@ -752,11 +839,11 @@ void Andersens::CollectConstraints(Module &M) { // Pointers passed into external functions could have anything stored // through them. Constraints.push_back(Constraint(Constraint::Store, getNode(I), - &GraphNodes[UniversalSet])); + UniversalSet)); // Memory objects passed into external function calls can have the // universal set point to them. Constraints.push_back(Constraint(Constraint::Copy, - &GraphNodes[UniversalSet], + UniversalSet, getNode(I))); } @@ -764,7 +851,7 @@ void Andersens::CollectConstraints(Module &M) { // into any pointers passed through the varargs section. if (F->getFunctionType()->isVarArg()) Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F), - &GraphNodes[UniversalSet])); + UniversalSet)); } } NumConstraints += Constraints.size(); @@ -795,7 +882,10 @@ void Andersens::visitInstruction(Instruction &I) { } void Andersens::visitAllocationInst(AllocationInst &AI) { - getNodeValue(AI)->addPointerTo(getObject(&AI)->setValue(&AI)); + unsigned ObjectIndex = getObject(&AI); + GraphNodes[ObjectIndex].setValue(&AI); + Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(AI), + ObjectIndex)); } void Andersens::visitReturnInst(ReturnInst &RI) { @@ -829,7 +919,7 @@ void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) { void Andersens::visitPHINode(PHINode &PN) { if (isa<PointerType>(PN.getType())) { - Node *PNN = getNodeValue(PN); + unsigned PNN = getNodeValue(PN); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) // P1 = phi P2, P3 --> <Copy/P1/P2>, <Copy/P1/P3>, ... Constraints.push_back(Constraint(Constraint::Copy, PNN, @@ -848,7 +938,7 @@ void Andersens::visitCastInst(CastInst &CI) { // P1 = cast int --> <Copy/P1/Univ> #if 0 Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), - &GraphNodes[UniversalSet])); + UniversalSet)); #else getNodeValue(CI); #endif @@ -857,7 +947,7 @@ void Andersens::visitCastInst(CastInst &CI) { // int = cast P1 --> <Copy/Univ/P1> #if 0 Constraints.push_back(Constraint(Constraint::Copy, - &GraphNodes[UniversalSet], + UniversalSet, getNode(CI.getOperand(0)))); #else getNode(CI.getOperand(0)); @@ -867,7 +957,7 @@ void Andersens::visitCastInst(CastInst &CI) { void Andersens::visitSelectInst(SelectInst &SI) { if (isa<PointerType>(SI.getType())) { - Node *SIN = getNodeValue(SI); + unsigned SIN = getNodeValue(SI); // P1 = select C, P2, P3 ---> <Copy/P1/P2>, <Copy/P1/P3> Constraints.push_back(Constraint(Constraint::Copy, SIN, getNode(SI.getOperand(1)))); @@ -886,48 +976,72 @@ void Andersens::visitVAArg(VAArgInst &I) { /// the function pointer has been casted. If this is the case, do something /// reasonable. void Andersens::AddConstraintsForCall(CallSite CS, Function *F) { - // If this is a call to an external function, handle it directly to get some - // taste of context sensitivity. - if (F->isDeclaration() && AddConstraintsForExternalCall(CS, F)) + Value *CallValue = CS.getCalledValue(); + bool IsDeref = F == NULL; + + // If this is a call to an external function, try to handle it directly to get + // some taste of context sensitivity. + if (F && F->isDeclaration() && AddConstraintsForExternalCall(CS, F)) return; if (isa<PointerType>(CS.getType())) { - Node *CSN = getNode(CS.getInstruction()); - if (isa<PointerType>(F->getFunctionType()->getReturnType())) { - Constraints.push_back(Constraint(Constraint::Copy, CSN, - getReturnNode(F))); + unsigned CSN = getNode(CS.getInstruction()); + if (!F || isa<PointerType>(F->getFunctionType()->getReturnType())) { + if (IsDeref) + Constraints.push_back(Constraint(Constraint::Load, CSN, + getNode(CallValue), CallReturnPos)); + else + Constraints.push_back(Constraint(Constraint::Copy, CSN, + getNode(CallValue) + CallReturnPos)); } else { // If the function returns a non-pointer value, handle this just like we // treat a nonpointer cast to pointer. Constraints.push_back(Constraint(Constraint::Copy, CSN, - &GraphNodes[UniversalSet])); + UniversalSet)); } - } else if (isa<PointerType>(F->getFunctionType()->getReturnType())) { + } else if (F && isa<PointerType>(F->getFunctionType()->getReturnType())) { Constraints.push_back(Constraint(Constraint::Copy, - &GraphNodes[UniversalSet], - getReturnNode(F))); + UniversalSet, + getNode(CallValue) + CallReturnPos)); } - Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end(); - for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI) - if (isa<PointerType>(AI->getType())) { + if (F) { + // Direct Call + Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI) + if (isa<PointerType>(AI->getType())) { + if (isa<PointerType>((*ArgI)->getType())) { + // Copy the actual argument into the formal argument. + Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), + getNode(*ArgI))); + } else { + Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), + UniversalSet)); + } + } else if (isa<PointerType>((*ArgI)->getType())) { + Constraints.push_back(Constraint(Constraint::Copy, + UniversalSet, + getNode(*ArgI))); + } + } else { + //Indirect Call + unsigned ArgPos = CallFirstArgPos; + for (; ArgI != ArgE; ++ArgI) { |