//===- Andersens.cpp - Andersen's Interprocedural Alias Analysis ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a very simple implementation of Andersen's interprocedural
// alias analysis. This implementation does not include any of the fancy
// features that make Andersen's reasonably efficient (like cycle elimination or
// variable substitution), but it should be useful for getting precision
// numbers and can be extended in the future.
//
// In pointer analysis terms, this is a subset-based, flow-insensitive,
// field-insensitive, and context-insensitive algorithm pointer algorithm.
//
// This algorithm is implemented as three stages:
// 1. Object identification.
// 2. Inclusion constraint identification.
// 3. Inclusion constraint solving.
//
// The object identification stage identifies all of the memory objects in the
// program, which includes globals, heap allocated objects, and stack allocated
// objects.
//
// The inclusion constraint identification stage finds all inclusion constraints
// in the program by scanning the program, looking for pointer assignments and
// other statements that effect the points-to graph. For a statement like "A =
// B", this statement is processed to indicate that A can point to anything that
// B can point to. Constraints can handle copies, loads, and stores.
//
// The inclusion constraint solving phase iteratively propagates the inclusion
// constraints until a fixed point is reached. This is an O(N^3) algorithm.
//
// In the initial pass, all indirect function calls are completely ignored. As
// the analysis discovers new targets of function pointers, it iteratively
// resolves a precise (and conservative) call graph. Also related, this
// analysis initially assumes that all internal functions have known incoming
// pointers. If we find that an internal function's address escapes outside of
// the program, we update this assumption.
//
// Future Improvements:
// This implementation of Andersen's algorithm is extremely slow. To make it
// scale reasonably well, the inclusion constraints could be sorted (easy),
// offline variable substitution would be a huge win (straight-forward), and
// online cycle elimination (trickier) might help as well.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "anders-aa"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
#include <set>
#include <iostream>
using namespace llvm;
namespace {
Statistic<>
NumIters("anders-aa", "Number of iterations to reach convergence");
Statistic<>
NumConstraints("anders-aa", "Number of constraints");
Statistic<>
NumNodes("anders-aa", "Number of nodes");
Statistic<>
NumEscapingFunctions("anders-aa", "Number of internal functions that escape");
Statistic<>
NumIndirectCallees("anders-aa", "Number of indirect callees found");
class Andersens : public ModulePass, public AliasAnalysis,
private InstVisitor<Andersens> {
/// Node class - This class is used to represent a memory object in the
/// program, and is the primitive used to build the points-to graph.
class Node {
std::vector<Node*> Pointees;
Value *Val;
public:
Node() : Val(0) {}
Node *setValue(Value *V) {
assert(Val == 0 && "Value already set for this node!");
Val = V;
return this;
}
/// getValue - Return the LLVM value corresponding to this node.
///
Value *getValue() const { return Val; }
typedef std::vector<Node*>::const_iterator iterator;
iterator begin() const { return Pointees.begin(); }
iterator end() const { return Pointees.end(); }
/// addPointerTo - Add a pointer to the list of pointees of this node,
/// returning true if this caused a new pointer to be added, or false if
/// we already knew about the points-to relation.
bool addPointerTo(Node *N) {
std::vector<Node*>::iterator I = std::lower_bound(Pointees.begin(),
Pointees.end(),
N);
if (I != Pointees.end() && *I == N)
return false;
Pointees.insert(I, N);
return true;
}
/// intersects - Return true if the points-to set of this node intersects
/// with the points-to set of the specif