aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/JSBackend/AllocaManager.cpp
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2014-03-05 13:16:08 -0800
committerAlon Zakai <alonzakai@gmail.com>2014-03-05 13:16:08 -0800
commit450872142f746f2047816534c345981be2e7bdf1 (patch)
tree1b6cdcfac88f4400566b5071b00504843e7c27a4 /lib/Target/JSBackend/AllocaManager.cpp
parentdc1378e24ff461546f531ec3a6138046b8e9c9ef (diff)
parentc6f3be7a4da7ba2f94dcbd3b442147807b558b99 (diff)
Merge pull request #28 from sunfishcode/incoming
The alloca coloring patch
Diffstat (limited to 'lib/Target/JSBackend/AllocaManager.cpp')
-rw-r--r--lib/Target/JSBackend/AllocaManager.cpp520
1 files changed, 520 insertions, 0 deletions
diff --git a/lib/Target/JSBackend/AllocaManager.cpp b/lib/Target/JSBackend/AllocaManager.cpp
new file mode 100644
index 0000000000..4673a34644
--- /dev/null
+++ b/lib/Target/JSBackend/AllocaManager.cpp
@@ -0,0 +1,520 @@
+//===-- AllocaManager.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AllocaManager class.
+//
+// The AllocaManager computes a frame layout, assigning every static alloca an
+// offset. It does alloca liveness analysis in order to reuse stack memory,
+// using lifetime intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "allocamanager"
+#include "AllocaManager.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumAllocas, "Number of allocas eliminated");
+
+// Return the size of the given alloca.
+uint64_t AllocaManager::getSize(const AllocaInst *AI) {
+ assert(AI->isStaticAlloca());
+ return DL->getTypeAllocSize(AI->getAllocatedType()) *
+ cast<ConstantInt>(AI->getArraySize())->getValue().getZExtValue();
+}
+
+// Return the alignment of the given alloca.
+unsigned AllocaManager::getAlignment(const AllocaInst *AI) {
+ assert(AI->isStaticAlloca());
+ return std::max(AI->getAlignment(),
+ DL->getABITypeAlignment(AI->getAllocatedType()));
+}
+
+AllocaManager::AllocaInfo AllocaManager::getInfo(const AllocaInst *AI) {
+ assert(AI->isStaticAlloca());
+ return AllocaInfo(AI, getSize(AI), getAlignment(AI));
+}
+
+// Given a lifetime_start or lifetime_end intrinsic, determine if it's
+// describing a static alloc memory region suitable for our analysis. If so,
+// return the alloca, otherwise return NULL.
+const AllocaInst *
+AllocaManager::getAllocaFromIntrinsic(const CallInst *CI) {
+ const IntrinsicInst *II = cast<IntrinsicInst>(CI);
+ assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end);
+
+ // Lifetime intrinsics have a size as their first argument and a pointer as
+ // their second argument.
+ const Value *Size = II->getArgOperand(0);
+ const Value *Ptr = II->getArgOperand(1);
+
+ // Check to see if we can convert the size to a host integer. If we can't,
+ // it's probably not worth worrying about.
+ const ConstantInt *SizeCon = dyn_cast<ConstantInt>(Size);
+ if (!SizeCon) return NULL;
+ const APInt &SizeAP = SizeCon->getValue();
+ if (SizeAP.getActiveBits() > 64) return NULL;
+ uint64_t MarkedSize = SizeAP.getZExtValue();
+
+ // We're only interested if the pointer is a static alloca.
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Ptr->stripPointerCasts());
+ if (!AI || !AI->isStaticAlloca()) return NULL;
+
+ // Make sure the size covers the alloca.
+ if (MarkedSize < getSize(AI)) return NULL;
+
+ return AI;
+}
+
+int AllocaManager::AllocaSort(const void *l, const void *r) {
+ const AllocaInfo *li = static_cast<const AllocaInfo *>(l);
+ const AllocaInfo *ri = static_cast<const AllocaInfo *>(r);
+
+ // Sort by alignment to minimize padding.
+ if (li->getAlignment() > ri->getAlignment()) return -1;
+ if (li->getAlignment() < ri->getAlignment()) return 1;
+
+ // Ensure a stable sort. We can do this because the pointers are
+ // pointing into the same array.
+ if (li > ri) return -1;
+ if (li < ri) return 1;
+
+ return 0;
+}
+
+// Collect allocas
+void AllocaManager::collectMarkedAllocas() {
+ NamedRegionTimer Timer("Collect Marked Allocas", "AllocaManager",
+ TimePassesIsEnabled);
+
+ // Weird semantics: If an alloca *ever* appears in a lifetime start or end
+ // within the same function, its lifetime begins only at the explicit lifetime
+ // starts and ends only at the explicit lifetime ends and function exit
+ // points. Otherwise, its lifetime begins in the entry block and it is live
+ // everywhere.
+ //
+ // And so, instead of just walking the entry block to find all the static
+ // allocas, we walk the whole body to find the intrinsics so we can find the
+ // set of static allocas referenced in the intrinsics.
+ for (Function::const_iterator FI = F->begin(), FE = F->end();
+ FI != FE; ++FI) {
+ for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ const CallInst *CI = dyn_cast<CallInst>(BI);
+ if (!CI) continue;
+
+ const Value *Callee = CI->getCalledValue();
+ if (Callee == LifetimeStart || Callee == LifetimeEnd) {
+ if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) {
+ Allocas.insert(std::make_pair(AI, 0));
+ }
+ }
+ }
+ }
+
+ // All that said, we still want the intrinsics in the order they appear in the
+ // block, so that we can represent later ones with earlier ones and skip
+ // worrying about dominance, so run through the entry block and index those
+ // allocas which we identified above.
+ AllocasByIndex.reserve(Allocas.size());
+ const BasicBlock *EntryBB = &F->getEntryBlock();
+ for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end();
+ BI != BE; ++BI) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(BI);
+ if (!AI || !AI->isStaticAlloca()) continue;
+
+ AllocaMap::iterator I = Allocas.find(AI);
+ if (I != Allocas.end()) {
+ I->second = AllocasByIndex.size();
+ AllocasByIndex.push_back(getInfo(AI));
+ }
+ }
+ assert(AllocasByIndex.size() == Allocas.size());
+}
+
+// Calculate the starting point from which inter-block liveness will be
+// computed.
+void AllocaManager::collectBlocks() {
+ NamedRegionTimer Timer("Collect Blocks", "AllocaManager",
+ TimePassesIsEnabled);
+
+ size_t AllocaCount = AllocasByIndex.size();
+
+ BitVector Seen(AllocaCount);
+
+ for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ const BasicBlock *BB = I;
+
+ BlockLifetimeInfo &BLI = BlockLiveness[BB];
+ BLI.Start.resize(AllocaCount);
+ BLI.End.resize(AllocaCount);
+
+ // Track which allocas we've seen. This is used because if a lifetime start
+ // is the first lifetime marker for an alloca in a block, the alloca is
+ // live-in.
+ Seen.reset();
+
+ // Walk the instructions and compute the Start and End sets.
+ for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ const CallInst *CI = dyn_cast<CallInst>(BI);
+ if (!CI) continue;
+
+ const Value *Callee = CI->getCalledValue();
+ if (Callee == LifetimeStart) {
+ if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) {
+ AllocaMap::const_iterator MI = Allocas.find(AI);
+ if (MI != Allocas.end()) {
+ size_t AllocaIndex = MI->second;
+ if (!Seen.test(AllocaIndex)) {
+ BLI.Start.set(AllocaIndex);
+ }
+ BLI.End.reset(AllocaIndex);
+ Seen.set(AllocaIndex);
+ }
+ }
+ } else if (Callee == LifetimeEnd) {
+ if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) {
+ AllocaMap::const_iterator MI = Allocas.find(AI);
+ if (MI != Allocas.end()) {
+ size_t AllocaIndex = MI->second;
+ BLI.End.set(AllocaIndex);
+ Seen.set(AllocaIndex);
+ }
+ }
+ }
+ }
+
+ // Lifetimes that start in this block and do not end here are live-out.
+ BLI.LiveOut = BLI.Start;
+ BLI.LiveOut.reset(BLI.End);
+ if (BLI.LiveOut.any()) {
+ for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ InterBlockWorklist.insert(*SI);
+ }
+ }
+
+ // Lifetimes that end in this block and do not start here are live-in.
+ // TODO: Is this actually true? What are the semantics of a standalone
+ // lifetime end? See also the code in computeInterBlockLiveness.
+ BLI.LiveIn = BLI.End;
+ BLI.LiveIn.reset(BLI.Start);
+ if (BLI.LiveIn.any()) {
+ for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+ InterBlockWorklist.insert(*PI);
+ }
+ }
+ }
+}
+
+// Compute the LiveIn and LiveOut sets for each block in F.
+void AllocaManager::computeInterBlockLiveness() {
+ NamedRegionTimer Timer("Compute inter-block liveness", "AllocaManager",
+ TimePassesIsEnabled);
+
+ size_t AllocaCount = AllocasByIndex.size();
+
+ BitVector Temp(AllocaCount);
+
+ // This is currently using a very simple-minded bi-directional liveness
+ // propagation algorithm. Numerous opportunities for compile time
+ // speedups here.
+ while (!InterBlockWorklist.empty()) {
+ const BasicBlock *BB = InterBlockWorklist.pop_back_val();
+ BlockLifetimeInfo &BLI = BlockLiveness[BB];
+
+ // Compute the new live-in set.
+ for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+ Temp |= BlockLiveness[*PI].LiveOut;
+ }
+
+ // If it contains new live blocks, prepare to propagate them.
+ if (Temp.test(BLI.LiveIn)) {
+ BLI.LiveIn |= Temp;
+ BLI.LiveOut |= Temp;
+ BLI.LiveOut.reset(BLI.End);
+ for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ InterBlockWorklist.insert(*SI);
+ }
+ }
+ Temp.reset();
+
+ // Compute the new live-out set.
+ for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ Temp |= BlockLiveness[*SI].LiveIn;
+ }
+
+ // If it contains new live blocks, prepare to propagate them.
+ if (Temp.test(BLI.LiveOut)) {
+ // TODO: As above, what are the semantics of a standalone lifetime end?
+ BLI.LiveOut |= Temp;
+ BLI.LiveIn |= Temp;
+ BLI.LiveIn.reset(BLI.Start);
+ for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+ InterBlockWorklist.insert(*PI);
+ }
+ }
+ Temp.reset();
+ }
+}
+
+// Determine overlapping liveranges within blocks.
+void AllocaManager::computeIntraBlockLiveness() {
+ NamedRegionTimer Timer("Compute intra-block liveness", "AllocaManager",
+ TimePassesIsEnabled);
+
+ size_t AllocaCount = AllocasByIndex.size();
+
+ BitVector Current(AllocaCount);
+
+ AllocaCompatibility.resize(AllocaCount, BitVector(AllocaCount, true));
+
+ for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ const BasicBlock *BB = I;
+ const BlockLifetimeInfo &BLI = BlockLiveness[BB];
+
+ Current = BLI.LiveIn;
+
+ for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) {
+ AllocaCompatibility[i].reset(Current);
+ }
+
+ for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ const CallInst *CI = dyn_cast<CallInst>(BI);
+ if (!CI) continue;
+
+ const Value *Callee = CI->getCalledValue();
+ if (Callee == LifetimeStart) {
+ if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) {
+ size_t AIndex = Allocas[AI];
+ // We conflict with everything else that's currently live.
+ AllocaCompatibility[AIndex].reset(Current);
+ // Everything else that's currently live conflicts with us.
+ for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) {
+ AllocaCompatibility[i].reset(AIndex);
+ }
+ // We're now live.
+ Current.set(AIndex);
+ }
+ } else if (Callee == LifetimeEnd) {
+ if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) {
+ size_t AIndex = Allocas[AI];
+ // We're no longer live.
+ Current.reset(AIndex);
+ }
+ }
+ }
+ }
+}
+
+// Decide which allocas will represent which other allocas, and if so what their
+// size and alignment will need to be.
+void AllocaManager::computeRepresentatives() {
+ NamedRegionTimer Timer("Compute Representatives", "AllocaManager",
+ TimePassesIsEnabled);
+
+ for (size_t i = 0, e = AllocasByIndex.size(); i != e; ++i) {
+ // If we've already represented this alloca with another, don't visit it.
+ if (AllocasByIndex[i].isForwarded()) continue;
+ if (i > size_t(INT_MAX)) continue;
+
+ // Find compatible allocas. This is a simple greedy algorithm.
+ for (int j = int(i); ; ) {
+ assert(j >= int(i));
+ j = AllocaCompatibility[i].find_next(j);
+ assert(j != int(i));
+ if (j < 0) break;
+ if (!AllocaCompatibility[j][i]) continue;
+
+ DEBUG(dbgs() << "Allocas: "
+ "Representing "
+ << AllocasByIndex[j].getInst()->getName() << " "
+ "with "
+ << AllocasByIndex[i].getInst()->getName() << "\n");
+ ++NumAllocas;
+
+ assert(!AllocasByIndex[j].isForwarded());
+
+ AllocasByIndex[i].mergeSize(AllocasByIndex[j].getSize());
+ AllocasByIndex[i].mergeAlignment(AllocasByIndex[j].getAlignment());
+ AllocasByIndex[j].forward(i);
+
+ AllocaCompatibility[i] &= AllocaCompatibility[j];
+ AllocaCompatibility[j].reset();
+ }
+ }
+}
+
+void AllocaManager::computeFrameOffsets() {
+ NamedRegionTimer Timer("Compute Frame Offsets", "AllocaManager",
+ TimePassesIsEnabled);
+
+ // Walk through the entry block and collect all the allocas, including the
+ // ones with no lifetime markers that we haven't looked at yet. We walk in
+ // reverse order so that we can set the representative allocas as those that
+ // dominate the others as we go.
+ const BasicBlock *EntryBB = &F->getEntryBlock();
+ for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end();
+ BI != BE; ++BI) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(BI);
+ if (!AI || !AI->isStaticAlloca()) continue;
+
+ AllocaMap::const_iterator I = Allocas.find(AI);
+ if (I != Allocas.end()) {
+ // An alloca with lifetime markers. Emit the record we've crafted for it,
+ // if we've chosen to keep it as a representative.
+ const AllocaInfo &Info = AllocasByIndex[I->second];
+ if (!Info.isForwarded()) {
+ SortedAllocas.push_back(Info);
+ }
+ } else {
+ // An alloca with no lifetime markers.
+ SortedAllocas.push_back(getInfo(AI));
+ }
+ }
+
+ // Sort the allocas to hopefully reduce padding.
+ array_pod_sort(SortedAllocas.begin(), SortedAllocas.end(), AllocaSort);
+
+ // Assign stack offsets.
+ uint64_t CurrentOffset = 0;
+ for (SmallVectorImpl<AllocaInfo>::const_iterator I = SortedAllocas.begin(),
+ E = SortedAllocas.end(); I != E; ++I) {
+ const AllocaInfo &Info = *I;
+ uint64_t NewOffset = RoundUpToAlignment(CurrentOffset, Info.getAlignment());
+
+ // For backwards compatibility, align every power-of-two multiple alloca to
+ // its greatest power-of-two factor, up to 8 bytes. In particular, cube2hash
+ // is known to depend on this.
+ // TODO: Consider disabling this and making people fix their code.
+ if (uint64_t Size = Info.getSize()) {
+ uint64_t P2 = uint64_t(1) << CountTrailingZeros_64(Size);
+ unsigned CompatAlign = unsigned(std::min(P2, uint64_t(8)));
+ NewOffset = RoundUpToAlignment(NewOffset, CompatAlign);
+ }
+
+ const AllocaInst *AI = Info.getInst();
+ StaticAllocas[AI] = StaticAllocation(AI, NewOffset);
+
+ CurrentOffset = NewOffset + Info.getSize();
+ }
+
+ // Add allocas that were represented by other allocas to the StaticAllocas map
+ // so that our clients can look them up.
+ for (unsigned i = 0, e = AllocasByIndex.size(); i != e; ++i) {
+ const AllocaInfo &Info = AllocasByIndex[i];
+ if (!Info.isForwarded()) continue;
+ size_t j = Info.getForwardedID();
+ assert(!AllocasByIndex[j].isForwarded());
+
+ StaticAllocaMap::const_iterator I =
+ StaticAllocas.find(AllocasByIndex[j].getInst());
+ assert(I != StaticAllocas.end());
+
+ std::pair<StaticAllocaMap::const_iterator, bool> Pair =
+ StaticAllocas.insert(std::make_pair(AllocasByIndex[i].getInst(),
+ I->second));
+ assert(Pair.second); (void)Pair;
+ }
+
+ // Record the final frame size. Keep the stack pointer 16-byte aligned.
+ FrameSize = CurrentOffset;
+ FrameSize = RoundUpToAlignment(FrameSize, 16);
+
+ DEBUG(dbgs() << "Allocas: "
+ "Statically allocated frame size is " << FrameSize << "\n");
+}
+
+AllocaManager::AllocaManager() {
+}
+
+void AllocaManager::analyze(const Function &Func, const DataLayout &Layout,
+ bool PerformColoring) {
+ NamedRegionTimer Timer("AllocaManager", TimePassesIsEnabled);
+
+ assert(Allocas.empty());
+ assert(AllocasByIndex.empty());
+ assert(AllocaCompatibility.empty());
+ assert(BlockLiveness.empty());
+ assert(StaticAllocas.empty());
+ assert(SortedAllocas.empty());
+
+ DL = &Layout;
+ F = &Func;
+
+ // Get the declarations for the lifetime intrinsics so we can quickly test to
+ // see if they are used at all, and for use later if they are.
+ const Module *M = F->getParent();
+ LifetimeStart = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_start));
+ LifetimeEnd = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_end));
+
+ // If we are optimizing and the module contains any lifetime intrinsics, run
+ // the alloca coloring algorithm.
+ if (PerformColoring &&
+ ((LifetimeStart && !LifetimeStart->use_empty()) ||
+ (LifetimeEnd && !LifetimeEnd->use_empty()))) {
+
+ collectMarkedAllocas();
+
+ if (!AllocasByIndex.empty()) {
+ DEBUG(dbgs() << "Allocas: "
+ << AllocasByIndex.size() << " marked allocas found\n");
+
+ collectBlocks();
+ computeInterBlockLiveness();
+ computeIntraBlockLiveness();
+ BlockLiveness.clear();
+
+ computeRepresentatives();
+ AllocaCompatibility.clear();
+ }
+ }
+
+ computeFrameOffsets();
+ SortedAllocas.clear();
+ Allocas.clear();
+ AllocasByIndex.clear();
+}
+
+void AllocaManager::clear() {
+ StaticAllocas.clear();
+}
+
+bool
+AllocaManager::getFrameOffset(const AllocaInst *AI, uint64_t *Offset) const {
+ assert(AI->isStaticAlloca());
+ StaticAllocaMap::const_iterator I = StaticAllocas.find(AI);
+ assert(I != StaticAllocas.end());
+ *Offset = I->second.Offset;
+ return AI == I->second.Representative;
+}
+
+const AllocaInst *
+AllocaManager::getRepresentative(const AllocaInst *AI) const {
+ assert(AI->isStaticAlloca());
+ StaticAllocaMap::const_iterator I = StaticAllocas.find(AI);
+ assert(I != StaticAllocas.end());
+ return I->second.Representative;
+}