diff options
-rw-r--r-- | lib/Target/JSBackend/AllocaManager.cpp | 520 | ||||
-rw-r--r-- | lib/Target/JSBackend/AllocaManager.h | 172 | ||||
-rw-r--r-- | lib/Target/JSBackend/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/JSBackend/JSBackend.cpp | 143 | ||||
-rw-r--r-- | test/CodeGen/JS/allocamanager.ll | 166 |
5 files changed, 933 insertions, 69 deletions
diff --git a/lib/Target/JSBackend/AllocaManager.cpp b/lib/Target/JSBackend/AllocaManager.cpp new file mode 100644 index 0000000000..4673a34644 --- /dev/null +++ b/lib/Target/JSBackend/AllocaManager.cpp @@ -0,0 +1,520 @@ +//===-- AllocaManager.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the AllocaManager class. +// +// The AllocaManager computes a frame layout, assigning every static alloca an +// offset. It does alloca liveness analysis in order to reuse stack memory, +// using lifetime intrinsics. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "allocamanager" +#include "AllocaManager.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumAllocas, "Number of allocas eliminated"); + +// Return the size of the given alloca. +uint64_t AllocaManager::getSize(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + return DL->getTypeAllocSize(AI->getAllocatedType()) * + cast<ConstantInt>(AI->getArraySize())->getValue().getZExtValue(); +} + +// Return the alignment of the given alloca. +unsigned AllocaManager::getAlignment(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + return std::max(AI->getAlignment(), + DL->getABITypeAlignment(AI->getAllocatedType())); +} + +AllocaManager::AllocaInfo AllocaManager::getInfo(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + return AllocaInfo(AI, getSize(AI), getAlignment(AI)); +} + +// Given a lifetime_start or lifetime_end intrinsic, determine if it's +// describing a static alloc memory region suitable for our analysis. If so, +// return the alloca, otherwise return NULL. +const AllocaInst * +AllocaManager::getAllocaFromIntrinsic(const CallInst *CI) { + const IntrinsicInst *II = cast<IntrinsicInst>(CI); + assert(II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end); + + // Lifetime intrinsics have a size as their first argument and a pointer as + // their second argument. + const Value *Size = II->getArgOperand(0); + const Value *Ptr = II->getArgOperand(1); + + // Check to see if we can convert the size to a host integer. If we can't, + // it's probably not worth worrying about. + const ConstantInt *SizeCon = dyn_cast<ConstantInt>(Size); + if (!SizeCon) return NULL; + const APInt &SizeAP = SizeCon->getValue(); + if (SizeAP.getActiveBits() > 64) return NULL; + uint64_t MarkedSize = SizeAP.getZExtValue(); + + // We're only interested if the pointer is a static alloca. + const AllocaInst *AI = dyn_cast<AllocaInst>(Ptr->stripPointerCasts()); + if (!AI || !AI->isStaticAlloca()) return NULL; + + // Make sure the size covers the alloca. + if (MarkedSize < getSize(AI)) return NULL; + + return AI; +} + +int AllocaManager::AllocaSort(const void *l, const void *r) { + const AllocaInfo *li = static_cast<const AllocaInfo *>(l); + const AllocaInfo *ri = static_cast<const AllocaInfo *>(r); + + // Sort by alignment to minimize padding. + if (li->getAlignment() > ri->getAlignment()) return -1; + if (li->getAlignment() < ri->getAlignment()) return 1; + + // Ensure a stable sort. We can do this because the pointers are + // pointing into the same array. + if (li > ri) return -1; + if (li < ri) return 1; + + return 0; +} + +// Collect allocas +void AllocaManager::collectMarkedAllocas() { + NamedRegionTimer Timer("Collect Marked Allocas", "AllocaManager", + TimePassesIsEnabled); + + // Weird semantics: If an alloca *ever* appears in a lifetime start or end + // within the same function, its lifetime begins only at the explicit lifetime + // starts and ends only at the explicit lifetime ends and function exit + // points. Otherwise, its lifetime begins in the entry block and it is live + // everywhere. + // + // And so, instead of just walking the entry block to find all the static + // allocas, we walk the whole body to find the intrinsics so we can find the + // set of static allocas referenced in the intrinsics. + for (Function::const_iterator FI = F->begin(), FE = F->end(); + FI != FE; ++FI) { + for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast<CallInst>(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart || Callee == LifetimeEnd) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + Allocas.insert(std::make_pair(AI, 0)); + } + } + } + } + + // All that said, we still want the intrinsics in the order they appear in the + // block, so that we can represent later ones with earlier ones and skip + // worrying about dominance, so run through the entry block and index those + // allocas which we identified above. + AllocasByIndex.reserve(Allocas.size()); + const BasicBlock *EntryBB = &F->getEntryBlock(); + for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end(); + BI != BE; ++BI) { + const AllocaInst *AI = dyn_cast<AllocaInst>(BI); + if (!AI || !AI->isStaticAlloca()) continue; + + AllocaMap::iterator I = Allocas.find(AI); + if (I != Allocas.end()) { + I->second = AllocasByIndex.size(); + AllocasByIndex.push_back(getInfo(AI)); + } + } + assert(AllocasByIndex.size() == Allocas.size()); +} + +// Calculate the starting point from which inter-block liveness will be +// computed. +void AllocaManager::collectBlocks() { + NamedRegionTimer Timer("Collect Blocks", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Seen(AllocaCount); + + for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) { + const BasicBlock *BB = I; + + BlockLifetimeInfo &BLI = BlockLiveness[BB]; + BLI.Start.resize(AllocaCount); + BLI.End.resize(AllocaCount); + + // Track which allocas we've seen. This is used because if a lifetime start + // is the first lifetime marker for an alloca in a block, the alloca is + // live-in. + Seen.reset(); + + // Walk the instructions and compute the Start and End sets. + for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast<CallInst>(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + AllocaMap::const_iterator MI = Allocas.find(AI); + if (MI != Allocas.end()) { + size_t AllocaIndex = MI->second; + if (!Seen.test(AllocaIndex)) { + BLI.Start.set(AllocaIndex); + } + BLI.End.reset(AllocaIndex); + Seen.set(AllocaIndex); + } + } + } else if (Callee == LifetimeEnd) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + AllocaMap::const_iterator MI = Allocas.find(AI); + if (MI != Allocas.end()) { + size_t AllocaIndex = MI->second; + BLI.End.set(AllocaIndex); + Seen.set(AllocaIndex); + } + } + } + } + + // Lifetimes that start in this block and do not end here are live-out. + BLI.LiveOut = BLI.Start; + BLI.LiveOut.reset(BLI.End); + if (BLI.LiveOut.any()) { + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + InterBlockWorklist.insert(*SI); + } + } + + // Lifetimes that end in this block and do not start here are live-in. + // TODO: Is this actually true? What are the semantics of a standalone + // lifetime end? See also the code in computeInterBlockLiveness. + BLI.LiveIn = BLI.End; + BLI.LiveIn.reset(BLI.Start); + if (BLI.LiveIn.any()) { + for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + InterBlockWorklist.insert(*PI); + } + } + } +} + +// Compute the LiveIn and LiveOut sets for each block in F. +void AllocaManager::computeInterBlockLiveness() { + NamedRegionTimer Timer("Compute inter-block liveness", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Temp(AllocaCount); + + // This is currently using a very simple-minded bi-directional liveness + // propagation algorithm. Numerous opportunities for compile time + // speedups here. + while (!InterBlockWorklist.empty()) { + const BasicBlock *BB = InterBlockWorklist.pop_back_val(); + BlockLifetimeInfo &BLI = BlockLiveness[BB]; + + // Compute the new live-in set. + for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + Temp |= BlockLiveness[*PI].LiveOut; + } + + // If it contains new live blocks, prepare to propagate them. + if (Temp.test(BLI.LiveIn)) { + BLI.LiveIn |= Temp; + BLI.LiveOut |= Temp; + BLI.LiveOut.reset(BLI.End); + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + InterBlockWorklist.insert(*SI); + } + } + Temp.reset(); + + // Compute the new live-out set. + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + Temp |= BlockLiveness[*SI].LiveIn; + } + + // If it contains new live blocks, prepare to propagate them. + if (Temp.test(BLI.LiveOut)) { + // TODO: As above, what are the semantics of a standalone lifetime end? + BLI.LiveOut |= Temp; + BLI.LiveIn |= Temp; + BLI.LiveIn.reset(BLI.Start); + for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + InterBlockWorklist.insert(*PI); + } + } + Temp.reset(); + } +} + +// Determine overlapping liveranges within blocks. +void AllocaManager::computeIntraBlockLiveness() { + NamedRegionTimer Timer("Compute intra-block liveness", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Current(AllocaCount); + + AllocaCompatibility.resize(AllocaCount, BitVector(AllocaCount, true)); + + for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) { + const BasicBlock *BB = I; + const BlockLifetimeInfo &BLI = BlockLiveness[BB]; + + Current = BLI.LiveIn; + + for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) { + AllocaCompatibility[i].reset(Current); + } + + for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast<CallInst>(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + size_t AIndex = Allocas[AI]; + // We conflict with everything else that's currently live. + AllocaCompatibility[AIndex].reset(Current); + // Everything else that's currently live conflicts with us. + for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) { + AllocaCompatibility[i].reset(AIndex); + } + // We're now live. + Current.set(AIndex); + } + } else if (Callee == LifetimeEnd) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + size_t AIndex = Allocas[AI]; + // We're no longer live. + Current.reset(AIndex); + } + } + } + } +} + +// Decide which allocas will represent which other allocas, and if so what their +// size and alignment will need to be. +void AllocaManager::computeRepresentatives() { + NamedRegionTimer Timer("Compute Representatives", "AllocaManager", + TimePassesIsEnabled); + + for (size_t i = 0, e = AllocasByIndex.size(); i != e; ++i) { + // If we've already represented this alloca with another, don't visit it. + if (AllocasByIndex[i].isForwarded()) continue; + if (i > size_t(INT_MAX)) continue; + + // Find compatible allocas. This is a simple greedy algorithm. + for (int j = int(i); ; ) { + assert(j >= int(i)); + j = AllocaCompatibility[i].find_next(j); + assert(j != int(i)); + if (j < 0) break; + if (!AllocaCompatibility[j][i]) continue; + + DEBUG(dbgs() << "Allocas: " + "Representing " + << AllocasByIndex[j].getInst()->getName() << " " + "with " + << AllocasByIndex[i].getInst()->getName() << "\n"); + ++NumAllocas; + + assert(!AllocasByIndex[j].isForwarded()); + + AllocasByIndex[i].mergeSize(AllocasByIndex[j].getSize()); + AllocasByIndex[i].mergeAlignment(AllocasByIndex[j].getAlignment()); + AllocasByIndex[j].forward(i); + + AllocaCompatibility[i] &= AllocaCompatibility[j]; + AllocaCompatibility[j].reset(); + } + } +} + +void AllocaManager::computeFrameOffsets() { + NamedRegionTimer Timer("Compute Frame Offsets", "AllocaManager", + TimePassesIsEnabled); + + // Walk through the entry block and collect all the allocas, including the + // ones with no lifetime markers that we haven't looked at yet. We walk in + // reverse order so that we can set the representative allocas as those that + // dominate the others as we go. + const BasicBlock *EntryBB = &F->getEntryBlock(); + for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end(); + BI != BE; ++BI) { + const AllocaInst *AI = dyn_cast<AllocaInst>(BI); + if (!AI || !AI->isStaticAlloca()) continue; + + AllocaMap::const_iterator I = Allocas.find(AI); + if (I != Allocas.end()) { + // An alloca with lifetime markers. Emit the record we've crafted for it, + // if we've chosen to keep it as a representative. + const AllocaInfo &Info = AllocasByIndex[I->second]; + if (!Info.isForwarded()) { + SortedAllocas.push_back(Info); + } + } else { + // An alloca with no lifetime markers. + SortedAllocas.push_back(getInfo(AI)); + } + } + + // Sort the allocas to hopefully reduce padding. + array_pod_sort(SortedAllocas.begin(), SortedAllocas.end(), AllocaSort); + + // Assign stack offsets. + uint64_t CurrentOffset = 0; + for (SmallVectorImpl<AllocaInfo>::const_iterator I = SortedAllocas.begin(), + E = SortedAllocas.end(); I != E; ++I) { + const AllocaInfo &Info = *I; + uint64_t NewOffset = RoundUpToAlignment(CurrentOffset, Info.getAlignment()); + + // For backwards compatibility, align every power-of-two multiple alloca to + // its greatest power-of-two factor, up to 8 bytes. In particular, cube2hash + // is known to depend on this. + // TODO: Consider disabling this and making people fix their code. + if (uint64_t Size = Info.getSize()) { + uint64_t P2 = uint64_t(1) << CountTrailingZeros_64(Size); + unsigned CompatAlign = unsigned(std::min(P2, uint64_t(8))); + NewOffset = RoundUpToAlignment(NewOffset, CompatAlign); + } + + const AllocaInst *AI = Info.getInst(); + StaticAllocas[AI] = StaticAllocation(AI, NewOffset); + + CurrentOffset = NewOffset + Info.getSize(); + } + + // Add allocas that were represented by other allocas to the StaticAllocas map + // so that our clients can look them up. + for (unsigned i = 0, e = AllocasByIndex.size(); i != e; ++i) { + const AllocaInfo &Info = AllocasByIndex[i]; + if (!Info.isForwarded()) continue; + size_t j = Info.getForwardedID(); + assert(!AllocasByIndex[j].isForwarded()); + + StaticAllocaMap::const_iterator I = + StaticAllocas.find(AllocasByIndex[j].getInst()); + assert(I != StaticAllocas.end()); + + std::pair<StaticAllocaMap::const_iterator, bool> Pair = + StaticAllocas.insert(std::make_pair(AllocasByIndex[i].getInst(), + I->second)); + assert(Pair.second); (void)Pair; + } + + // Record the final frame size. Keep the stack pointer 16-byte aligned. + FrameSize = CurrentOffset; + FrameSize = RoundUpToAlignment(FrameSize, 16); + + DEBUG(dbgs() << "Allocas: " + "Statically allocated frame size is " << FrameSize << "\n"); +} + +AllocaManager::AllocaManager() { +} + +void AllocaManager::analyze(const Function &Func, const DataLayout &Layout, + bool PerformColoring) { + NamedRegionTimer Timer("AllocaManager", TimePassesIsEnabled); + + assert(Allocas.empty()); + assert(AllocasByIndex.empty()); + assert(AllocaCompatibility.empty()); + assert(BlockLiveness.empty()); + assert(StaticAllocas.empty()); + assert(SortedAllocas.empty()); + + DL = &Layout; + F = &Func; + + // Get the declarations for the lifetime intrinsics so we can quickly test to + // see if they are used at all, and for use later if they are. + const Module *M = F->getParent(); + LifetimeStart = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_start)); + LifetimeEnd = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_end)); + + // If we are optimizing and the module contains any lifetime intrinsics, run + // the alloca coloring algorithm. + if (PerformColoring && + ((LifetimeStart && !LifetimeStart->use_empty()) || + (LifetimeEnd && !LifetimeEnd->use_empty()))) { + + collectMarkedAllocas(); + + if (!AllocasByIndex.empty()) { + DEBUG(dbgs() << "Allocas: " + << AllocasByIndex.size() << " marked allocas found\n"); + + collectBlocks(); + computeInterBlockLiveness(); + computeIntraBlockLiveness(); + BlockLiveness.clear(); + + computeRepresentatives(); + AllocaCompatibility.clear(); + } + } + + computeFrameOffsets(); + SortedAllocas.clear(); + Allocas.clear(); + AllocasByIndex.clear(); +} + +void AllocaManager::clear() { + StaticAllocas.clear(); +} + +bool +AllocaManager::getFrameOffset(const AllocaInst *AI, uint64_t *Offset) const { + assert(AI->isStaticAlloca()); + StaticAllocaMap::const_iterator I = StaticAllocas.find(AI); + assert(I != StaticAllocas.end()); + *Offset = I->second.Offset; + return AI == I->second.Representative; +} + +const AllocaInst * +AllocaManager::getRepresentative(const AllocaInst *AI) const { + assert(AI->isStaticAlloca()); + StaticAllocaMap::const_iterator I = StaticAllocas.find(AI); + assert(I != StaticAllocas.end()); + return I->second.Representative; +} diff --git a/lib/Target/JSBackend/AllocaManager.h b/lib/Target/JSBackend/AllocaManager.h new file mode 100644 index 0000000000..44b07981bc --- /dev/null +++ b/lib/Target/JSBackend/AllocaManager.h @@ -0,0 +1,172 @@ +//===-- AllocaManager.h ---------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass declares the AllocaManager class. +// +//===----------------------------------------------------------------------===// + +#ifndef JSBACKEND_ALLOCAMANAGER_H +#define JSBACKEND_ALLOCAMANAGER_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SetVector.h" + +namespace llvm { + +class AllocaInst; +class BasicBlock; +class CallInst; +class DataLayout; +class Function; + +/// Compute frame layout for allocas. +class AllocaManager { + const DataLayout *DL; + const Function *LifetimeStart; + const Function *LifetimeEnd; + const Function *F; + + // Per-block lifetime information. + struct BlockLifetimeInfo { + BitVector Start; + BitVector End; + BitVector LiveIn; + BitVector LiveOut; + }; + typedef DenseMap<const BasicBlock *, BlockLifetimeInfo> LivenessMap; + LivenessMap BlockLiveness; + + // Worklist for inter-block liveness analysis. + typedef SmallSetVector<const BasicBlock *, 8> InterBlockWorklistVec; + InterBlockWorklistVec InterBlockWorklist; + + // Map allocas to their index in AllocasByIndex. + typedef DenseMap<const AllocaInst *, size_t> AllocaMap; + AllocaMap Allocas; + + // Information about an alloca. Note that the size and alignment may vary + // from what's in the actual AllocaInst when an alloca is also representing + // another with perhaps greater size and/or alignment needs. + // + // When an alloca is represented by another, its AllocaInfo is marked as + // "forwarded", at which point it no longer holds a size and alignment, but + // the index of the representative AllocaInfo. + class AllocaInfo { + const AllocaInst *Inst; + uint64_t Size; + unsigned Alignment; + + public: + AllocaInfo(const AllocaInst *I, uint64_t S, unsigned A) + : Inst(I), Size(S), Alignment(A) { + assert(I != NULL); + assert(A != 0); + assert(!isForwarded()); + } + + bool isForwarded() const { return Alignment == 0; } + + size_t getForwardedID() const { + assert(isForwarded()); + return static_cast<size_t>(Size); + } + + void forward(size_t i) { + assert(!isForwarded()); + Alignment = 0; + Size = i; + assert(isForwarded()); + assert(getForwardedID() == i); + } + + const AllocaInst *getInst() const { return Inst; } + + uint64_t getSize() const { assert(!isForwarded()); return Size; } + unsigned getAlignment() const { assert(!isForwarded()); return Alignment; } + + void mergeSize(uint64_t S) { + assert(!isForwarded()); + Size = std::max(Size, S); + assert(!isForwarded()); + } + void mergeAlignment(unsigned A) { + assert(A != 0); + assert(!isForwarded()); + Alignment = std::max(Alignment, A); + assert(!isForwarded()); + } + }; + typedef SmallVector<AllocaInfo, 32> AllocaVec; + AllocaVec AllocasByIndex; + + // For each alloca, which allocas can it safely represent? Allocas are + // identified by AllocasByIndex index. + // TODO: Vector-of-vectors isn't the fastest data structure possible here. + typedef SmallVector<BitVector, 32> AllocaCompatibilityVec; + AllocaCompatibilityVec AllocaCompatibility; + + // This is for allocas that will eventually be sorted. + SmallVector<AllocaInfo, 32> SortedAllocas; + + // Static allocation results. + struct StaticAllocation { + const AllocaInst *Representative; + uint64_t Offset; + StaticAllocation() {} + StaticAllocation(const AllocaInst *A, uint64_t O) + : Representative(A), Offset(O) {} + }; + typedef DenseMap<const AllocaInst *, StaticAllocation> StaticAllocaMap; + StaticAllocaMap StaticAllocas; + uint64_t FrameSize; + + uint64_t getSize(const AllocaInst *AI); + unsigned getAlignment(const AllocaInst *AI); + AllocaInfo getInfo(const AllocaInst *AI); + const AllocaInst *getAllocaFromIntrinsic(const CallInst *CI); + static int AllocaSort(const void *l, const void *r); + + void collectMarkedAllocas(); + void collectBlocks(); + void computeInterBlockLiveness(); + void computeIntraBlockLiveness(); + void computeRepresentatives(); + void computeFrameOffsets(); + +public: + AllocaManager(); + + /// Analyze the given function and prepare for getRepresentative queries. + void analyze(const Function &Func, const DataLayout &Layout, + bool PerformColoring); + + /// Reset all stored state. + void clear(); + + /// Return the representative alloca for the given alloca. When allocas are + /// merged, one is chosen as the representative to stand for the rest. + /// References to the alloca should take the form of references to the + /// representative. + const AllocaInst *getRepresentative(const AllocaInst *AI) const; + + /// Set *offset to the frame offset for the given alloca. Return true if the + /// given alloca is representative, meaning that it needs an explicit + /// definition in the function entry. Return false if some other alloca + /// represents this one. + bool getFrameOffset(const AllocaInst *AI, uint64_t *offset) const; + + /// Return the total frame size for all static allocas and associated padding. + uint64_t getFrameSize() const { return FrameSize; } +}; + +} // namespace llvm + +#endif diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt index a80e23050a..521210e94d 100644 --- a/lib/Target/JSBackend/CMakeLists.txt +++ b/lib/Target/JSBackend/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_target(JSBackendCodeGen + AllocaManager.cpp ExpandI64.cpp JSBackend.cpp JSTargetMachine.cpp diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp index f1a6aa59ee..92d562a1df 100644 --- a/lib/Target/JSBackend/JSBackend.cpp +++ b/lib/Target/JSBackend/JSBackend.cpp @@ -16,6 +16,7 @@ #include "JSTargetMachine.h" #include "MCTargetDesc/JSBackendMCTargetDesc.h" +#include "AllocaManager.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" @@ -103,7 +104,6 @@ namespace { typedef std::vector<unsigned char> HeapData; typedef std::pair<unsigned, unsigned> Address; typedef std::map<std::string, Type::TypeID> VarMap; - typedef std::map<const AllocaInst*, unsigned> AllocaIntMap; typedef std::map<std::string, Address> GlobalAddressMap; typedef std::vector<std::string> FunctionTable; typedef std::map<std::string, FunctionTable> FunctionTableMap; @@ -121,8 +121,7 @@ namespace { unsigned UniqueNum; ValueMap ValueNames; VarMap UsedVars; - AllocaIntMap StackAllocs; - unsigned TotalStackAllocs; + AllocaManager Allocas; HeapData GlobalData8; HeapData GlobalData32; HeapData GlobalData64; @@ -140,13 +139,16 @@ namespace { bool UsesSIMD; int InvokeState; // cycles between 0, 1 after preInvoke, 2 after call, 0 again after postInvoke. hackish, no argument there. + CodeGenOpt::Level OptLevel; DataLayout *DL; #include "CallHandlers.h" public: static char ID; - explicit JSWriter(formatted_raw_ostream &o) : ModulePass(ID), Out(o), UniqueNum(0), UsesSIMD(false), InvokeState(0) {} + JSWriter(formatted_raw_ostream &o, CodeGenOpt::Level OptLevel) + : ModulePass(ID), Out(o), UniqueNum(0), UsesSIMD(false), InvokeState(0), + OptLevel(OptLevel) {} virtual const char *getPassName() const { return "JavaScript backend"; } @@ -431,6 +433,10 @@ static inline char halfCharToHex(unsigned char half) { } static inline void sanitizeGlobal(std::string& str) { + // Global names are prefixed with "_" to prevent them from colliding with + // names of things in normal JS. + str = "_" + str; + // functions and globals should already be in C-style format, // in addition to . for llvm intrinsics and possibly $ and so forth. // There is a risk of collisions here, we just lower all these @@ -444,6 +450,10 @@ static inline void sanitizeGlobal(std::string& str) { } static inline void sanitizeLocal(std::string& str) { + // Local names are prefixed with "$" to prevent them from colliding with + // global names. + str = "$" + str; + // We need to convert every string that is not a valid JS identifier into // a valid one, without collisions - we cannot turn "x.a" into "x_a" while // also leaving "x_a" as is, for example. @@ -565,18 +575,29 @@ const std::string &JSWriter::getJSName(const Value* val) { if (I != ValueNames.end() && I->first == val) return I->second; + // If this is an alloca we've replaced with another, use the other name. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(val)) { + if (AI->isStaticAlloca()) { + const AllocaInst *Rep = Allocas.getRepresentative(AI); + if (Rep != AI) { + return getJSName(Rep); + } + } + } + std::string name; if (val->hasName()) { - if (isa<Function>(val) || isa<Constant>(val)) { - name = std::string("_") + val->getName().str(); - sanitizeGlobal(name); - } else { - name = std::string("$") + val->getName().str(); - sanitizeLocal(name); - } + name = val->getName().str(); + } else { + name = utostr(UniqueNum++); + } + + if (isa<Constant>(val)) { + sanitizeGlobal(name); } else { - name = "u$" + utostr(UniqueNum++); + sanitizeLocal(name); } + return ValueNames[val] = name; } @@ -1368,22 +1389,31 @@ void JSWriter::generateExpression(const User *I, raw_string_ostream& Code) { break; } case Instruction::Alloca: { - if (NativizedVars.count(I)) { + const AllocaInst* AI = cast<AllocaInst>(I); + + if (NativizedVars.count(AI)) { // nativized stack variable, we just need a 'var' definition - UsedVars[getJSName(I)] = cast<PointerType>(I->getType())->getElementType()->getTypeID(); + UsedVars[getJSName(AI)] = AI->getType()->getElementType()->getTypeID(); return; } - const AllocaInst* AI = cast<AllocaInst>(I); - AllocaIntMap::iterator AIMI = StackAllocs.find(AI); - if (AIMI != StackAllocs.end()) { - // fixed-size allocation that is already taken into account in the big initial allocation - if (AIMI->second) { - Code << getAssign(AI) << "sp + " << utostr(AIMI->second) << "|0"; - } else { - Code << getAssign(AI) << "sp"; + + // Fixed-size entry-block allocations are allocated all at once in the + // function prologue. + if (AI->isStaticAlloca()) { + uint64_t Offset; + if (Allocas.getFrameOffset(AI, &Offset)) { + if (Offset != 0) { + Code << getAssign(AI) << "sp + " << Offset << "|0"; + } else { + Code << getAssign(AI) << "sp"; + } + break; } - break; + // Otherwise, this alloca is being represented by another alloca, so + // there's nothing to print. + return; } + Type *T = AI->getAllocatedType(); std::string Size; uint64_t BaseSize = DL->getTypeAllocSize(T); @@ -1775,8 +1805,8 @@ void JSWriter::printFunctionBody(const Function *F) { // Emit stack entry Out << " " << getAdHocAssign("sp", Type::getInt32Ty(F->getContext())) << "STACKTOP;"; - if (TotalStackAllocs) { - Out << "\n " << "STACKTOP = STACKTOP + " + utostr(TotalStackAllocs) + "|0;"; + if (uint64_t FrameSize = Allocas.getFrameSize()) { + Out << "\n " "STACKTOP = STACKTOP + " << FrameSize << "|0;"; } // Emit (relooped) code @@ -1815,59 +1845,24 @@ void JSWriter::processConstants() { void JSWriter::printFunction(const Function *F) { ValueNames.clear(); - // Ensure all arguments and locals are named (we assume used values need names, which might be false if the optimizer did not run) - unsigned Next = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - if (!AI->hasName() && !AI->use_empty()) { - ValueNames[AI] = "$" + utostr(Next++); - } - } - for (Function::const_iterator BI = F->begin(), BE = F->end(); - BI != BE; ++BI) { - for (BasicBlock::const_iterator II = BI->begin(), E = BI->end(); - II != E; ++II) { - if (!II->hasName() && !II->use_empty()) { - ValueNames[II] = "$" + utostr(Next++); - } - } - } - // Prepare and analyze function UsedVars.clear(); UniqueNum = 0; - calculateNativizedVars(F); - StackAllocs.clear(); - TotalStackAllocs = 0; + // When optimizing, the regular optimizer (mem2reg, SROA, GVN, and others) + // will have already taken all the opportunities for nativization. + if (OptLevel == CodeGenOpt::None) + calculateNativizedVars(F); - for (Function::const_iterator BI = F->begin(), BE = F->end(); BI != BE; ++BI) { - for (BasicBlock::const_iterator II = BI->begin(), E = BI->end(); II != E; ++II) { - if (const AllocaInst* AI = dyn_cast<AllocaInst>(II)) { - Type *T = AI->getAllocatedType(); - const Value *AS = AI->getArraySize(); - unsigned BaseSize = DL->getTypeAllocSize(T); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(AS)) { - // TODO: group by alignment to avoid unnecessary padding - unsigned Size = stackAlign(BaseSize * CI->getZExtValue()); - StackAllocs[AI] = TotalStackAllocs; - TotalStackAllocs += Size; - } - } else { - // stop after the first non-alloca - could alter the stack - // however, ptrtoints are ok, and the legalizaton passes introduce them - if (!isa<PtrToIntInst>(II)) break; - } - } - break; - } + // Do alloca coloring at -O1 and higher. + Allocas.analyze(*F, *DL, OptLevel != CodeGenOpt::None); // Emit the function std::string Name = F->getName(); sanitizeGlobal(Name); - Out << "function _" << Name << "("; + Out << "function " << Name << "("; for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) { if (AI != F->arg_begin()) Out << ","; @@ -1884,6 +1879,8 @@ void JSWriter::printFunction(const Function *F) { printFunctionBody(F); Out << "}"; nl(Out); + + Allocas.clear(); } void JSWriter::printModuleBody() { @@ -2346,8 +2343,16 @@ bool JSTargetMachine::addPassesToEmitFile(PassManagerBase &PM, assert(FileType == TargetMachine::CGFT_AssemblyFile); PM.add(createExpandI64Pass()); - PM.add(createSimplifyAllocasPass()); - PM.add(new JSWriter(o)); + + CodeGenOpt::Level OptLevel = getOptLevel(); + + // When optimizing, there shouldn't be any opportunities for SimplifyAllocas + // because the regular optimizer should have taken them all (GVN, and possibly + // also SROA). + if (OptLevel == CodeGenOpt::None) + PM.add(createSimplifyAllocasPass()); + + PM.add(new JSWriter(o, OptLevel)); return false; } diff --git a/test/CodeGen/JS/allocamanager.ll b/test/CodeGen/JS/allocamanager.ll new file mode 100644 index 0000000000..c2f7c5f53d --- /dev/null +++ b/test/CodeGen/JS/allocamanager.ll @@ -0,0 +1,166 @@ +; RUN: llc -march=js -o - < %s | FileCheck %s + +; Basic AllocaManager feature test. Eliminate user variable cupcake in favor of +; user variable muffin, and combine all the vararg buffers. And align the stack +; pointer. + +; ModuleID = 'test/CodeGen/JS/allocamanager.ll' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +%struct._IO_FILE = type opaque + +@stderr = external constant [4 x i8], align 4 +@.str = private unnamed_addr constant [26 x i8] c"hello from %s; argc is %d\00", align 1 +@.str1 = private unnamed_addr constant [33 x i8] c"message from the program: \22%s\22!\0A\00", align 1 +@.str2 = private unnamed_addr constant [38 x i8] c"with argc %d, I, %s, must say goodbye\00", align 1 +@.str3 = private unnamed_addr constant [43 x i8] c"another message from the program: \22%s\22...\0A\00", align 1 + +; CHECK: function _foo($argc,$argv) { +; CHECK-NOT: cupcake +; CHECK: STACKTOP = STACKTOP + 128|0; +; CHECK-NEXT: vararg_buffer0 = +; CHECK-NEXT: $muffin = +; CHECK-NOT: cupcake +; CHECK: } + +; Function Attrs: nounwind +define void @foo(i32 %argc, i8** %argv) #0 { +entry: + %vararg_buffer0 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast10 = bitcast <{ i8* }>* %vararg_buffer0 to i8* + %vararg_buffer5 = alloca <{ i32, i8* }>, align 8 + %vararg_lifetime_bitcast6 = bitcast <{ i32, i8* }>* %vararg_buffer5 to i8* + %vararg_buffer2 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast3 = bitcast <{ i8* }>* %vararg_buffer2 to i8* + %vararg_buffer1 = alloca <{ i8*, i32 }>, align 8 + %vararg_lifetime_bitcast = bitcast <{ i8*, i32 }>* %vararg_buffer1 to i8* + %muffin = alloca [117 x i8], align 1 + %cupcake = alloca [119 x i8], align 1 + %tmp = getelementptr [117 x i8]* %muffin, i32 0, i32 0 + call void @llvm.lifetime.start(i64 117, i8* %tmp) #0 + %tmp1 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast) + %vararg_ptr = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 0 + store i8* %tmp1, i8** %vararg_ptr, align 4 + %vararg_ptr1 = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 1 + store i32 %argc, i32* %vararg_ptr1, align 4 + %call = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i8*, i32 }>*)*)(i8* %tmp, i8* getelementptr inbounds ([26 x i8]* @.str, i32 0, i32 0), <{ i8*, i32 }>* %vararg_buffer1) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) + %tmp2 = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast3) + %vararg_ptr4 = getelementptr <{ i8* }>* %vararg_buffer2, i32 0, i32 0 + store i8* %tmp, i8** %vararg_ptr4, align 4 + %call2 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp2, i8* getelementptr inbounds ([33 x i8]* @.str1, i32 0, i32 0), <{ i8* }>* %vararg_buffer2) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast3) + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp3 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp3) #0 + %tmp4 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast6) + %vararg_ptr7 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 0 + store i32 %argc, i32* %vararg_ptr7, align 4 + %vararg_ptr8 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 1 + store i8* %tmp4, i8** %vararg_ptr8, align 4 + %call5 = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i32, i8* }>*)*)(i8* %tmp3, i8* getelementptr inbounds ([38 x i8]* @.str2, i32 0, i32 0), <{ i32, i8* }>* %vararg_buffer5) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast6) + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast10) + %vararg_ptr11 = getelementptr <{ i8* }>* %vararg_buffer0, i32 0, i32 0 + store i8* %tmp3, i8** %vararg_ptr11, align 4 + %call7 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp2, i8* getelementptr inbounds ([43 x i8]* @.str3, i32 0, i32 0), <{ i8* }>* %vararg_buffer0) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast10) + call void @llvm.lifetime.end(i64 119, i8* %tmp3) #0 + ret void +} + +; CHECK: function _bar($argc,$argv) { +; CHECK-NOT: cupcake +; CHECK: STACKTOP = STACKTOP + 128|0; +; CHECK-NEXT: vararg_buffer0 = +; CHECK-NEXT: $muffin = +; CHECK-NOT: cupcake +; CHECK: } + +; Function Attrs: nounwind +define void @bar(i32 %argc, i8** %argv) #0 { +entry: + %vararg_buffer0 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast10 = bitcast <{ i8* }>* %vararg_buffer0 to i8* + %vararg_buffer5 = alloca <{ i32, i8* }>, align 8 + %vararg_lifetime_bitcast6 = bitcast <{ i32, i8* }>* %vararg_buffer5 to i8* + %vararg_buffer2 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast3 = bitcast <{ i8* }>* %vararg_buffer2 to i8* + %vararg_buffer1 = alloca <{ i8*, i32 }>, align 8 + %vararg_lifetime_bitcast = bitcast <{ i8*, i32 }>* %vararg_buffer1 to i8* + %muffin = alloca [117 x i8], align 1 + %cupcake = alloca [119 x i8], align 1 + %tmp = getelementptr [117 x i8]* %muffin, i32 0, i32 0 + call void @llvm.lifetime.start(i64 117, i8* %tmp) #0 + %cmp = icmp eq i32 %argc, 39 + br i1 %cmp, label %if.end.thread, label %if.end + +if.end.thread: ; preds = %entry + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp1 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp1) #0 + %.pre = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + br label %if.then4 + +if.end: ; preds = %entry + %tmp2 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast) + %vararg_ptr = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 0 + store i8* %tmp2, i8** %vararg_ptr, align 4 + %vararg_ptr1 = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 1 + store i32 %argc, i32* %vararg_ptr1, align 4 + %call = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i8*, i32 }>*)*)(i8* %tmp, i8* getelementptr inbounds ([26 x i8]* @.str, i32 0, i32 0), <{ i8*, i32 }>* %vararg_buffer1) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) + %tmp3 = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast3) + %vararg_ptr4 = getelementptr <{ i8* }>* %vararg_buffer2, i32 0, i32 0 + store i8* %tmp, i8** %vararg_ptr4, align 4 + %call2 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp3, i8* getelementptr inbounds ([33 x i8]* @.str1, i32 0, i32 0), <{ i8* }>* %vararg_buffer2) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast3) + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp4 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp4) #0 + %cmp3 = icmp eq i32 %argc, 45 + br i1 %cmp3, label %if.end10, label %if.then4 + +if.then4: ; preds = %if.end, %if.end.thread + %tmp5 = phi %struct._IO_FILE* [ %.pre, %if.end.thread ], [ %tmp3, %if.end ] + %tmp6 = phi i8* [ %tmp1, %if.end.thread ], [ %tmp4, %if.end ] + %tmp7 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast6) + %vararg_ptr7 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 0 + store i32 %argc, i32* %vararg_ptr7, align 4 + %vararg_ptr8 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 1 + store i8* %tmp7, i8** %vararg_ptr8, align 4 + %call7 = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i32, i8* }>*)*)(i8* %tmp6, i8* getelementptr inbounds ([38 x i8]* @.str2, i32 0, i32 0), <{ i32, i8* }>* %vararg_buffer5) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast6) + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast10) + %vararg_ptr11 = getelementptr <{ i8* }>* %vararg_buffer0, i32 0, i32 0 + store i8* %tmp6, i8** %vararg_ptr11, align 4 + %call9 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp5, i8* getelementptr inbounds ([43 x i8]* @.str3, i32 0, i32 0), <{ i8* }>* %vararg_buffer0) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast10) + br label %if.end10 + +if.end10: ; preds = %if.then4, %if.end + %tmp8 = phi i8* [ %tmp4, %if.end ], [ %tmp6, %if.then4 ] + call void @llvm.lifetime.end(i64 119, i8* %tmp8) #0 + ret void +} + +; Function Attrs: nounwind +declare i32 @sprintf(i8*, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @fprintf(%struct._IO_FILE*, i8*, i8*) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +attributes #0 = { nounwind } |