aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2014-03-05 13:16:08 -0800
committerAlon Zakai <alonzakai@gmail.com>2014-03-05 13:16:08 -0800
commit450872142f746f2047816534c345981be2e7bdf1 (patch)
tree1b6cdcfac88f4400566b5071b00504843e7c27a4
parentdc1378e24ff461546f531ec3a6138046b8e9c9ef (diff)
parentc6f3be7a4da7ba2f94dcbd3b442147807b558b99 (diff)
Merge pull request #28 from sunfishcode/incoming
The alloca coloring patch
-rw-r--r--lib/Target/JSBackend/AllocaManager.cpp520
-rw-r--r--lib/Target/JSBackend/AllocaManager.h172
-rw-r--r--lib/Target/JSBackend/CMakeLists.txt1
-rw-r--r--lib/Target/JSBackend/JSBackend.cpp143
-rw-r--r--test/CodeGen/JS/allocamanager.ll166
5 files changed, 933 insertions, 69 deletions
diff --git a/lib/Target/JSBackend/AllocaManager.cpp b/lib/Target/JSBackend/AllocaManager.cpp
new file mode 100644
index 0000000000..4673a34644
--- /dev/null
+++ b/lib/Target/JSBackend/AllocaManager.cpp
@@ -0,0 +1,520 @@
+//===-- AllocaManager.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AllocaManager class.
+//
+// The AllocaManager computes a frame layout, assigning every static alloca an
+// offset. It does alloca liveness analysis in order to reuse stack memory,
+// using lifetime intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "allocamanager"
+#include "AllocaManager.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumAllocas, "Number of allocas eliminated");
+
+// Return the size of the given alloca.
+uint64_t AllocaManager::getSize(const AllocaInst *AI) {
+ assert(AI->isStaticAlloca());
+ return DL->getTypeAllocSize(AI->getAllocatedType()) *
+ cast<ConstantInt>(AI->getArraySize())->getValue().getZExtValue();
+}
+
+// Return the alignment of the given alloca.
+unsigned AllocaManager::getAlignment(const AllocaInst *AI) {
+ assert(AI->isStaticAlloca());
+ return std::max(AI->getAlignment(),
+ DL->getABITypeAlignment(AI->getAllocatedType()));
+}
+
+AllocaManager::AllocaInfo AllocaManager::getInfo(const AllocaInst *AI) {
+ assert(AI->isStaticAlloca());
+ return AllocaInfo(AI, getSize(AI), getAlignment(AI));
+}
+
+// Given a lifetime_start or lifetime_end intrinsic, determine if it's
+// describing a static alloc memory region suitable for our analysis. If so,
+// return the alloca, otherwise return NULL.
+const AllocaInst *
+AllocaManager::getAllocaFromIntrinsic(const CallInst *CI) {
+ const IntrinsicInst *II = cast<IntrinsicInst>(CI);
+ assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end);
+
+ // Lifetime intrinsics have a size as their first argument and a pointer as
+ // their second argument.
+ const Value *Size = II->getArgOperand(0);
+ const Value *Ptr = II->getArgOperand(1);
+
+ // Check to see if we can convert the size to a host integer. If we can't,
+ // it's probably not worth worrying about.
+ const ConstantInt *SizeCon = dyn_cast<ConstantInt>(Size);
+ if (!SizeCon) return NULL;
+ const APInt &SizeAP = SizeCon->getValue();
+ if (SizeAP.getActiveBits() > 64) return NULL;
+ uint64_t MarkedSize = SizeAP.getZExtValue();
+
+ // We're only interested if the pointer is a static alloca.
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Ptr->stripPointerCasts());
+ if (!AI || !AI->isStaticAlloca()) return NULL;
+
+ // Make sure the size covers the alloca.
+ if (MarkedSize < getSize(AI)) return NULL;
+
+ return AI;
+}
+
+int AllocaManager::AllocaSort(const void *l, const void *r) {
+ const AllocaInfo *li = static_cast<const AllocaInfo *>(l);
+ const AllocaInfo *ri = static_cast<const AllocaInfo *>(r);
+
+ // Sort by alignment to minimize padding.
+ if (li->getAlignment() > ri->getAlignment()) return -1;
+ if (li->getAlignment() < ri->getAlignment()) return 1;
+
+ // Ensure a stable sort. We can do this because the pointers are
+ // pointing into the same array.
+ if (li > ri) return -1;
+ if (li < ri) return 1;
+
+ return 0;
+}
+
+// Collect allocas
+void AllocaManager::collectMarkedAllocas() {
+ NamedRegionTimer Timer("Collect Marked Allocas", "AllocaManager",
+ TimePassesIsEnabled);
+
+ // Weird semantics: If an alloca *ever* appears in a lifetime start or end
+ // within the same function, its lifetime begins only at the explicit lifetime
+ // starts and ends only at the explicit lifetime ends and function exit
+ // points. Otherwise, its lifetime begins in the entry block and it is live
+ // everywhere.
+ //
+ // And so, instead of just walking the entry block to find all the static
+ // allocas, we walk the whole body to find the intrinsics so we can find the
+ // set of static allocas referenced in the intrinsics.
+ for (Function::const_iterator FI = F->begin(), FE = F->end();
+ FI != FE; ++FI) {
+ for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ const CallInst *CI = dyn_cast<CallInst>(BI);
+ if (!CI) continue;
+
+ const Value *Callee = CI->getCalledValue();
+ if (Callee == LifetimeStart || Callee == LifetimeEnd) {
+ if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) {
+ Allocas.insert(std::make_pair(AI, 0));
+ }
+ }
+ }
+ }
+
+ // All that said, we still want the intrinsics in the order they appear in the
+ // block, so that we can represent later ones with earlier ones and skip
+ // worrying about dominance, so run through the entry block and index those
+ // allocas which we identified above.
+ AllocasByIndex.reserve(Allocas.size());
+ const BasicBlock *EntryBB = &F->getEntryBlock();
+ for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end();
+ BI != BE; ++BI) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(BI);
+ if (!AI || !AI->isStaticAlloca()) continue;
+
+ AllocaMap::iterator I = Allocas.find(AI);
+ if (I != Allocas.end()) {
+ I->second = AllocasByIndex.size();
+ AllocasByIndex.push_back(getInfo(AI));
+ }
+ }
+ assert(AllocasByIndex.size() == Allocas.size());
+}
+
+// Calculate the starting point from which inter-block liveness will be
+// computed.
+void AllocaManager::collectBlocks() {
+ NamedRegionTimer Timer("Collect Blocks", "AllocaManager",
+ TimePassesIsEnabled);
+
+ size_t AllocaCount = AllocasByIndex.size();
+
+ BitVector Seen(AllocaCount);
+
+ for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ const BasicBlock *BB = I;
+
+ BlockLifetimeInfo &BLI = BlockLiveness[BB];
+ BLI.Start.resize(AllocaCount);
+ BLI.End.resize(AllocaCount);
+
+ // Track which allocas we've seen. This is used because if a lifetime start
+ // is the first lifetime marker for an alloca in a block, the alloca is
+ // live-in.
+ Seen.reset();
+
+ // Walk the instructions and compute the Start and End sets.
+ for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ const CallInst *CI = dyn_cast<CallInst>(BI);
+ if (!CI) continue;
+
+ const Value *Callee = CI->getCalledValue();
+ if (Callee == LifetimeStart) {
+ if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) {
+ AllocaMap::const_iterator MI = Allocas.find(AI);
+ if (MI != Allocas.end()) {
+ size_t AllocaIndex = MI->second;
+ if (!Seen.test(AllocaIndex)) {
+ BLI.Start.set(AllocaIndex);
+ }
+ BLI.End.reset(AllocaIndex);
+ Seen.set(AllocaIndex);
+ }
+ }
+ } else if (Callee == LifetimeEnd) {
+ if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) {
+ AllocaMap::const_iterator MI = Allocas.find(AI);
+ if (MI != Allocas.end()) {
+ size_t AllocaIndex = MI->second;
+ BLI.End.set(AllocaIndex);
+ Seen.set(AllocaIndex);
+ }
+ }
+ }
+ }
+
+ // Lifetimes that start in this block and do not end here are live-out.
+ BLI.LiveOut = BLI.Start;
+ BLI.LiveOut.reset(BLI.End);
+ if (BLI.LiveOut.any()) {
+ for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ InterBlockWorklist.insert(*SI);
+ }
+ }
+
+ // Lifetimes that end in this block and do not start here are live-in.
+ // TODO: Is this actually true? What are the semantics of a standalone
+ // lifetime end? See also the code in computeInterBlockLiveness.
+ BLI.LiveIn = BLI.End;
+ BLI.LiveIn.reset(BLI.Start);
+ if (BLI.LiveIn.any()) {
+ for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+ InterBlockWorklist.insert(*PI);
+ }
+ }
+ }
+}
+
+// Compute the LiveIn and LiveOut sets for each block in F.
+void AllocaManager::computeInterBlockLiveness() {
+ NamedRegionTimer Timer("Compute inter-block liveness", "AllocaManager",
+ TimePassesIsEnabled);
+
+ size_t AllocaCount = AllocasByIndex.size();
+
+ BitVector Temp(AllocaCount);
+
+ // This is currently using a very simple-minded bi-directional liveness
+ // propagation algorithm. Numerous opportunities for compile time
+ // speedups here.
+ while (!InterBlockWorklist.empty()) {
+ const BasicBlock *BB = InterBlockWorklist.pop_back_val();
+ BlockLifetimeInfo &BLI = BlockLiveness[BB];
+
+ // Compute the new live-in set.
+ for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+ Temp |= BlockLiveness[*PI].LiveOut;
+ }
+
+ // If it contains new live blocks, prepare to propagate them.
+ if (Temp.test(BLI.LiveIn)) {
+ BLI.LiveIn |= Temp;
+ BLI.LiveOut |= Temp;
+ BLI.LiveOut.reset(BLI.End);
+ for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ InterBlockWorklist.insert(*SI);
+ }
+ }
+ Temp.reset();
+
+ // Compute the new live-out set.
+ for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ Temp |= BlockLiveness[*SI].LiveIn;
+ }
+
+ // If it contains new live blocks, prepare to propagate them.
+ if (Temp.test(BLI.LiveOut)) {
+ // TODO: As above, what are the semantics of a standalone lifetime end?
+ BLI.LiveOut |= Temp;
+ BLI.LiveIn |= Temp;
+ BLI.LiveIn.reset(BLI.Start);
+ for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+ InterBlockWorklist.insert(*PI);
+ }
+ }
+ Temp.reset();
+ }
+}
+
+// Determine overlapping liveranges within blocks.
+void AllocaManager::computeIntraBlockLiveness() {
+ NamedRegionTimer Timer("Compute intra-block liveness", "AllocaManager",
+ TimePassesIsEnabled);
+
+ size_t AllocaCount = AllocasByIndex.size();
+
+ BitVector Current(AllocaCount);
+
+ AllocaCompatibility.resize(AllocaCount, BitVector(AllocaCount, true));
+
+ for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ const BasicBlock *BB = I;
+ const BlockLifetimeInfo &BLI = BlockLiveness[BB];
+
+ Current = BLI.LiveIn;
+
+ for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) {
+ AllocaCompatibility[i].reset(Current);
+ }
+
+ for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ const CallInst *CI = dyn_cast<CallInst>(BI);
+ if (!CI) continue;
+
+ const Value *Callee = CI->getCalledValue();
+ if (Callee == LifetimeStart) {
+ if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) {
+ size_t AIndex = Allocas[AI];
+ // We conflict with everything else that's currently live.
+ AllocaCompatibility[AIndex].reset(Current);
+ // Everything else that's currently live conflicts with us.
+ for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) {
+ AllocaCompatibility[i].reset(AIndex);
+ }
+ // We're now live.
+ Current.set(AIndex);
+ }
+ } else if (Callee == LifetimeEnd) {
+ if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) {
+ size_t AIndex = Allocas[AI];
+ // We're no longer live.
+ Current.reset(AIndex);
+ }
+ }
+ }
+ }
+}
+
+// Decide which allocas will represent which other allocas, and if so what their
+// size and alignment will need to be.
+void AllocaManager::computeRepresentatives() {
+ NamedRegionTimer Timer("Compute Representatives", "AllocaManager",
+ TimePassesIsEnabled);
+
+ for (size_t i = 0, e = AllocasByIndex.size(); i != e; ++i) {
+ // If we've already represented this alloca with another, don't visit it.
+ if (AllocasByIndex[i].isForwarded()) continue;
+ if (i > size_t(INT_MAX)) continue;
+
+ // Find compatible allocas. This is a simple greedy algorithm.
+ for (int j = int(i); ; ) {
+ assert(j >= int(i));
+ j = AllocaCompatibility[i].find_next(j);
+ assert(j != int(i));
+ if (j < 0) break;
+ if (!AllocaCompatibility[j][i]) continue;
+
+ DEBUG(dbgs() << "Allocas: "
+ "Representing "
+ << AllocasByIndex[j].getInst()->getName() << " "
+ "with "
+ << AllocasByIndex[i].getInst()->getName() << "\n");
+ ++NumAllocas;
+
+ assert(!AllocasByIndex[j].isForwarded());
+
+ AllocasByIndex[i].mergeSize(AllocasByIndex[j].getSize());
+ AllocasByIndex[i].mergeAlignment(AllocasByIndex[j].getAlignment());
+ AllocasByIndex[j].forward(i);
+
+ AllocaCompatibility[i] &= AllocaCompatibility[j];
+ AllocaCompatibility[j].reset();
+ }
+ }
+}
+
+void AllocaManager::computeFrameOffsets() {
+ NamedRegionTimer Timer("Compute Frame Offsets", "AllocaManager",
+ TimePassesIsEnabled);
+
+ // Walk through the entry block and collect all the allocas, including the
+ // ones with no lifetime markers that we haven't looked at yet. We walk in
+ // reverse order so that we can set the representative allocas as those that
+ // dominate the others as we go.
+ const BasicBlock *EntryBB = &F->getEntryBlock();
+ for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end();
+ BI != BE; ++BI) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(BI);
+ if (!AI || !AI->isStaticAlloca()) continue;
+
+ AllocaMap::const_iterator I = Allocas.find(AI);
+ if (I != Allocas.end()) {
+ // An alloca with lifetime markers. Emit the record we've crafted for it,
+ // if we've chosen to keep it as a representative.
+ const AllocaInfo &Info = AllocasByIndex[I->second];
+ if (!Info.isForwarded()) {
+ SortedAllocas.push_back(Info);
+ }
+ } else {
+ // An alloca with no lifetime markers.
+ SortedAllocas.push_back(getInfo(AI));
+ }
+ }
+
+ // Sort the allocas to hopefully reduce padding.
+ array_pod_sort(SortedAllocas.begin(), SortedAllocas.end(), AllocaSort);
+
+ // Assign stack offsets.
+ uint64_t CurrentOffset = 0;
+ for (SmallVectorImpl<AllocaInfo>::const_iterator I = SortedAllocas.begin(),
+ E = SortedAllocas.end(); I != E; ++I) {
+ const AllocaInfo &Info = *I;
+ uint64_t NewOffset = RoundUpToAlignment(CurrentOffset, Info.getAlignment());
+
+ // For backwards compatibility, align every power-of-two multiple alloca to
+ // its greatest power-of-two factor, up to 8 bytes. In particular, cube2hash
+ // is known to depend on this.
+ // TODO: Consider disabling this and making people fix their code.
+ if (uint64_t Size = Info.getSize()) {
+ uint64_t P2 = uint64_t(1) << CountTrailingZeros_64(Size);
+ unsigned CompatAlign = unsigned(std::min(P2, uint64_t(8)));
+ NewOffset = RoundUpToAlignment(NewOffset, CompatAlign);
+ }
+
+ const AllocaInst *AI = Info.getInst();
+ StaticAllocas[AI] = StaticAllocation(AI, NewOffset);
+
+ CurrentOffset = NewOffset + Info.getSize();
+ }
+
+ // Add allocas that were represented by other allocas to the StaticAllocas map
+ // so that our clients can look them up.
+ for (unsigned i = 0, e = AllocasByIndex.size(); i != e; ++i) {
+ const AllocaInfo &Info = AllocasByIndex[i];
+ if (!Info.isForwarded()) continue;
+ size_t j = Info.getForwardedID();
+ assert(!AllocasByIndex[j].isForwarded());
+
+ StaticAllocaMap::const_iterator I =
+ StaticAllocas.find(AllocasByIndex[j].getInst());
+ assert(I != StaticAllocas.end());
+
+ std::pair<StaticAllocaMap::const_iterator, bool> Pair =
+ StaticAllocas.insert(std::make_pair(AllocasByIndex[i].getInst(),
+ I->second));
+ assert(Pair.second); (void)Pair;
+ }
+
+ // Record the final frame size. Keep the stack pointer 16-byte aligned.
+ FrameSize = CurrentOffset;
+ FrameSize = RoundUpToAlignment(FrameSize, 16);
+
+ DEBUG(dbgs() << "Allocas: "
+ "Statically allocated frame size is " << FrameSize << "\n");
+}
+
+AllocaManager::AllocaManager() {
+}
+
+void AllocaManager::analyze(const Function &Func, const DataLayout &Layout,
+ bool PerformColoring) {
+ NamedRegionTimer Timer("AllocaManager", TimePassesIsEnabled);
+
+ assert(Allocas.empty());
+ assert(AllocasByIndex.empty());
+ assert(AllocaCompatibility.empty());
+ assert(BlockLiveness.empty());
+ assert(StaticAllocas.empty());
+ assert(SortedAllocas.empty());
+
+ DL = &Layout;
+ F = &Func;
+
+ // Get the declarations for the lifetime intrinsics so we can quickly test to
+ // see if they are used at all, and for use later if they are.
+ const Module *M = F->getParent();
+ LifetimeStart = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_start));
+ LifetimeEnd = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_end));
+
+ // If we are optimizing and the module contains any lifetime intrinsics, run
+ // the alloca coloring algorithm.
+ if (PerformColoring &&
+ ((LifetimeStart && !LifetimeStart->use_empty()) ||
+ (LifetimeEnd && !LifetimeEnd->use_empty()))) {
+
+ collectMarkedAllocas();
+
+ if (!AllocasByIndex.empty()) {
+ DEBUG(dbgs() << "Allocas: "
+ << AllocasByIndex.size() << " marked allocas found\n");
+
+ collectBlocks();
+ computeInterBlockLiveness();
+ computeIntraBlockLiveness();
+ BlockLiveness.clear();
+
+ computeRepresentatives();
+ AllocaCompatibility.clear();
+ }
+ }
+
+ computeFrameOffsets();
+ SortedAllocas.clear();
+ Allocas.clear();
+ AllocasByIndex.clear();
+}
+
+void AllocaManager::clear() {
+ StaticAllocas.clear();
+}
+
+bool
+AllocaManager::getFrameOffset(const AllocaInst *AI, uint64_t *Offset) const {
+ assert(AI->isStaticAlloca());
+ StaticAllocaMap::const_iterator I = StaticAllocas.find(AI);
+ assert(I != StaticAllocas.end());
+ *Offset = I->second.Offset;
+ return AI == I->second.Representative;
+}
+
+const AllocaInst *
+AllocaManager::getRepresentative(const AllocaInst *AI) const {
+ assert(AI->isStaticAlloca());
+ StaticAllocaMap::const_iterator I = StaticAllocas.find(AI);
+ assert(I != StaticAllocas.end());
+ return I->second.Representative;
+}
diff --git a/lib/Target/JSBackend/AllocaManager.h b/lib/Target/JSBackend/AllocaManager.h
new file mode 100644
index 0000000000..44b07981bc
--- /dev/null
+++ b/lib/Target/JSBackend/AllocaManager.h
@@ -0,0 +1,172 @@
+//===-- AllocaManager.h ---------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass declares the AllocaManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef JSBACKEND_ALLOCAMANAGER_H
+#define JSBACKEND_ALLOCAMANAGER_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SetVector.h"
+
+namespace llvm {
+
+class AllocaInst;
+class BasicBlock;
+class CallInst;
+class DataLayout;
+class Function;
+
+/// Compute frame layout for allocas.
+class AllocaManager {
+ const DataLayout *DL;
+ const Function *LifetimeStart;
+ const Function *LifetimeEnd;
+ const Function *F;
+
+ // Per-block lifetime information.
+ struct BlockLifetimeInfo {
+ BitVector Start;
+ BitVector End;
+ BitVector LiveIn;
+ BitVector LiveOut;
+ };
+ typedef DenseMap<const BasicBlock *, BlockLifetimeInfo> LivenessMap;
+ LivenessMap BlockLiveness;
+
+ // Worklist for inter-block liveness analysis.
+ typedef SmallSetVector<const BasicBlock *, 8> InterBlockWorklistVec;
+ InterBlockWorklistVec InterBlockWorklist;
+
+ // Map allocas to their index in AllocasByIndex.
+ typedef DenseMap<const AllocaInst *, size_t> AllocaMap;
+ AllocaMap Allocas;
+
+ // Information about an alloca. Note that the size and alignment may vary
+ // from what's in the actual AllocaInst when an alloca is also representing
+ // another with perhaps greater size and/or alignment needs.
+ //
+ // When an alloca is represented by another, its AllocaInfo is marked as
+ // "forwarded", at which point it no longer holds a size and alignment, but
+ // the index of the representative AllocaInfo.
+ class AllocaInfo {
+ const AllocaInst *Inst;
+ uint64_t Size;
+ unsigned Alignment;
+
+ public:
+ AllocaInfo(const AllocaInst *I, uint64_t S, unsigned A)
+ : Inst(I), Size(S), Alignment(A) {
+ assert(I != NULL);
+ assert(A != 0);
+ assert(!isForwarded());
+ }
+
+ bool isForwarded() const { return Alignment == 0; }
+
+ size_t getForwardedID() const {
+ assert(isForwarded());
+ return static_cast<size_t>(Size);
+ }
+
+ void forward(size_t i) {
+ assert(!isForwarded());
+ Alignment = 0;
+ Size = i;
+ assert(isForwarded());
+ assert(getForwardedID() == i);
+ }
+
+ const AllocaInst *getInst() const { return Inst; }
+
+ uint64_t getSize() const { assert(!isForwarded()); return Size; }
+ unsigned getAlignment() const { assert(!isForwarded()); return Alignment; }
+
+ void mergeSize(uint64_t S) {
+ assert(!isForwarded());
+ Size = std::max(Size, S);
+ assert(!isForwarded());
+ }
+ void mergeAlignment(unsigned A) {
+ assert(A != 0);
+ assert(!isForwarded());
+ Alignment = std::max(Alignment, A);
+ assert(!isForwarded());
+ }
+ };
+ typedef SmallVector<AllocaInfo, 32> AllocaVec;
+ AllocaVec AllocasByIndex;
+
+ // For each alloca, which allocas can it safely represent? Allocas are
+ // identified by AllocasByIndex index.
+ // TODO: Vector-of-vectors isn't the fastest data structure possible here.
+ typedef SmallVector<BitVector, 32> AllocaCompatibilityVec;
+ AllocaCompatibilityVec AllocaCompatibility;
+
+ // This is for allocas that will eventually be sorted.
+ SmallVector<AllocaInfo, 32> SortedAllocas;
+
+ // Static allocation results.
+ struct StaticAllocation {
+ const AllocaInst *Representative;
+ uint64_t Offset;
+ StaticAllocation() {}
+ StaticAllocation(const AllocaInst *A, uint64_t O)
+ : Representative(A), Offset(O) {}
+ };
+ typedef DenseMap<const AllocaInst *, StaticAllocation> StaticAllocaMap;
+ StaticAllocaMap StaticAllocas;
+ uint64_t FrameSize;
+
+ uint64_t getSize(const AllocaInst *AI);
+ unsigned getAlignment(const AllocaInst *AI);
+ AllocaInfo getInfo(const AllocaInst *AI);
+ const AllocaInst *getAllocaFromIntrinsic(const CallInst *CI);
+ static int AllocaSort(const void *l, const void *r);
+
+ void collectMarkedAllocas();
+ void collectBlocks();
+ void computeInterBlockLiveness();
+ void computeIntraBlockLiveness();
+ void computeRepresentatives();
+ void computeFrameOffsets();
+
+public:
+ AllocaManager();
+
+ /// Analyze the given function and prepare for getRepresentative queries.
+ void analyze(const Function &Func, const DataLayout &Layout,
+ bool PerformColoring);
+
+ /// Reset all stored state.
+ void clear();
+
+ /// Return the representative alloca for the given alloca. When allocas are
+ /// merged, one is chosen as the representative to stand for the rest.
+ /// References to the alloca should take the form of references to the
+ /// representative.
+ const AllocaInst *getRepresentative(const AllocaInst *AI) const;
+
+ /// Set *offset to the frame offset for the given alloca. Return true if the
+ /// given alloca is representative, meaning that it needs an explicit
+ /// definition in the function entry. Return false if some other alloca
+ /// represents this one.
+ bool getFrameOffset(const AllocaInst *AI, uint64_t *offset) const;
+
+ /// Return the total frame size for all static allocas and associated padding.
+ uint64_t getFrameSize() const { return FrameSize; }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt
index a80e23050a..521210e94d 100644
--- a/lib/Target/JSBackend/CMakeLists.txt
+++ b/lib/Target/JSBackend/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_target(JSBackendCodeGen
+ AllocaManager.cpp
ExpandI64.cpp
JSBackend.cpp
JSTargetMachine.cpp
diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index f1a6aa59ee..92d562a1df 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -16,6 +16,7 @@
#include "JSTargetMachine.h"
#include "MCTargetDesc/JSBackendMCTargetDesc.h"
+#include "AllocaManager.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
@@ -103,7 +104,6 @@ namespace {
typedef std::vector<unsigned char> HeapData;
typedef std::pair<unsigned, unsigned> Address;
typedef std::map<std::string, Type::TypeID> VarMap;
- typedef std::map<const AllocaInst*, unsigned> AllocaIntMap;
typedef std::map<std::string, Address> GlobalAddressMap;
typedef std::vector<std::string> FunctionTable;
typedef std::map<std::string, FunctionTable> FunctionTableMap;
@@ -121,8 +121,7 @@ namespace {
unsigned UniqueNum;
ValueMap ValueNames;
VarMap UsedVars;
- AllocaIntMap StackAllocs;
- unsigned TotalStackAllocs;
+ AllocaManager Allocas;
HeapData GlobalData8;
HeapData GlobalData32;
HeapData GlobalData64;
@@ -140,13 +139,16 @@ namespace {
bool UsesSIMD;
int InvokeState; // cycles between 0, 1 after preInvoke, 2 after call, 0 again after postInvoke. hackish, no argument there.
+ CodeGenOpt::Level OptLevel;
DataLayout *DL;
#include "CallHandlers.h"
public:
static char ID;
- explicit JSWriter(formatted_raw_ostream &o) : ModulePass(ID), Out(o), UniqueNum(0), UsesSIMD(false), InvokeState(0) {}
+ JSWriter(formatted_raw_ostream &o, CodeGenOpt::Level OptLevel)
+ : ModulePass(ID), Out(o), UniqueNum(0), UsesSIMD(false), InvokeState(0),
+ OptLevel(OptLevel) {}
virtual const char *getPassName() const { return "JavaScript backend"; }
@@ -431,6 +433,10 @@ static inline char halfCharToHex(unsigned char half) {
}
static inline void sanitizeGlobal(std::string& str) {
+ // Global names are prefixed with "_" to prevent them from colliding with
+ // names of things in normal JS.
+ str = "_" + str;
+
// functions and globals should already be in C-style format,
// in addition to . for llvm intrinsics and possibly $ and so forth.
// There is a risk of collisions here, we just lower all these
@@ -444,6 +450,10 @@ static inline void sanitizeGlobal(std::string& str) {
}
static inline void sanitizeLocal(std::string& str) {
+ // Local names are prefixed with "$" to prevent them from colliding with
+ // global names.
+ str = "$" + str;
+
// We need to convert every string that is not a valid JS identifier into
// a valid one, without collisions - we cannot turn "x.a" into "x_a" while
// also leaving "x_a" as is, for example.
@@ -565,18 +575,29 @@ const std::string &JSWriter::getJSName(const Value* val) {
if (I != ValueNames.end() && I->first == val)
return I->second;
+ // If this is an alloca we've replaced with another, use the other name.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(val)) {
+ if (AI->isStaticAlloca()) {
+ const AllocaInst *Rep = Allocas.getRepresentative(AI);
+ if (Rep != AI) {
+ return getJSName(Rep);
+ }
+ }
+ }
+
std::string name;
if (val->hasName()) {
- if (isa<Function>(val) || isa<Constant>(val)) {
- name = std::string("_") + val->getName().str();
- sanitizeGlobal(name);
- } else {
- name = std::string("$") + val->getName().str();
- sanitizeLocal(name);
- }
+ name = val->getName().str();
+ } else {
+ name = utostr(UniqueNum++);
+ }
+
+ if (isa<Constant>(val)) {
+ sanitizeGlobal(name);
} else {
- name = "u$" + utostr(UniqueNum++);
+ sanitizeLocal(name);
}
+
return ValueNames[val] = name;
}
@@ -1368,22 +1389,31 @@ void JSWriter::generateExpression(const User *I, raw_string_ostream& Code) {
break;
}
case Instruction::Alloca: {
- if (NativizedVars.count(I)) {
+ const AllocaInst* AI = cast<AllocaInst>(I);
+
+ if (NativizedVars.count(AI)) {
// nativized stack variable, we just need a 'var' definition
- UsedVars[getJSName(I)] = cast<PointerType>(I->getType())->getElementType()->getTypeID();
+ UsedVars[getJSName(AI)] = AI->getType()->getElementType()->getTypeID();
return;
}
- const AllocaInst* AI = cast<AllocaInst>(I);
- AllocaIntMap::iterator AIMI = StackAllocs.find(AI);
- if (AIMI != StackAllocs.end()) {
- // fixed-size allocation that is already taken into account in the big initial allocation
- if (AIMI->second) {
- Code << getAssign(AI) << "sp + " << utostr(AIMI->second) << "|0";
- } else {
- Code << getAssign(AI) << "sp";
+
+ // Fixed-size entry-block allocations are allocated all at once in the
+ // function prologue.
+ if (AI->isStaticAlloca()) {
+ uint64_t Offset;
+ if (Allocas.getFrameOffset(AI, &Offset)) {
+ if (Offset != 0) {
+ Code << getAssign(AI) << "sp + " << Offset << "|0";
+ } else {
+ Code << getAssign(AI) << "sp";
+ }
+ break;
}
- break;
+ // Otherwise, this alloca is being represented by another alloca, so
+ // there's nothing to print.
+ return;
}
+
Type *T = AI->getAllocatedType();
std::string Size;
uint64_t BaseSize = DL->getTypeAllocSize(T);
@@ -1775,8 +1805,8 @@ void JSWriter::printFunctionBody(const Function *F) {
// Emit stack entry
Out << " " << getAdHocAssign("sp", Type::getInt32Ty(F->getContext())) << "STACKTOP;";
- if (TotalStackAllocs) {
- Out << "\n " << "STACKTOP = STACKTOP + " + utostr(TotalStackAllocs) + "|0;";
+ if (uint64_t FrameSize = Allocas.getFrameSize()) {
+ Out << "\n " "STACKTOP = STACKTOP + " << FrameSize << "|0;";
}
// Emit (relooped) code
@@ -1815,59 +1845,24 @@ void JSWriter::processConstants() {
void JSWriter::printFunction(const Function *F) {
ValueNames.clear();
- // Ensure all arguments and locals are named (we assume used values need names, which might be false if the optimizer did not run)
- unsigned Next = 1;
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI) {
- if (!AI->hasName() && !AI->use_empty()) {
- ValueNames[AI] = "$" + utostr(Next++);
- }
- }
- for (Function::const_iterator BI = F->begin(), BE = F->end();
- BI != BE; ++BI) {
- for (BasicBlock::const_iterator II = BI->begin(), E = BI->end();
- II != E; ++II) {
- if (!II->hasName() && !II->use_empty()) {
- ValueNames[II] = "$" + utostr(Next++);
- }
- }
- }
-
// Prepare and analyze function
UsedVars.clear();
UniqueNum = 0;
- calculateNativizedVars(F);
- StackAllocs.clear();
- TotalStackAllocs = 0;
+ // When optimizing, the regular optimizer (mem2reg, SROA, GVN, and others)
+ // will have already taken all the opportunities for nativization.
+ if (OptLevel == CodeGenOpt::None)
+ calculateNativizedVars(F);
- for (Function::const_iterator BI = F->begin(), BE = F->end(); BI != BE; ++BI) {
- for (BasicBlock::const_iterator II = BI->begin(), E = BI->end(); II != E; ++II) {
- if (const AllocaInst* AI = dyn_cast<AllocaInst>(II)) {
- Type *T = AI->getAllocatedType();
- const Value *AS = AI->getArraySize();
- unsigned BaseSize = DL->getTypeAllocSize(T);
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(AS)) {
- // TODO: group by alignment to avoid unnecessary padding
- unsigned Size = stackAlign(BaseSize * CI->getZExtValue());
- StackAllocs[AI] = TotalStackAllocs;
- TotalStackAllocs += Size;
- }
- } else {
- // stop after the first non-alloca - could alter the stack
- // however, ptrtoints are ok, and the legalizaton passes introduce them
- if (!isa<PtrToIntInst>(II)) break;
- }
- }
- break;
- }
+ // Do alloca coloring at -O1 and higher.
+ Allocas.analyze(*F, *DL, OptLevel != CodeGenOpt::None);
// Emit the function
std::string Name = F->getName();
sanitizeGlobal(Name);
- Out << "function _" << Name << "(";
+ Out << "function " << Name << "(";
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
AI != AE; ++AI) {
if (AI != F->arg_begin()) Out << ",";
@@ -1884,6 +1879,8 @@ void JSWriter::printFunction(const Function *F) {
printFunctionBody(F);
Out << "}";
nl(Out);
+
+ Allocas.clear();
}
void JSWriter::printModuleBody() {
@@ -2346,8 +2343,16 @@ bool JSTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
assert(FileType == TargetMachine::CGFT_AssemblyFile);
PM.add(createExpandI64Pass());
- PM.add(createSimplifyAllocasPass());
- PM.add(new JSWriter(o));
+
+ CodeGenOpt::Level OptLevel = getOptLevel();
+
+ // When optimizing, there shouldn't be any opportunities for SimplifyAllocas
+ // because the regular optimizer should have taken them all (GVN, and possibly
+ // also SROA).
+ if (OptLevel == CodeGenOpt::None)
+ PM.add(createSimplifyAllocasPass());
+
+ PM.add(new JSWriter(o, OptLevel));
return false;
}
diff --git a/test/CodeGen/JS/allocamanager.ll b/test/CodeGen/JS/allocamanager.ll
new file mode 100644
index 0000000000..c2f7c5f53d
--- /dev/null
+++ b/test/CodeGen/JS/allocamanager.ll
@@ -0,0 +1,166 @@
+; RUN: llc -march=js -o - < %s | FileCheck %s
+
+; Basic AllocaManager feature test. Eliminate user variable cupcake in favor of
+; user variable muffin, and combine all the vararg buffers. And align the stack
+; pointer.
+
+; ModuleID = 'test/CodeGen/JS/allocamanager.ll'
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+%struct._IO_FILE = type opaque
+
+@stderr = external constant [4 x i8], align 4
+@.str = private unnamed_addr constant [26 x i8] c"hello from %s; argc is %d\00", align 1
+@.str1 = private unnamed_addr constant [33 x i8] c"message from the program: \22%s\22!\0A\00", align 1
+@.str2 = private unnamed_addr constant [38 x i8] c"with argc %d, I, %s, must say goodbye\00", align 1
+@.str3 = private unnamed_addr constant [43 x i8] c"another message from the program: \22%s\22...\0A\00", align 1
+
+; CHECK: function _foo($argc,$argv) {
+; CHECK-NOT: cupcake
+; CHECK: STACKTOP = STACKTOP + 128|0;
+; CHECK-NEXT: vararg_buffer0 =
+; CHECK-NEXT: $muffin =
+; CHECK-NOT: cupcake
+; CHECK: }
+
+; Function Attrs: nounwind
+define void @foo(i32 %argc, i8** %argv) #0 {
+entry:
+ %vararg_buffer0 = alloca <{ i8* }>, align 8
+ %vararg_lifetime_bitcast10 = bitcast <{ i8* }>* %vararg_buffer0 to i8*
+ %vararg_buffer5 = alloca <{ i32, i8* }>, align 8
+ %vararg_lifetime_bitcast6 = bitcast <{ i32, i8* }>* %vararg_buffer5 to i8*
+ %vararg_buffer2 = alloca <{ i8* }>, align 8
+ %vararg_lifetime_bitcast3 = bitcast <{ i8* }>* %vararg_buffer2 to i8*
+ %vararg_buffer1 = alloca <{ i8*, i32 }>, align 8
+ %vararg_lifetime_bitcast = bitcast <{ i8*, i32 }>* %vararg_buffer1 to i8*
+ %muffin = alloca [117 x i8], align 1
+ %cupcake = alloca [119 x i8], align 1
+ %tmp = getelementptr [117 x i8]* %muffin, i32 0, i32 0
+ call void @llvm.lifetime.start(i64 117, i8* %tmp) #0
+ %tmp1 = load i8** %argv, align 4
+ call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast)
+ %vararg_ptr = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 0
+ store i8* %tmp1, i8** %vararg_ptr, align 4
+ %vararg_ptr1 = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 1
+ store i32 %argc, i32* %vararg_ptr1, align 4
+ %call = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i8*, i32 }>*)*)(i8* %tmp, i8* getelementptr inbounds ([26 x i8]* @.str, i32 0, i32 0), <{ i8*, i32 }>* %vararg_buffer1) #0
+ call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast)
+ %tmp2 = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4
+ call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast3)
+ %vararg_ptr4 = getelementptr <{ i8* }>* %vararg_buffer2, i32 0, i32 0
+ store i8* %tmp, i8** %vararg_ptr4, align 4
+ %call2 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp2, i8* getelementptr inbounds ([33 x i8]* @.str1, i32 0, i32 0), <{ i8* }>* %vararg_buffer2) #0
+ call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast3)
+ call void @llvm.lifetime.end(i64 117, i8* %tmp) #0
+ %tmp3 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0
+ call void @llvm.lifetime.start(i64 119, i8* %tmp3) #0
+ %tmp4 = load i8** %argv, align 4
+ call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast6)
+ %vararg_ptr7 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 0
+ store i32 %argc, i32* %vararg_ptr7, align 4
+ %vararg_ptr8 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 1
+ store i8* %tmp4, i8** %vararg_ptr8, align 4
+ %call5 = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i32, i8* }>*)*)(i8* %tmp3, i8* getelementptr inbounds ([38 x i8]* @.str2, i32 0, i32 0), <{ i32, i8* }>* %vararg_buffer5) #0
+ call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast6)
+ call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast10)
+ %vararg_ptr11 = getelementptr <{ i8* }>* %vararg_buffer0, i32 0, i32 0
+ store i8* %tmp3, i8** %vararg_ptr11, align 4
+ %call7 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp2, i8* getelementptr inbounds ([43 x i8]* @.str3, i32 0, i32 0), <{ i8* }>* %vararg_buffer0) #0
+ call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast10)
+ call void @llvm.lifetime.end(i64 119, i8* %tmp3) #0
+ ret void
+}
+
+; CHECK: function _bar($argc,$argv) {
+; CHECK-NOT: cupcake
+; CHECK: STACKTOP = STACKTOP + 128|0;
+; CHECK-NEXT: vararg_buffer0 =
+; CHECK-NEXT: $muffin =
+; CHECK-NOT: cupcake
+; CHECK: }
+
+; Function Attrs: nounwind
+define void @bar(i32 %argc, i8** %argv) #0 {
+entry:
+ %vararg_buffer0 = alloca <{ i8* }>, align 8
+ %vararg_lifetime_bitcast10 = bitcast <{ i8* }>* %vararg_buffer0 to i8*
+ %vararg_buffer5 = alloca <{ i32, i8* }>, align 8
+ %vararg_lifetime_bitcast6 = bitcast <{ i32, i8* }>* %vararg_buffer5 to i8*
+ %vararg_buffer2 = alloca <{ i8* }>, align 8
+ %vararg_lifetime_bitcast3 = bitcast <{ i8* }>* %vararg_buffer2 to i8*
+ %vararg_buffer1 = alloca <{ i8*, i32 }>, align 8
+ %vararg_lifetime_bitcast = bitcast <{ i8*, i32 }>* %vararg_buffer1 to i8*
+ %muffin = alloca [117 x i8], align 1
+ %cupcake = alloca [119 x i8], align 1
+ %tmp = getelementptr [117 x i8]* %muffin, i32 0, i32 0
+ call void @llvm.lifetime.start(i64 117, i8* %tmp) #0
+ %cmp = icmp eq i32 %argc, 39
+ br i1 %cmp, label %if.end.thread, label %if.end
+
+if.end.thread: ; preds = %entry
+ call void @llvm.lifetime.end(i64 117, i8* %tmp) #0
+ %tmp1 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0
+ call void @llvm.lifetime.start(i64 119, i8* %tmp1) #0
+ %.pre = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4
+ br label %if.then4
+
+if.end: ; preds = %entry
+ %tmp2 = load i8** %argv, align 4
+ call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast)
+ %vararg_ptr = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 0
+ store i8* %tmp2, i8** %vararg_ptr, align 4
+ %vararg_ptr1 = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 1
+ store i32 %argc, i32* %vararg_ptr1, align 4
+ %call = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i8*, i32 }>*)*)(i8* %tmp, i8* getelementptr inbounds ([26 x i8]* @.str, i32 0, i32 0), <{ i8*, i32 }>* %vararg_buffer1) #0
+ call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast)
+ %tmp3 = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4
+ call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast3)
+ %vararg_ptr4 = getelementptr <{ i8* }>* %vararg_buffer2, i32 0, i32 0
+ store i8* %tmp, i8** %vararg_ptr4, align 4
+ %call2 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp3, i8* getelementptr inbounds ([33 x i8]* @.str1, i32 0, i32 0), <{ i8* }>* %vararg_buffer2) #0
+ call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast3)
+ call void @llvm.lifetime.end(i64 117, i8* %tmp) #0
+ %tmp4 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0
+ call void @llvm.lifetime.start(i64 119, i8* %tmp4) #0
+ %cmp3 = icmp eq i32 %argc, 45
+ br i1 %cmp3, label %if.end10, label %if.then4
+
+if.then4: ; preds = %if.end, %if.end.thread
+ %tmp5 = phi %struct._IO_FILE* [ %.pre, %if.end.thread ], [ %tmp3, %if.end ]
+ %tmp6 = phi i8* [ %tmp1, %if.end.thread ], [ %tmp4, %if.end ]
+ %tmp7 = load i8** %argv, align 4
+ call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast6)
+ %vararg_ptr7 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 0
+ store i32 %argc, i32* %vararg_ptr7, align 4
+ %vararg_ptr8 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 1
+ store i8* %tmp7, i8** %vararg_ptr8, align 4
+ %call7 = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i32, i8* }>*)*)(i8* %tmp6, i8* getelementptr inbounds ([38 x i8]* @.str2, i32 0, i32 0), <{ i32, i8* }>* %vararg_buffer5) #0
+ call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast6)
+ call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast10)
+ %vararg_ptr11 = getelementptr <{ i8* }>* %vararg_buffer0, i32 0, i32 0
+ store i8* %tmp6, i8** %vararg_ptr11, align 4
+ %call9 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp5, i8* getelementptr inbounds ([43 x i8]* @.str3, i32 0, i32 0), <{ i8* }>* %vararg_buffer0) #0
+ call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast10)
+ br label %if.end10
+
+if.end10: ; preds = %if.then4, %if.end
+ %tmp8 = phi i8* [ %tmp4, %if.end ], [ %tmp6, %if.then4 ]
+ call void @llvm.lifetime.end(i64 119, i8* %tmp8) #0
+ ret void
+}
+
+; Function Attrs: nounwind
+declare i32 @sprintf(i8*, i8*, i8*) #0
+
+; Function Attrs: nounwind
+declare i32 @fprintf(%struct._IO_FILE*, i8*, i8*) #0
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+attributes #0 = { nounwind }