5 files changed, 2098 insertions, 455 deletions
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt
index 515e1dd7e3..7e138b2695 100644
--- a/lib/Target/CppBackend/CMakeLists.txt
+++ b/lib/Target/CppBackend/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_llvm_target(CppBackendCodeGen
   CPPBackend.cpp
+  Relooper.cpp
   )
 
 add_subdirectory(TargetInfo)
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 3e69098edc..299db973f8 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -38,6 +38,10 @@
 #include <set>
 using namespace llvm;
 
+#define dump(x, ...) fprintf(stderr, x, __VA_ARGS__)
+
+#include <Relooper.h>
+
 static cl::opt<std::string>
 FuncName("cppfname", cl::desc("Specify the name of the generated function"),
          cl::value_desc("function name"));
@@ -85,7 +89,10 @@ namespace {
   typedef std::set<std::string> NameSet;
   typedef std::set<Type*> TypeSet;
   typedef std::set<const Value*> ValueSet;
+  typedef std::map<std::string, Type::TypeID> VarMap;
   typedef std::map<const Value*,std::string> ForwardRefMap;
+  typedef std::vector<unsigned char> HeapData;
+  typedef std::pair<unsigned, unsigned> Address;
 
   /// CppWriter - This class is the main chunk of code that converts an LLVM
   /// module to a C++ translation unit.
@@ -98,14 +105,20 @@ namespace {
     NameSet UsedNames;
     TypeSet DefinedTypes;
     ValueSet DefinedValues;
+    VarMap UsedVars;
     ForwardRefMap ForwardRefs;
     bool is_inline;
     unsigned indent_level;
+    HeapData GlobalData8;
+    HeapData GlobalData32;
+    HeapData GlobalData64;
+    std::map<std::string, Address> GlobalAddresses;
 
   public:
     static char ID;
     explicit CppWriter(formatted_raw_ostream &o) :
-      ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+      ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){
+    }
 
     virtual const char *getPassName() const { return "C++ backend"; }
 
@@ -134,7 +147,28 @@ namespace {
     void printCallingConv(CallingConv::ID cc);
     void printEscapedString(const std::string& str);
     void printCFP(const ConstantFP* CFP);
-
+    void printCommaSeparated(const HeapData v);
+
+    void allocateConstant(const Constant* CV);
+    unsigned getGlobalAddress(const std::string &s) {
+      Address a = GlobalAddresses[s];
+      switch (a.second) {
+        case 64:
+          return a.first + 8;
+        case 32:
+          return a.first + 8 + GlobalData64.size();
+        case 8:
+          return a.first + 8 + GlobalData64.size() + GlobalData32.size();
+        default:
+          assert(false);
+      }
+    }
+    std::string getPtrLoad(const Value* Ptr);
+    std::string getPtrUse(const Value* Ptr);
+    std::string getPtr(const Value* Ptr);
+    std::string getConstant(const Constant*);
+    std::string getValueAsStr(const Value*);
+    std::string getValueAsParenStr(const Value*);
     std::string getCppName(Type* val);
     inline void printCppName(Type* val);
 
@@ -145,6 +179,10 @@ namespace {
     void printType(Type* Ty);
     void printTypes(const Module* M);
 
+    std::string getAssign(const StringRef &, const Type *);
+    std::string getCast(const StringRef &, const Type *);
+    std::string getParenCast(const StringRef &, const Type *);
+
     void printConstant(const Constant *CPV);
     void printConstants(const Module* M);
 
@@ -155,10 +193,14 @@ namespace {
     void printFunctionUses(const Function *F);
     void printFunctionHead(const Function *F);
     void printFunctionBody(const Function *F);
-    void printInstruction(const Instruction *I, const std::string& bbname);
+    std::string generateInstruction(const Instruction *I);
     std::string getOpName(const Value*);
 
     void printModuleBody();
+
+    unsigned stackAlign(unsigned x) {
+      return x + (x%4 != 0 ? 4 - x%4 : 0);
+    }
   };
 } // end anonymous namespace.
 
@@ -171,8 +213,8 @@ formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) {
 }
 
 static inline void sanitize(std::string &str) {
-  for (size_t i = 0; i < str.length(); ++i)
-    if (!isalnum(str[i]) && str[i] != '_')
+  for (size_t i = 1; i < str.length(); ++i)
+    if (!isalnum(str[i]) && str[i] != '_' && str[i] != '$')
       str[i] = '_';
 }
 
@@ -217,8 +259,6 @@ void CppWriter::printCFP(const ConstantFP *CFP) {
   APFloat APF = APFloat(CFP->getValueAPF());  // copy
   if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
     APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
-  Out << "ConstantFP::get(mod->getContext(), ";
-  Out << "APFloat(";
 #if HAVE_PRINTF_A
   char Buffer[100];
   sprintf(Buffer, "%A", APF.convertToDouble());
@@ -257,11 +297,9 @@ void CppWriter::printCFP(const ConstantFP *CFP) {
           << utohexstr((uint32_t)CFP->getValueAPF().
                                       bitcastToAPInt().getZExtValue())
           << "U) /* " << StrVal << " */";
-    Out << ")";
 #if HAVE_PRINTF_A
   }
 #endif
-  Out << ")";
 }
 
 void CppWriter::printCallingConv(CallingConv::ID cc){
@@ -423,40 +461,46 @@ std::string CppWriter::getCppName(const Value* val) {
   std::string name;
   ValueMap::iterator I = ValueNames.find(val);
   if (I != ValueNames.end() && I->first == val)
-    return  I->second;
-
-  if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
-    name = std::string("gvar_") +
-      getTypePrefix(GV->getType()->getElementType());
-  } else if (isa<Function>(val)) {
-    name = std::string("func_");
-  } else if (const Constant* C = dyn_cast<Constant>(val)) {
-    name = std::string("const_") + getTypePrefix(C->getType());
-  } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
-    if (is_inline) {
-      unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
-                                      Function::const_arg_iterator(Arg)) + 1;
-      name = std::string("arg_") + utostr(argNum);
-      NameSet::iterator NI = UsedNames.find(name);
-      if (NI != UsedNames.end())
-        name += std::string("_") + utostr(uniqueNum++);
-      UsedNames.insert(name);
-      return ValueNames[val] = name;
+    return I->second;
+
+  if (val->hasName()) {
+    if (dyn_cast<Function>(val)) {
+      name = std::string("_") + val->getName().str();
     } else {
-      name = getTypePrefix(val->getType());
+      name = std::string("$") + val->getName().str();
     }
+    sanitize(name);
   } else {
-    name = getTypePrefix(val->getType());
-  }
-  if (val->hasName())
-    name += val->getName();
-  else
+    if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
+      name = std::string("gvar_") +
+        getTypePrefix(GV->getType()->getElementType());
+    } else if (isa<Function>(val)) {
+      name = std::string("func_");
+    } else if (const Constant* C = dyn_cast<Constant>(val)) {
+      name = std::string("const_") + getTypePrefix(C->getType());
+    } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
+      if (is_inline) {
+        unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
+                                        Function::const_arg_iterator(Arg)) + 1;
+        name = std::string("arg_") + utostr(argNum);
+        NameSet::iterator NI = UsedNames.find(name);
+        if (NI != UsedNames.end())
+          name += std::string("_") + utostr(uniqueNum++);
+        UsedNames.insert(name);
+        return ValueNames[val] = name;
+      } else {
+        name = getTypePrefix(val->getType());
+      }
+    } else {
+      name = getTypePrefix(val->getType());
+    }
     name += utostr(uniqueNum++);
-  sanitize(name);
-  NameSet::iterator NI = UsedNames.find(name);
-  if (NI != UsedNames.end())
-    name += std::string("_") + utostr(uniqueNum++);
-  UsedNames.insert(name);
+    sanitize(name);
+    NameSet::iterator NI = UsedNames.find(name);
+    if (NI != UsedNames.end())
+      name += std::string("_") + utostr(uniqueNum++);
+    UsedNames.insert(name);
+  }
   return ValueNames[val] = name;
 }
 
@@ -710,6 +754,29 @@ void CppWriter::printTypes(const Module* M) {
 }
 
 
+std::string CppWriter::getAssign(const StringRef &s, const Type *t) {
+  UsedVars[s] = t->getTypeID();
+  return (s + " = ").str();
+}
+
+std::string CppWriter::getCast(const StringRef &s, const Type *t) {
+  switch (t->getTypeID()) {
+  default:
+    assert(false && "Unsupported type");
+  case Type::FloatTyID:
+    // TODO return ("Math_fround(" + s + ")").str();
+  case Type::DoubleTyID:
+    return ("+" + s).str();
+  case Type::IntegerTyID:
+  case Type::PointerTyID:
+    return (s + "|0").str();
+  }
+}
+
+std::string CppWriter::getParenCast(const StringRef &s, const Type *t) {
+  return getCast(("(" + s + ")").str(), t);
+}
+
 // printConstant - Print out a constant pool entry...
 void CppWriter::printConstant(const Constant *CV) {
   // First, if the constant is actually a GlobalValue (variable or function)
@@ -721,20 +788,16 @@ void CppWriter::printConstant(const Constant *CV) {
   std::string constName(getCppName(CV));
   std::string typeName(getCppName(CV->getType()));
 
+  //Out << "var " << constName << " = ";
+
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
     std::string constValue = CI->getValue().toString(10, true);
-    Out << "ConstantInt* " << constName
-        << " = ConstantInt::get(mod->getContext(), APInt("
-        << cast<IntegerType>(CI->getType())->getBitWidth()
-        << ", StringRef(\"" <<  constValue << "\"), 10));";
+    Out << constValue << ";";
   } else if (isa<ConstantAggregateZero>(CV)) {
-    Out << "ConstantAggregateZero* " << constName
-        << " = ConstantAggregateZero::get(" << typeName << ");";
+    Out << "ConstantAggregateZero::get(" << typeName << ");";
   } else if (isa<ConstantPointerNull>(CV)) {
-    Out << "ConstantPointerNull* " << constName
-        << " = ConstantPointerNull::get(" << typeName << ");";
+    Out << "ConstantPointerNull::get(" << typeName << ");";
   } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
-    Out << "ConstantFP* " << constName << " = ";
     printCFP(CFP);
     Out << ";";
   } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
@@ -779,20 +842,13 @@ void CppWriter::printConstant(const Constant *CV) {
   } else if (const ConstantDataSequential *CDS =
                dyn_cast<ConstantDataSequential>(CV)) {
     if (CDS->isString()) {
-      Out << "Constant *" << constName <<
-      " = ConstantDataArray::getString(mod->getContext(), \"";
+      Out << "allocate([";
       StringRef Str = CDS->getAsString();
-      bool nullTerminate = false;
-      if (Str.back() == 0) {
-        Str = Str.drop_back();
-        nullTerminate = true;
+      for (unsigned int i = 0; i < Str.size(); i++) {
+        Out << (unsigned int)(Str.data()[i]);
+        if (i < Str.size()-1) Out << ",";
       }
-      printEscapedString(Str);
-      // Determine if we want null termination or not.
-      if (nullTerminate)
-        Out << "\", true);";
-      else
-        Out << "\", false);";// No null terminator
+      Out << "], 'i8', ALLOC_STATIC);";
     } else {
       // TODO: Could generate more efficient code generating CDS calls instead.
       Out << "std::vector<Constant*> " << constName << "_elems;";
@@ -813,19 +869,12 @@ void CppWriter::printConstant(const Constant *CV) {
     }
   } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
     if (CE->getOpcode() == Instruction::GetElementPtr) {
-      Out << "std::vector<Constant*> " << constName << "_indices;";
-      nl(Out);
-      printConstant(CE->getOperand(0));
-      for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
-        printConstant(CE->getOperand(i));
-        Out << constName << "_indices.push_back("
-            << getCppName(CE->getOperand(i)) << ");";
-        nl(Out);
+      Out << "allocate([";
+      for (unsigned i = 0; i < CE->getNumOperands(); ++i ) {
+        Out << getCppName(CE->getOperand(i));
+        if (i < CE->getNumOperands()-1) Out << ",";
       }
-      Out << "Constant* " << constName
-          << " = ConstantExpr::getGetElementPtr("
-          << getCppName(CE->getOperand(0)) << ", "
-          << constName << "_indices);";
+      Out << "], 'i32', ALLOC_STATIC);";
     } else if (CE->isCast()) {
       printConstant(CE->getOperand(0));
       Out << "Constant* " << constName << " = ConstantExpr::getCast(";
@@ -936,8 +985,11 @@ void CppWriter::printConstants(const Module* M) {
   // Traverse all the global variables looking for constant initializers
   for (Module::const_global_iterator I = TheModule->global_begin(),
          E = TheModule->global_end(); I != E; ++I)
-    if (I->hasInitializer())
-      printConstant(I->getInitializer());
+    if (I->hasInitializer()) {
+      const Constant *CV = I->getInitializer();
+      allocateConstant(CV);
+      GlobalAddresses[I->getName().str()] = GlobalAddresses[getCppName(CV)];
+    }
 
   // Traverse the LLVM functions looking for constants
   for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
@@ -949,7 +1001,7 @@ void CppWriter::printConstants(const Module* M) {
            ++I) {
         for (unsigned i = 0; i < I->getNumOperands(); ++i) {
           if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
-            printConstant(C);
+            allocateConstant(C);
           }
         }
       }
@@ -958,91 +1010,19 @@ void CppWriter::printConstants(const Module* M) {
 }
 
 void CppWriter::printVariableUses(const GlobalVariable *GV) {
-  nl(Out) << "// Type Definitions";
-  nl(Out);
-  printType(GV->getType());
-  if (GV->hasInitializer()) {
-    const Constant *Init = GV->getInitializer();
-    printType(Init->getType());
-    if (const Function *F = dyn_cast<Function>(Init)) {
-      nl(Out)<< "/ Function Declarations"; nl(Out);
-      printFunctionHead(F);
-    } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
-      nl(Out) << "// Global Variable Declarations"; nl(Out);
-      printVariableHead(gv);
-      
-      nl(Out) << "// Global Variable Definitions"; nl(Out);
-      printVariableBody(gv);
-    } else  {
-      nl(Out) << "// Constant Definitions"; nl(Out);
-      printConstant(Init);
-    }
-  }
 }
 
 void CppWriter::printVariableHead(const GlobalVariable *GV) {
-  nl(Out) << "GlobalVariable* " << getCppName(GV);
-  if (is_inline) {
-    Out << " = mod->getGlobalVariable(mod->getContext(), ";
-    printEscapedString(GV->getName());
-    Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
-    nl(Out) << "if (!" << getCppName(GV) << ") {";
-    in(); nl(Out) << getCppName(GV);
-  }
-  Out << " = new GlobalVariable(/*Module=*/*mod, ";
-  nl(Out) << "/*Type=*/";
-  printCppName(GV->getType()->getElementType());
-  Out << ",";
-  nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
-  Out << ",";
-  nl(Out) << "/*Linkage=*/";
-  printLinkageType(GV->getLinkage());
-  Out << ",";
-  nl(Out) << "/*Initializer=*/0, ";
-  if (GV->hasInitializer()) {
-    Out << "// has initializer, specified below";
-  }
-  nl(Out) << "/*Name=*/\"";
-  printEscapedString(GV->getName());
-  Out << "\");";
-  nl(Out);
-
-  if (GV->hasSection()) {
-    printCppName(GV);
-    Out << "->setSection(\"";
-    printEscapedString(GV->getSection());
-    Out << "\");";
-    nl(Out);
-  }
-  if (GV->getAlignment()) {
-    printCppName(GV);
-    Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
-    nl(Out);
-  }
-  if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
-    printCppName(GV);
-    Out << "->setVisibility(";
-    printVisibilityType(GV->getVisibility());
-    Out << ");";
-    nl(Out);
-  }
-  if (GV->isThreadLocal()) {
-    printCppName(GV);
-    Out << "->setThreadLocalMode(";
-    printThreadLocalMode(GV->getThreadLocalMode());
-    Out << ");";
-    nl(Out);
-  }
-  if (is_inline) {
-    out(); Out << "}"; nl(Out);
-  }
+  Out << "var ";
+  printCppName(GV);
+  Out << ";\n";
 }
 
 void CppWriter::printVariableBody(const GlobalVariable *GV) {
   if (GV->hasInitializer()) {
     printCppName(GV);
-    Out << "->setInitializer(";
-    Out << getCppName(GV->getInitializer()) << ");";
+    Out << " = ";
+    Out << getCppName(GV->getInitializer()) << ";";
     nl(Out);
   }
 }
@@ -1091,9 +1071,98 @@ static StringRef ConvertAtomicSynchScope(SynchronizationScope SynchScope) {
   llvm_unreachable("Unknown synch scope");
 }
 
-// printInstruction - This member is called for each Instruction in a function.
-void CppWriter::printInstruction(const Instruction *I,
-                                 const std::string& bbname) {
+std::string CppWriter::getPtrLoad(const Value* Ptr) {
+  Type *t = cast<PointerType>(Ptr->getType())->getElementType();
+  return getCast(getPtrUse(Ptr), t);
+}
+
+std::string CppWriter::getPtrUse(const Value* Ptr) {
+  Type *t = cast<PointerType>(Ptr->getType())->getElementType();
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+    std::string text = "";
+    unsigned Addr = getGlobalAddress(GV->getName().str());
+    switch (t->getTypeID()) {
+    default:
+      assert(false && "Unsupported type");
+    case Type::DoubleTyID:
+      return "HEAPF64[" + utostr(Addr >> 3) + "]";
+    case Type::FloatTyID:
+      return "HEAPF32[" + utostr(Addr >> 2) + "]";
+    case Type::IntegerTyID:
+      return "HEAP32[" + utostr(Addr >> 2) + "]";
+    }
+  } else {
+    switch (t->getTypeID()) {
+    default:
+      assert(false && "Unsupported type");
+    case Type::DoubleTyID:
+      return "HEAPF64[" + getOpName(Ptr) + ">>3]";
+    case Type::FloatTyID:
+      return "HEAPF32[" + getOpName(Ptr) + ">>2]";
+    case Type::IntegerTyID:
+    case Type::PointerTyID:
+      return "HEAP32[" + getOpName(Ptr) + ">>2]";
+    }
+  }
+}
+
+std::string CppWriter::getPtr(const Value* Ptr) {
+  Type *t = cast<PointerType>(Ptr->getType())->getElementType();
+  if (const Constant *CV = dyn_cast<Constant>(Ptr)) {
+    std::string text = "";
+    unsigned Addr = getGlobalAddress(CV->getName().str());
+    switch (t->getTypeID()) {
+    default:
+      assert(false && "Unsupported type");
+    case Type::DoubleTyID:
+      return utostr(Addr >> 3);
+    case Type::FloatTyID:
+      return utostr(Addr >> 2);
+    case Type::ArrayTyID:
+    case Type::StructTyID:
+    case Type::PointerTyID:
+    case Type::VectorTyID:
+    case Type::IntegerTyID:
+      return utostr(Addr >> 2);
+    }
+  } else {
+    return getOpName(Ptr) + "|0";
+  }
+}
+
+std::string CppWriter::getConstant(const Constant* CV) {
+  if (/* const PointerType *Ptr = */ dyn_cast<PointerType>(CV->getType())) {
+    return getPtr(CV);
+  } else {
+    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+      std::string S = ftostr(CFP->getValueAPF());
+      S = '+' + S;
+//      if (S.find('.') == S.npos) { TODO: do this when necessary, but it is necessary even for 0.0001
+      return S;
+    } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+      return CI->getValue().toString(10, true);
+    } else {
+      assert(false);
+    }
+  }
+}
+
+std::string CppWriter::getValueAsStr(const Value* V) {
+  if (const Constant *CV = dyn_cast<Constant>(V)) {
+    return getConstant(CV);
+  } else {
+    return getCast(getCppName(V), V->getType());
+  }
+}
+
+std::string CppWriter::getValueAsParenStr(const Value* V) {
+  return "(" + getValueAsStr(V) + ")";
+}
+
+// generateInstruction - This member is called for each Instruction in a function.
+std::string CppWriter::generateInstruction(const Instruction *I) {
+  std::string text = "NYI: " + std::string(I->getOpcodeName());
+  std::string bbname = "NO_BBNAME";
   std::string iName(getCppName(I));
 
   // Before we emit this instruction, we need to take care of generating any
@@ -1110,24 +1179,18 @@ void CppWriter::printInstruction(const Instruction *I,
 
   case Instruction::Ret: {
     const ReturnInst* ret =  cast<ReturnInst>(I);
-    Out << "ReturnInst::Create(mod->getContext(), "
-        << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
+    Value *RV = ret->getReturnValue();
+    text = "STACKTOP = sp;";
+    text += "return";
+    if (RV == NULL) {
+      text += ";";
+    } else {
+      text += " " + getValueAsStr(RV) + ";";
+    }
     break;
   }
   case Instruction::Br: {
-    const BranchInst* br = cast<BranchInst>(I);
-    Out << "BranchInst::Create(" ;
-    if (br->getNumOperands() == 3) {
-      Out << opNames[2] << ", "
-          << opNames[1] << ", "
-          << opNames[0] << ", ";
-
-    } else if (br->getNumOperands() == 1) {
-      Out << opNames[0] << ", ";
-    } else {
-      error("Branch with 2 operands?");
-    }
-    Out << bbname << ");";
+    text = "";
     break;
   }
   case Instruction::Switch: {
@@ -1213,14 +1276,15 @@ void CppWriter::printInstruction(const Instruction *I,
   case Instruction::Shl:
   case Instruction::LShr:
   case Instruction::AShr:{
-    Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
+    //Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
+    text = getAssign(iName, Type::getInt32Ty(I->getContext()));
     switch (I->getOpcode()) {
-    case Instruction::Add: Out << "Instruction::Add"; break;
+    case Instruction::Add: text += getParenCast(getValueAsParenStr(I->getOperand(0)) + " + " + getValueAsParenStr(I->getOperand(1)), Type::getInt32Ty(I->getContext())) + ";"; break;
     case Instruction::FAdd: Out << "Instruction::FAdd"; break;
-    case Instruction::Sub: Out << "Instruction::Sub"; break;
+    case Instruction::Sub: text += getParenCast(getValueAsParenStr(I->getOperand(0)) + " - " + getValueAsParenStr(I->getOperand(1)), Type::getInt32Ty(I->getContext())) + ";"; break;
     case Instruction::FSub: Out << "Instruction::FSub"; break;
     case Instruction::Mul: Out << "Instruction::Mul"; break;
-    case Instruction::FMul: Out << "Instruction::FMul"; break;
+    case Instruction::FMul: text += getParenCast(getValueAsStr(I->getOperand(0)) + " * " + getValueAsStr(I->getOperand(1)), I->getType()) + ";"; break;
     case Instruction::UDiv:Out << "Instruction::UDiv"; break;
     case Instruction::SDiv:Out << "Instruction::SDiv"; break;
     case Instruction::FDiv:Out << "Instruction::FDiv"; break;
@@ -1235,9 +1299,9 @@ void CppWriter::printInstruction(const Instruction *I,
     case Instruction::AShr:Out << "Instruction::AShr"; break;
     default: Out << "Instruction::BadOpCode"; break;
     }
-    Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
-    printEscapedString(I->getName());
-    Out << "\", " << bbname << ");";
+    //Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+    //printEscapedString(I->getName());
+    //Out << "\", " << bbname << ");";
     break;
   }
   case Instruction::FCmp: {
@@ -1267,96 +1331,59 @@ void CppWriter::printInstruction(const Instruction *I,
     break;
   }
   case Instruction::ICmp: {
-    Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
+    text = getAssign(iName, Type::getInt32Ty(I->getContext())) + "(" + getValueAsStr(I->getOperand(0)) + ")";
     switch (cast<ICmpInst>(I)->getPredicate()) {
-    case ICmpInst::ICMP_EQ:  Out << "ICmpInst::ICMP_EQ";  break;
-    case ICmpInst::ICMP_NE:  Out << "ICmpInst::ICMP_NE";  break;
-    case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
-    case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
-    case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
-    case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
-    case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
-    case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
-    case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
-    case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
-    default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
+    case ICmpInst::ICMP_EQ:  text += "==";  break;
+    case ICmpInst::ICMP_NE:  text += "!=";  break;
+    // TODO: handle signed and unsigned
+    case ICmpInst::ICMP_ULE: text += "<="; break;
+    case ICmpInst::ICMP_SLE: text += "<="; break;
+    case ICmpInst::ICMP_UGE: text += ">="; break;
+    case ICmpInst::ICMP_SGE: text += ">="; break;
+    case ICmpInst::ICMP_ULT: text += "<"; break;
+    case ICmpInst::ICMP_SLT: text += "<"; break;
+    case ICmpInst::ICMP_UGT: text += ">"; break;
+    case ICmpInst::ICMP_SGT: text += ">"; break;
+    default: text += "ICmpInst::BAD_ICMP_PREDICATE"; break;
     }
-    Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
-    printEscapedString(I->getName());
-    Out << "\");";
+    text += "(" + getValueAsStr(I->getOperand(1)) + ")";
     break;
   }
   case Instruction::Alloca: {
     const AllocaInst* allocaI = cast<AllocaInst>(I);
-    Out << "AllocaInst* " << iName << " = new AllocaInst("
-        << getCppName(allocaI->getAllocatedType()) << ", ";
-    if (allocaI->isArrayAllocation())
-      Out << opNames[0] << ", ";
-    Out << "\"";
-    printEscapedString(allocaI->getName());
-    Out << "\", " << bbname << ");";
-    if (allocaI->getAlignment())
-      nl(Out) << iName << "->setAlignment("
-          << allocaI->getAlignment() << ");";
+    Type *t = allocaI->getAllocatedType();
+    unsigned size;
+    if (ArrayType *AT = dyn_cast<ArrayType>(t)) {
+      size = AT->getElementType()->getScalarSizeInBits()/8 * AT->getNumElements();
+    } else {
+      size = t->getScalarSizeInBits()/8;
+    }
+    text = getAssign(iName, Type::getInt32Ty(I->getContext())) + "STACKTOP; STACKTOP = STACKTOP + " + Twine(stackAlign(size)).str() + "|0;";
     break;
   }
   case Instruction::Load: {
-    const LoadInst* load = cast<LoadInst>(I);
-    Out << "LoadInst* " << iName << " = new LoadInst("
-        << opNames[0] << ", \"";
-    printEscapedString(load->getName());
-    Out << "\", " << (load->isVolatile() ? "true" : "false" )
-        << ", " << bbname << ");";
-    if (load->getAlignment())
-      nl(Out) << iName << "->setAlignment("
-              << load->getAlignment() << ");";
-    if (load->isAtomic()) {
-      StringRef Ordering = ConvertAtomicOrdering(load->getOrdering());
-      StringRef CrossThread = ConvertAtomicSynchScope(load->getSynchScope());
-      nl(Out) << iName << "->setAtomic("
-              << Ordering << ", " << CrossThread << ");";
-    }
+    const LoadInst *LI = cast<LoadInst>(I);
+    const Value *Ptr = LI->getPointerOperand();
+    Type *t = cast<PointerType>(Ptr->getType())->getElementType();
+    text = getAssign(iName, t) + getPtrLoad(Ptr) + ";";
     break;
   }
   case Instruction::Store: {
-    const StoreInst* store = cast<StoreInst>(I);
-    Out << "StoreInst* " << iName << " = new StoreInst("
-        << opNames[0] << ", "
-        << opNames[1] << ", "
-        << (store->isVolatile() ? "true" : "false")
-        << ", " << bbname << ");";
-    if (store->getAlignment())
-      nl(Out) << iName << "->setAlignment("
-              << store->getAlignment() << ");";
-    if (store->isAtomic()) {
-      StringRef Ordering = ConvertAtomicOrdering(store->getOrdering());
-      StringRef CrossThread = ConvertAtomicSynchScope(store->getSynchScope());
-      nl(Out) << iName << "->setAtomic("
-              << Ordering << ", " << CrossThread << ");";
+    const StoreInst *SI = cast<StoreInst>(I);
+    const Value *P = SI->getPointerOperand();
+    const Value *V = SI->getValueOperand();
+    std::string VS = getValueAsStr(V);
+    if (V->getType()->isDoubleTy() && SI->getAlignment() == 4) {
+      // only 4-byte aligned, copy carefully
+      std::string PS = getOpName(P);
+      text = "HEAPF64[tempDoublePtr>>3]=" + VS + ";HEAPF32[" + PS + ">>2]=HEAPF32[tempDoublePtr>>2];HEAPF32[" + PS + "+4>>2]=HEAPF32[tempDoublePtr+4>>2];";
+    } else {
+      text = getPtrUse(P) + " = " + VS + ";";
     }
     break;
   }
   case Instruction::GetElementPtr: {
-    const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
-    if (gep->getNumOperands() <= 2) {
-      Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
-          << opNames[0];
-      if (gep->getNumOperands() == 2)
-        Out << ", " << opNames[1];
-    } else {
-      Out << "std::vector<Value*> " << iName << "_indices;";
-      nl(Out);
-      for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
-        Out << iName << "_indices.push_back("
-            << opNames[i] << ");";
-        nl(Out);
-      }
-      Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
-          << opNames[0] << ", " << iName << "_indices";
-    }
-    Out << ", \"";
-    printEscapedString(gep->getName());
-    Out << "\", " << bbname << ");";
+    assert(false && "Unhandled instruction");
     break;
   }
   case Instruction::PHI: {
@@ -1376,6 +1403,18 @@ void CppWriter::printInstruction(const Instruction *I,
     }
     break;
   }
+  case Instruction::PtrToInt:
+    text = getAssign(iName, Type::getInt32Ty(I->getContext()));
+    if (const Constant *CV = dyn_cast<Constant>(I->getOperand(0))) {
+      text += utostr(getGlobalAddress(CV->getName().str()));
+    } else {
+      text += getCast(opNames[0], Type::getInt32Ty(I->getContext()));
+    }
+    text += ";";
+    break;
+  case Instruction::IntToPtr:
+    text = getAssign(iName, Type::getInt32Ty(I->getContext())) + opNames[0] + ";";
+    break;
   case Instruction::Trunc:
   case Instruction::ZExt:
   case Instruction::SExt:
@@ -1385,71 +1424,45 @@ void CppWriter::printInstruction(const Instruction *I,
   case Instruction::FPToSI:
   case Instruction::UIToFP:
   case Instruction::SIToFP:
-  case Instruction::PtrToInt:
-  case Instruction::IntToPtr:
   case Instruction::BitCast: {
-    const CastInst* cst = cast<CastInst>(I);
-    Out << "CastInst* " << iName << " = new ";
     switch (I->getOpcode()) {
     case Instruction::Trunc:    Out << "TruncInst"; break;
     case Instruction::ZExt:     Out << "ZExtInst"; break;
     case Instruction::SExt:     Out << "SExtInst"; break;
     case Instruction::FPTrunc:  Out << "FPTruncInst"; break;
-    case Instruction::FPExt:    Out << "FPExtInst"; break;
+    case Instruction::FPExt:    text = getAssign(iName, Type::getFloatTy(I->getContext())) + opNames[0] + ";"; break;
     case Instruction::FPToUI:   Out << "FPToUIInst"; break;
     case Instruction::FPToSI:   Out << "FPToSIInst"; break;
     case Instruction::UIToFP:   Out << "UIToFPInst"; break;
-    case Instruction::SIToFP:   Out << "SIToFPInst"; break;
     case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
     case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
-    case Instruction::BitCast:  Out << "BitCastInst"; break;
+    case Instruction::SIToFP:
+    case Instruction::BitCast:  text = getAssign(iName, I->getOperand(0)->getType()) + getValueAsStr(I->getOperand(0)) + ";"; break;
     default: llvm_unreachable("Unreachable");
     }
-    Out << "(" << opNames[0] << ", "
-        << getCppName(cst->getType()) << ", \"";
-    printEscapedString(cst->getName());
-    Out << "\", " << bbname << ");";
     break;
   }
   case Instruction::Call: {
     const CallInst* call = cast<CallInst>(I);
-    if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
-      Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
-          << getCppName(ila->getFunctionType()) << ", \""
-          << ila->getAsmString() << "\", \""
-          << ila->getConstraintString() << "\","
-          << (ila->hasSideEffects() ? "true" : "false") << ");";
-      nl(Out);
-    }
-    if (call->getNumArgOperands() > 1) {
-      Out << "std::vector<Value*> " << iName << "_params;";
-      nl(Out);
-      for (unsigned i = 0; i < call->getNumArgOperands(); ++i) {
-        Out << iName << "_params.push_back(" << opNames[i] << ");";
-        nl(Out);
+    const int numArgs = call->getNumArgOperands();
+    Type *RT = call->getCalledFunction()->getReturnType();
+    text = opNames[numArgs] + "(";
+    for (int i = 0; i < numArgs; i++) {
+      Value *Arg = call->getArgOperand(i);
+      Type *t = dyn_cast<PointerType>(Arg->getType());
+      const GlobalVariable *GV;
+      if (t && (GV = dyn_cast<GlobalVariable>(Arg))) {
+        text += utostr(getGlobalAddress(GV->getName().str()));
+      } else {
+        text += getValueAsStr(call->getArgOperand(i));
       }
-      Out << "CallInst* " << iName << " = CallInst::Create("
-          << opNames[call->getNumArgOperands()] << ", "
-          << iName << "_params, \"";
-    } else if (call->getNumArgOperands() == 1) {
-      Out << "CallInst* " << iName << " = CallInst::Create("
-          << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \"";
-    } else {
-      Out << "CallInst* " << iName << " = CallInst::Create("
-          << opNames[call->getNumArgOperands()] << ", \"";
+      if (i < numArgs - 1) text += ", ";
     }
-    printEscapedString(call->getName());
-    Out << "\", " << bbname << ");";
-    nl(Out) << iName << "->setCallingConv(";
-    printCallingConv(call->getCallingConv());
-    Out << ");";
-    nl(Out) << iName << "->setTailCall("
-        << (call->isTailCall() ? "true" : "false");
-    Out << ");";
-    nl(Out);
-    printAttributes(call->getAttributes(), iName);
-    Out << iName << "->setAttributes(" << iName << "_PAL);";
-    nl(Out);
+    text += ")";
+    if (!RT->isVoidTy()) {
+      text = getAssign(iName, RT) + getCast(text, RT);
+    }
+    text += ";";
     break;
   }
   case Instruction::Select: {
@@ -1591,8 +1604,8 @@ void CppWriter::printInstruction(const Instruction *I,
   }
   }
   DefinedValues.insert(I);
-  nl(Out);
   delete [] opNames;
+  return text;
 }
 
 // Print out the types, constants and declarations needed by one function
@@ -1654,42 +1667,6 @@ void CppWriter::printFunctionUses(const Function* F) {
     }
   }
 
-  // Print the function declarations for any functions encountered
-  nl(Out) << "// Function Declarations"; nl(Out);
-  for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
-       I != E; ++I) {
-    if (Function* Fun = dyn_cast<Function>(*I)) {
-      if (!is_inline || Fun != F)
-        printFunctionHead(Fun);
-    }
-  }
-
-  // Print the global variable declarations for any variables encountered
-  nl(Out) << "// Global Variable Declarations"; nl(Out);
-  for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
-       I != E; ++I) {
-    if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
-      printVariableHead(F);
-  }
-
-  // Print the constants found
-  nl(Out) << "// Constant Definitions"; nl(Out);
-  for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
-         E = consts.end(); I != E; ++I) {
-    printConstant(*I);
-  }
-
-  // Process the global variables definitions now that all the constants have
-  // been emitted. These definitions just couple the gvars with their constant
-  // initializers.
-  if (GenerationType != GenFunction) {
-    nl(Out) << "// Global Variable Definitions"; nl(Out);
-    for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
-         I != E; ++I) {
-      if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
-        printVariableBody(GV);
-    }
-  }
 }
 
 void CppWriter::printFunctionHead(const Function* F) {
@@ -1745,61 +1722,117 @@ void CppWriter::printFunctionHead(const Function* F) {
 }
 
 void CppWriter::printFunctionBody(const Function *F) {
-  if (F->isDeclaration())
-    return; // external functions have no bodies.
+  assert(!F->isDeclaration());
 
   // Clear the DefinedValues and ForwardRefs maps because we can't have
   // cross-function forward refs
   ForwardRefs.clear();
   DefinedValues.clear();
 
+  UsedVars.clear();
+
   // Create all the argument values
   if (!is_inline) {
-    if (!F->arg_empty()) {
-      Out << "Function::arg_iterator args = " << getCppName(F)
-          << "->arg_begin();";
-      nl(Out);
-    }
     for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
          AI != AE; ++AI) {
-      Out << "Value* " << getCppName(AI) << " = args++;";
-      nl(Out);
       if (AI->hasName()) {
-        Out << getCppName(AI) << "->setName(\"";
-        printEscapedString(AI->getName());
-        Out << "\");";
-        nl(Out);
+        //Out << getCppName(AI) << "->setName(\"";
+        //printEscapedString(AI->getName());
+        //Out << "\");";
+        //nl(Out);
+      } else {
       }
     }
   }
 
-  // Create all the basic blocks
-  nl(Out);
+  // Prepare relooper TODO: resize buffer as needed
+  #define RELOOPER_BUFFER 10*1024*1024
+  static char *buffer = new char[RELOOPER_BUFFER];
+  Relooper::SetOutputBuffer(buffer, RELOOPER_BUFFER);
+  Relooper R;
+  Block *Entry = NULL;
+  std::map<const BasicBlock*, Block*> LLVMToRelooper;
+
+  // Create relooper blocks with their contents
   for (Function::const_iterator BI = F->begin(), BE = F->end();
        BI != BE; ++BI) {
-    std::string bbname(getCppName(BI));
-    Out << "BasicBlock* " << bbname <<
-           " = BasicBlock::Create(mod->getContext(), \"";
-    if (BI->hasName())
-      printEscapedString(BI->getName());
-    Out << "\"," << getCppName(BI->getParent()) << ",0);";
-    nl(Out);
+    std::string contents = "";
+    for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
+         I != E; ++I) {
+      contents += " " + generateInstruction(I) + "\n";
+    }
+    Block *Curr = new Block(contents.c_str(), NULL); // TODO: use branch vars so we get switches
+    const BasicBlock *BB = &*BI;
+    LLVMToRelooper[BB] = Curr;
+    R.AddBlock(Curr);
+    if (!Entry) Entry = Curr;
   }
 
-  // Output all of its basic blocks... for the function
+  // Create branchings
   for (Function::const_iterator BI = F->begin(), BE = F->end();
        BI != BE; ++BI) {
-    std::string bbname(getCppName(BI));
-    nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
-    nl(Out);
+    const TerminatorInst *TI = BI->getTerminator();
+    switch (TI->getOpcode()) {
+    default: {
+      //error("Invalid branch instruction");
+      break;
+    }
+    case Instruction::Br: {
+      const BranchInst* br = cast<BranchInst>(TI);
+      if (br->getNumOperands() == 3) {
+        BasicBlock *S0 = br->getSuccessor(0);
+        BasicBlock *S1 = br->getSuccessor(1);
+        LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*S0], NULL);
+        LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*S1],
+            getOpName(TI->getOperand(0)).c_str());
+      } else if (br->getNumOperands() == 1) {
+        BasicBlock *S = br->getSuccessor(0);
+        LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*S], NULL);
+      } else {
+        error("Branch with 2 operands?");
+      }
+      break;
+    }
+    }
+  }
 
-    // Output all of the instructions in the basic block...
-    for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
-         I != E; ++I) {
-      printInstruction(I,bbname);
+  // Calculate relooping and print
+  R.Calculate(Entry);
+  R.Render();
+
+  // Emit local variables
+  UsedVars["sp"] = Type::getInt32Ty(F->getContext())->getTypeID();
+  if (!UsedVars.empty()) {
+    Out << " var ";
+    for (VarMap::iterator VI = UsedVars.begin(); VI != UsedVars.end(); ++VI) {
+      if (VI != UsedVars.begin()) {
+        Out << ", ";
+      }
+      Out << VI->first << " = ";
+      switch (VI->second) {
+        default:
+          assert(false);
+        case Type::PointerTyID:
+        case Type::IntegerTyID:
+          Out << "0";
+          break;
+        case Type::FloatTyID:
+          // TODO Out << "Math_fround(0)";
+        case Type::DoubleTyID:
+          Out << "+0"; // FIXME
+          break;
+      }
     }
+    Out << ";";
+    nl(Out);
   }
 
+  // Emit stack entry
+  Out << " " + getAssign("sp", Type::getInt32Ty(F->getContext())) + "STACKTOP;";
+
+  // Emit (relooped) code
+  nl(Out) << buffer;
+
   // Loop over the ForwardRefs and resolve them now that all instructions
   // are generated.
   if (!ForwardRefs.empty()) {
@@ -1846,116 +1879,187 @@ void CppWriter::printInline(const std::string& fname,
 }
 
 void CppWriter::printModuleBody() {
-  // Print out all the type definitions
-  nl(Out) << "// Type Definitions"; nl(Out);
-  printTypes(TheModule);
+  // Calculate the constants definitions.
+  printConstants(TheModule);
 
-  // Functions can call each other and global variables can reference them so
-  // define all the functions first before emitting their function bodies.
-  nl(Out) << "// Function Declarations"; nl(Out);
+  // Emit function bodies.
+  nl(Out) << "// EMSCRIPTEN_START_FUNCTIONS"; nl(Out);
   for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
-       I != E; ++I)
-    printFunctionHead(I);
+       I != E; ++I) {
+    if (!I->isDeclaration()) {
+      Out << "function _" << I->getName() << "(";
+      for (Function::const_arg_iterator AI = I->arg_begin(), AE = I->arg_end();
+           AI != AE; ++AI) {
+        if (AI != I->arg_begin()) Out << ",";
+        Out << getCppName(AI);
+      }
+      Out << ") {";
+      nl(Out);
+      for (Function::const_arg_iterator AI = I->arg_begin(), AE = I->arg_end();
+           AI != AE; ++AI) {
+        std::string name = getCppName(AI);
+        Out << " " << name << " = " << getCast(name, AI->getType()) << ";";
+        nl(Out);
+      }
+      printFunctionBody(I);
+      Out << "}";
+      nl(Out);
+    }
+  }
+  Out << "// EMSCRIPTEN_END_FUNCTIONS\n\n";
 
-  // Process the global variables declarations. We can't initialze them until
-  // after the constants are printed so just print a header for each global
-  nl(Out) << "// Global Variable Declarations\n"; nl(Out);
-  for (Module::const_global_iterator I = TheModule->global_begin(),
-         E = TheModule->global_end(); I != E; ++I) {
-    printVariableHead(I);
+  // TODO fix commas
+  Out << "/* memory initializer */ allocate([";
+  printCommaSeparated(GlobalData64);
+  if (GlobalData64.size() > 0 && GlobalData32.size() > 0) {
+    Out << ",";
+  }
+  printCommaSeparated(GlobalData32);
+  if (GlobalData32.size() > 0 && GlobalData8.size() > 0) {
+    Out << ",";
   }
+  printCommaSeparated(GlobalData8);
+  Out << "], \"i8\", ALLOC_NONE, Runtime.GLOBAL_BASE);";
 
-  // Print out all the constants definitions. Constants don't recurse except
-  // through GlobalValues. All GlobalValues have been declared at this point
-  // so we can proceed to generate the constants.
-  nl(Out) << "// Constant Definitions"; nl(Out);
-  printConstants(TheModule);
+  // Emit metadata for emcc driver
+  Out << "\n\n// EMSCRIPTEN_METADATA\n";
+  Out << "{\n";
 
-  // Process the global variables definitions now that all the constants have
-  // been emitted. These definitions just couple the gvars with their constant
-  // initializers.
-  nl(Out) << "// Global Variable Definitions"; nl(Out);
-  for (Module::const_global_iterator I = TheModule->global_begin(),
-         E = TheModule->global_end(); I != E; ++I) {
-    printVariableBody(I);
+  Out << "\"declares\": [";
+  bool first = true;
+  for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+       I != E; ++I) {
+    if (I->isDeclaration()) {
+      if (first) {
+        first = false;
+      } else {
+        Out << ", ";
+      }
+      Out << "\"" + I->getName() + "\"";
+    }
   }
-
-  // Finally, we can safely put out all of the function bodies.
-  nl(Out) << "// Function Definitions"; nl(Out);
+  Out << "],";
+  Out << "\"implementedFunctions\": [";
+  first = true;
   for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
        I != E; ++I) {
     if (!I->isDeclaration()) {
-      nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
-              << ")";
-      nl(Out) << "{";
-      nl(Out,1);
-      printFunctionBody(I);
-      nl(Out,-1) << "}";
-      nl(Out);
+      if (first) {
+        first = false;
+      } else {
+        Out << ", ";
+      }
+      Out << "\"_" << I->getName() << '"';
     }
   }
+  Out << "]";
+
+  Out << "\n}\n";
+}
+
+#include <iostream>
+
+void CppWriter::allocateConstant(const Constant* CV) {
+  if (isa<GlobalValue>(CV))
+    return;
+
+  std::string name = getCppName(CV);
+  if (const ConstantDataSequential *CDS =
+         dyn_cast<ConstantDataSequential>(CV)) {
+    if (CDS->isString()) {
+      GlobalAddresses[name] = Address(GlobalData8.size(), 8);
+      StringRef Str = CDS->getAsString();
+      for (unsigned int i = 0; i < Str.size(); i++) {
+        GlobalData8.push_back(Str.data()[i]);
+      }
+    } else {
+      assert(false);
+    }
+  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+    APFloat APF = CFP->getValueAPF();
+    if (CFP->getType() == Type::getFloatTy(CFP->getContext())) {
+      GlobalAddresses[name] = Address(GlobalData32.size(), 32);
+      union flt { float f; unsigned char b[sizeof(float)]; } flt;
+      flt.f = APF.convertToFloat();
+      for (unsigned i = 0; i < sizeof(float); ++i)
+        GlobalData32.push_back(flt.b[i]);
+    } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) {
+      GlobalAddresses[name] = Address(GlobalData64.size(), 64);
+      union dbl { double d; unsigned char b[sizeof(double)]; } dbl;
+      dbl.d = APF.convertToDouble();
+      for (unsigned i = 0; i < sizeof(double); ++i)
+        GlobalData64.push_back(dbl.b[i]);
+    } else {
+      assert(false);
+    }
+  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    union { uint64_t i; unsigned char b[sizeof(uint64_t)]; } integer;
+    integer.i = *CI->getValue().getRawData();
+    unsigned BitWidth = CI->getValue().getBitWidth();
+    assert(BitWidth == 32 || BitWidth == 64);
+    HeapData *GlobalData = NULL;
+    switch (BitWidth) {
+      case 32:
+        GlobalData = &GlobalData32;
+        break;
+      case 64:
+        GlobalData = &GlobalData64;
+        break;
+      default:
+        assert(false);
+    }
+    // assuming compiler is little endian
+    GlobalAddresses[name] = Address(GlobalData->size(), BitWidth);
+    for (unsigned i = 0; i < BitWidth / 8; ++i) {
+      GlobalData->push_back(integer.b[i]);
+    }
+  } else if (/* const ConstantPointerNull *CPN = */ dyn_cast<ConstantPointerNull>(CV)) {
+    assert(false);
+  } else if (/* const ConstantAggregateZero *CAZ = */ dyn_cast<ConstantAggregateZero>(CV)) {
+    printf("Warning: ignoring CAZ\n");
+    //Constant *C = CAZ->getSequentialElement();
+  } else if (/* const ConstantArray *CA = */ dyn_cast<ConstantArray>(CV)) {
+    assert(false);
+  } else if (/* const ConstantStruct *CS = */ dyn_cast<ConstantStruct>(CV)) {
+    assert(false);
+  } else if (/* const ConstantVector *CVec = */ dyn_cast<ConstantVector>(CV)) {
+    assert(false);
+  } else if (/* const BlockAddress *BA = */ dyn_cast<BlockAddress>(CV)) {
+    assert(false);
+  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr) {
+        assert(false && "Unhandled CE GEP");
+    } else if (CE->isCast()) {
+        assert(false && "Unhandled cast");
+    } else {
+        assert(false);
+    }
+  } else if (/* const UndefValue *UV = */ dyn_cast<UndefValue>(CV)) {
+    assert(false);
+  } else {
+    std::cout << getCppName(CV) << std::endl;
+    assert(false);
+  }
+}
+
+void CppWriter::printCommaSeparated(const HeapData data) {
+  for (HeapData::const_iterator I = data.begin();
+       I != data.end(); ++I) {
+    if (I != data.begin()) {
+      Out << ",";
+    }
+    Out << (int)*I;
+  }
 }
 
 void CppWriter::printProgram(const std::string& fname,
                              const std::string& mName) {
-  Out << "#include <llvm/Pass.h>\n";
-  Out << "#include <llvm/PassManager.h>\n";
-
-  Out << "#include <llvm/ADT/SmallVector.h>\n";
-  Out << "#include <llvm/Analysis/Verifier.h>\n";
-  Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
-  Out << "#include <llvm/IR/BasicBlock.h>\n";
-  Out << "#include <llvm/IR/CallingConv.h>\n";
-  Out << "#include <llvm/IR/Constants.h>\n";
-  Out << "#include <llvm/IR/DerivedTypes.h>\n";
-  Out << "#include <llvm/IR/Function.h>\n";
-  Out << "#include <llvm/IR/GlobalVariable.h>\n";
-  Out << "#include <llvm/IR/InlineAsm.h>\n";
-  Out << "#include <llvm/IR/Instructions.h>\n";
-  Out << "#include <llvm/IR/LLVMContext.h>\n";
-  Out << "#include <llvm/IR/Module.h>\n";
-  Out << "#include <llvm/Support/FormattedStream.h>\n";
-  Out << "#include <llvm/Support/MathExtras.h>\n";
-  Out << "#include <algorithm>\n";
-  Out << "using namespace llvm;\n\n";
-  Out << "Module* " << fname << "();\n\n";
-  Out << "int main(int argc, char**argv) {\n";
-  Out << "  Module* Mod = " << fname << "();\n";
-  Out << "  verifyModule(*Mod, PrintMessageAction);\n";
-  Out << "  PassManager PM;\n";
-  Out << "  PM.add(createPrintModulePass(&outs()));\n";
-  Out << "  PM.run(*Mod);\n";
-  Out << "  return 0;\n";
-  Out << "}\n\n";
   printModule(fname,mName);
 }
 
 void CppWriter::printModule(const std::string& fname,
                             const std::string& mName) {
-  nl(Out) << "Module* " << fname << "() {";
-  nl(Out,1) << "// Module Construction";
-  nl(Out) << "Module* mod = new Module(\"";
-  printEscapedString(mName);
-  Out << "\", getGlobalContext());";
-  if (!TheModule->getTargetTriple().empty()) {
-    nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
-  }
-  if (!TheModule->getTargetTriple().empty()) {
-    nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
-            << "\");";
-  }
-
-  if (!TheModule->getModuleInlineAsm().empty()) {
-    nl(Out) << "mod->setModuleInlineAsm(\"";
-    printEscapedString(TheModule->getModuleInlineAsm());
-    Out << "\");";
-  }
-  nl(Out);
-
   printModuleBody();
-  nl(Out) << "return mod;";
-  nl(Out,-1) << "}";
-  nl(Out);
 }
 
 void CppWriter::printContents(const std::string& fname,
@@ -2032,7 +2136,7 @@ bool CppWriter::runOnModule(Module &M) {
   TheModule = &M;
 
   // Emit a header
-  Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
+  Out << "//========================================\n\n";
 
   // Get the name of the function we're supposed to generate
   std::string fname = FuncName.getValue();
diff --git a/lib/Target/CppBackend/Relooper.cpp b/lib/Target/CppBackend/Relooper.cpp
new file mode 100644
index 0000000000..07b3631141
--- /dev/null
+++ b/lib/Target/CppBackend/Relooper.cpp
@@ -0,0 +1,1287 @@
+
+#include "Relooper.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <list>
+#include <stack>
+
+#if EMSCRIPTEN
+#include "ministring.h"
+#else
+#include <string>
+typedef std::string ministring;
+#endif
+
+template <class T, class U> bool contains(const T& container, const U& contained) {
+  return container.find(contained) != container.end();
+}
+
+#if DEBUG
+static void PrintDebug(const char *Format, ...);
+#define DebugDump(x, ...) Debugging::Dump(x, __VA_ARGS__)
+#else
+#define PrintDebug(x, ...)
+#define DebugDump(x, ...)
+#endif
+
+#define INDENTATION 1
+
+struct Indenter {
+  static int CurrIndent;
+
+  static void Indent() { CurrIndent++; }
+  static void Unindent() { CurrIndent--; }
+};
+
+static void PrintIndented(const char *Format, ...);
+static void PutIndented(const char *String);
+
+static char *OutputBufferRoot = NULL;
+static char *OutputBuffer = NULL;
+static int OutputBufferSize = 0;
+
+void PrintIndented(const char *Format, ...) {
+  assert(OutputBuffer);
+  assert(OutputBuffer + Indenter::CurrIndent*INDENTATION - OutputBufferRoot < OutputBufferSize);
+  for (int i = 0; i < Indenter::CurrIndent*INDENTATION; i++, OutputBuffer++) *OutputBuffer = ' ';
+  va_list Args;
+  va_start(Args, Format);
+  int left = OutputBufferSize - (OutputBuffer - OutputBufferRoot);
+  int written = vsnprintf(OutputBuffer, left, Format, Args);
+  assert(written < left);
+  OutputBuffer += written;
+  va_end(Args);
+}
+
+void PutIndented(const char *String) {
+  assert(OutputBuffer);
+  assert(OutputBuffer + Indenter::CurrIndent*INDENTATION - OutputBufferRoot < OutputBufferSize);
+  for (int i = 0; i < Indenter::CurrIndent*INDENTATION; i++, OutputBuffer++) *OutputBuffer = ' ';
+  int left = OutputBufferSize - (OutputBuffer - OutputBufferRoot);
+  int needed = strlen(String)+1;
+  assert(needed < left);
+  strcpy(OutputBuffer, String);
+  OutputBuffer += strlen(String);
+  *OutputBuffer++ = '\n';
+  *OutputBuffer = 0;
+}
+
+static int AsmJS = 0;
+
+// Indenter
+
+#if EMSCRIPTEN
+int Indenter::CurrIndent = 1;
+#else
+int Indenter::CurrIndent = 0;
+#endif
+
+// Branch
+
+Branch::Branch(const char *ConditionInit, const char *CodeInit) : Ancestor(NULL), Labeled(true) {
+  Condition = ConditionInit ? strdup(ConditionInit) : NULL;
+  Code = CodeInit ? strdup(CodeInit) : NULL;
+}
+
+Branch::~Branch() {
+  if (Condition) free((void*)Condition);
+  if (Code) free((void*)Code);
+}
+
+void Branch::Render(Block *Target, bool SetLabel) {
+  if (Code) PrintIndented("%s\n", Code);
+  if (SetLabel) PrintIndented("label = %d;\n", Target->Id);
+  if (Ancestor) {
+    if (Type != Direct) {
+      if (Labeled) {
+        PrintIndented("%s L%d;\n", Type == Break ? "break" : "continue", Ancestor->Id);
+      } else {
+        PrintIndented("%s;\n", Type == Break ? "break" : "continue");
+      }
+    }
+  }
+}
+
+// Block
+
+int Block::IdCounter = 1; // 0 is reserved for clearings
+
+Block::Block(const char *CodeInit, const char *BranchVarInit) : Parent(NULL), Id(Block::IdCounter++), IsCheckedMultipleEntry(false) {
+  Code = strdup(CodeInit);
+  BranchVar = BranchVarInit ? strdup(BranchVarInit) : NULL;
+}
+
+Block::~Block() {
+  if (Code) free((void*)Code);
+  if (BranchVar) free((void*)BranchVar);
+  for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin(); iter != ProcessedBranchesOut.end(); iter++) {
+    delete iter->second;
+  }
+  // XXX If not reachable, expected to have branches here. But need to clean them up to prevent leaks!
+}
+
+void Block::AddBranchTo(Block *Target, const char *Condition, const char *Code) {
+  assert(!contains(BranchesOut, Target)); // cannot add more than one branch to the same target
+  BranchesOut[Target] = new Branch(Condition, Code);
+}
+
+void Block::Render(bool InLoop) {
+  if (IsCheckedMultipleEntry && InLoop) {
+    PrintIndented("label = 0;\n");
+  }
+
+  if (Code) {
+    // Print code in an indented manner, even over multiple lines
+    char *Start = const_cast<char*>(Code);
+    while (*Start) {
+      char *End = strchr(Start, '\n');
+      if (End) *End = 0;
+      PutIndented(Start);
+      if (End) *End = '\n'; else break;
+      Start = End+1;
+    }
+  }
+
+  if (!ProcessedBranchesOut.size()) return;
+
+  bool SetLabel = true; // in some cases it is clear we can avoid setting label, see later
+
+  // A setting of the label variable (label = x) is necessary if it can
+  // cause an impact. The main case is where we set label to x, then elsewhere
+  // we check if label is equal to that value, i.e., that label is an entry
+  // in a multiple block. We also need to reset the label when we enter
+  // that block, so that each setting is a one-time action: consider
+  //
+  //    while (1) {
+  //      if (check) label = 1;
+  //      if (label == 1) { label = 0 }
+  //    }
+  //
+  // (Note that this case is impossible due to fusing, but that is not
+  // material here.) So setting to 0 is important just to clear the 1 for
+  // future iterations.
+  // TODO: When inside a loop, if necessary clear the label variable
+  //       once on the top, and never do settings that are in effect clears
+
+  // Fusing: If the next is a Multiple, we can fuse it with this block. Note
+  // that we must be the Inner of a Simple, so fusing means joining a Simple
+  // to a Multiple. What happens there is that all options in the Multiple
+  // *must* appear in the Simple (the Simple is the only one reaching the
+  // Multiple), so we can remove the Multiple and add its independent groups
+  // into the Simple's branches.
+  MultipleShape *Fused = Shape::IsMultiple(Parent->Next);
+  if (Fused) {
+    PrintDebug("Fusing Multiple to Simple\n");
+    Parent->Next = Parent->Next->Next;
+    Fused->RenderLoopPrefix();
+
+    // When the Multiple has the same number of groups as we have branches,
+    // they will all be fused, so it is safe to not set the label at all
+    if (SetLabel && Fused->InnerMap.size() == ProcessedBranchesOut.size()) {
+      SetLabel = false;
+    }
+  }
+
+  Block *DefaultTarget(NULL); // The block we branch to without checking the condition, if none of the other conditions held.
+
+  // Find the default target, the one without a condition
+  for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin(); iter != ProcessedBranchesOut.end(); iter++) {
+    if (!iter->second->Condition) {
+      assert(!DefaultTarget); // Must be exactly one default
+      DefaultTarget = iter->first;
+    }
+  }
+  assert(DefaultTarget); // Since each block *must* branch somewhere, this must be set
+
+  bool useSwitch = BranchVar != NULL;
+
+  if (useSwitch) {
+    PrintIndented("switch (%s) {\n", BranchVar);
+  }
+
+  ministring RemainingConditions;
+  bool First = !useSwitch; // when using a switch, there is no special first
+  for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin();; iter++) {
+    Block *Target;
+    Branch *Details;
+    if (iter != ProcessedBranchesOut.end()) {
+      Target = iter->first;
+      if (Target == DefaultTarget) continue; // done at the end
+      Details = iter->second;
+      assert(Details->Condition); // must have a condition if this is not the default target
+    } else {
+      Target = DefaultTarget;
+      Details = ProcessedBranchesOut[DefaultTarget];
+    }
+    bool SetCurrLabel = SetLabel && Target->IsCheckedMultipleEntry;
+    bool HasFusedContent = Fused && contains(Fused->InnerMap, Target);
+    bool HasContent = SetCurrLabel || Details->Type != Branch::Direct || HasFusedContent || Details->Code;
+    if (iter != ProcessedBranchesOut.end()) {
+      // If there is nothing to show in this branch, omit the condition
+      if (useSwitch) {
+        PrintIndented("%s {\n", Details->Condition);
+      } else {
+        if (HasContent) {
+          PrintIndented("%sif (%s) {\n", First ? "" : "} else ", Details->Condition);
+          First = false;
+        } else {
+          if (RemainingConditions.size() > 0) RemainingConditions += " && ";
+          RemainingConditions += "!(";
+          if (BranchVar) {
+            RemainingConditions += BranchVar;
+            RemainingConditions += " == ";
+          }
+          RemainingConditions += Details->Condition;
+          RemainingConditions += ")";
+        }
+      }
+    } else {
+      // this is the default
+      if (useSwitch) {
+        PrintIndented("default: {\n");
+      } else {
+        if (HasContent) {
+          if (RemainingConditions.size() > 0) {
+            if (First) {
+              PrintIndented("if (%s) {\n", RemainingConditions.c_str());
+              First = false;
+            } else {
+              PrintIndented("} else if (%s) {\n", RemainingConditions.c_str());
+            }
+          } else if (!First) {
+            PrintIndented("} else {\n");
+          }
+        }
+      }
+    }
+    if (!First) Indenter::Indent();
+    Details->Render(Target, SetCurrLabel);
+    if (HasFusedContent) {
+      Fused->InnerMap.find(Target)->second->Render(InLoop);
+    }
+    if (useSwitch && iter != ProcessedBranchesOut.end()) {
+      PrintIndented("break;\n");
+    }
+    if (!First) Indenter::Unindent();
+    if (useSwitch) {
+      PrintIndented("}\n");
+    }
+    if (iter == ProcessedBranchesOut.end()) break;
+  }
+  if (!First) PrintIndented("}\n");
+
+  if (Fused) {
+    Fused->RenderLoopPostfix();
+  }
+}
+
+// Shape
+
+int Shape::IdCounter = 0;
+
+// MultipleShape
+
+void MultipleShape::RenderLoopPrefix() {
+  if (NeedLoop) {
+    if (Labeled) {
+      PrintIndented("L%d: do {\n", Id);
+    } else {
+      PrintIndented("do {\n");
+    }
+    Indenter::Indent();
+  }
+}
+
+void MultipleShape::RenderLoopPostfix() {
+  if (NeedLoop) {
+    Indenter::Unindent();
+    PrintIndented("} while(0);\n");
+  }
+}
+
+void MultipleShape::Render(bool InLoop) {
+  RenderLoopPrefix();
+  bool First = true;
+  for (BlockShapeMap::iterator iter = InnerMap.begin(); iter != InnerMap.end(); iter++) {
+    if (AsmJS) {
+      PrintIndented("%sif ((label|0) == %d) {\n", First ? "" : "else ", iter->first->Id);
+    } else {
+      PrintIndented("%sif (label == %d) {\n", First ? "" : "else ", iter->first->Id);
+    }
+    First = false;
+    Indenter::Indent();
+    iter->second->Render(InLoop);
+    Indenter::Unindent();
+    PrintIndented("}\n");
+  }
+  RenderLoopPostfix();
+  if (Next) Next->Render(InLoop);
+};
+
+// LoopShape
+
+void LoopShape::Render(bool InLoop) {
+  if (Labeled) {
+    PrintIndented("L%d: while(1) {\n", Id);
+  } else {
+    PrintIndented("while(1) {\n");
+  }
+  Indenter::Indent();
+  Inner->Render(true);
+  Indenter::Unindent();
+  PrintIndented("}\n");
+  if (Next) Next->Render(InLoop);
+};
+
+/*
+// EmulatedShape
+
+void EmulatedShape::Render(bool InLoop) {
+  PrintIndented("while(1) {\n");
+  Indenter::Indent();
+  PrintIndented("switch(label) {\n");
+  Indenter::Indent();
+  for (int i = 0; i < Blocks.size(); i++) {
+    Block *Curr = Blocks[i];
+    PrintIndented("case %d: {\n", Curr->Id);
+    Indenter::Indent();
+    Curr->Render(InLoop);
+    PrintIndented("break;\n");
+    Indenter::Unindent();
+    PrintIndented("}\n");
+  }
+  Indenter::Unindent();
+  PrintIndented("}\n");
+  Indenter::Unindent();
+  PrintIndented("}\n");
+  if (Next) Next->Render(InLoop);
+};
+*/
+
+// Relooper
+
+Relooper::Relooper() : Root(NULL) {
+}
+
+Relooper::~Relooper() {
+  for (int i = 0; i < Blocks.size(); i++) delete Blocks[i];
+  for (int i = 0; i < Shapes.size(); i++) delete Shapes[i];
+}
+
+void Relooper::AddBlock(Block *New) {
+  Blocks.push_back(New);
+}
+
+struct RelooperRecursor {
+  Relooper *Parent;
+  RelooperRecursor(Relooper *ParentInit) : Parent(ParentInit) {}
+};
+
+typedef std::list<Block*> BlockList;
+
+void Relooper::Calculate(Block *Entry) {
+  // Scan and optimize the input
+  struct PreOptimizer : public RelooperRecursor {
+    PreOptimizer(Relooper *Parent) : RelooperRecursor(Parent) {}
+    BlockSet Live;
+
+    void FindLive(Block *Root) {
+      BlockList ToInvestigate;
+      ToInvestigate.push_back(Root);
+      while (ToInvestigate.size() > 0) {
+        Block *Curr = ToInvestigate.front();
+        ToInvestigate.pop_front();
+        if (contains(Live, Curr)) continue;
+        Live.insert(Curr);
+        for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
+          ToInvestigate.push_back(iter->first);
+        }
+      }
+    }
+
+    // If a block has multiple entries but no exits, and it is small enough, it is useful to split it.
+    // A common example is a C++ function where everything ends up at a final exit block and does some
+    // RAII cleanup. Without splitting, we will be forced to introduce labelled loops to allow
+    // reaching the final block
+    void SplitDeadEnds() {
+      int TotalCodeSize = 0;
+      for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) {
+        Block *Curr = *iter;
+        TotalCodeSize += strlen(Curr->Code);
+      }
+      BlockSet Splits;
+      BlockSet Removed;
+      //DebugDump(Live, "before");
+      for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) {
+        Block *Original = *iter;
+        if (Original->BranchesIn.size() <= 1 || Original->BranchesOut.size() > 0) continue; // only dead ends, for now
+        if (contains(Original->BranchesOut, Original)) continue; // cannot split a looping node
+        if (strlen(Original->Code)*(Original->BranchesIn.size()-1) > TotalCodeSize/5) continue; // if splitting increases raw code size by a significant amount, abort
+        // Split the node (for simplicity, we replace all the blocks, even though we could have reused the original)
+        PrintDebug("Splitting block %d\n", Original->Id);
+        for (BlockSet::iterator iter = Original->BranchesIn.begin(); iter != Original->BranchesIn.end(); iter++) {
+          Block *Prior = *iter;
+          Block *Split = new Block(Original->Code, Original->BranchVar);
+          Parent->Blocks.push_back(Split);
+          PrintDebug("  to %d\n", Split->Id);
+          Split->BranchesIn.insert(Prior);
+          Branch *Details = Prior->BranchesOut[Original];
+          Prior->BranchesOut[Split] = new Branch(Details->Condition, Details->Code);
+          Prior->BranchesOut.erase(Original);
+          for (BlockBranchMap::iterator iter = Original->BranchesOut.begin(); iter != Original->BranchesOut.end(); iter++) {
+            Block *Post = iter->first;
+            Branch *Details = iter->second;
+            Split->BranchesOut[Post] = new Branch(Details->Condition, Details->Code);
+            Post->BranchesIn.insert(Split);
+          }
+          Splits.insert(Split);
+          Removed.insert(Original);
+        }
+        for (BlockBranchMap::iterator iter = Original->BranchesOut.begin(); iter != Original->BranchesOut.end(); iter++) {
+          Block *Post = iter->first;
+          Post->BranchesIn.erase(Original);
+        }
+        //DebugDump(Live, "mid");
+      }
+      for (BlockSet::iterator iter = Splits.begin(); iter != Splits.end(); iter++) {
+        Live.insert(*iter);
+      }
+      for (BlockSet::iterator iter = Removed.begin(); iter != Removed.end(); iter++) {
+        Live.erase(*iter);
+      }
+      //DebugDump(Live, "after");
+    }
+  };
+  PreOptimizer Pre(this);
+  Pre.FindLive(Entry);
+
+  // Add incoming branches from live blocks, ignoring dead code
+  for (int i = 0; i < Blocks.size(); i++) {
+    Block *Curr = Blocks[i];
+    if (!contains(Pre.Live, Curr)) continue;
+    for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
+      iter->first->BranchesIn.insert(Curr);
+    }
+  }
+
+  Pre.SplitDeadEnds();
+
+  // Recursively process the graph
+
+  struct Analyzer : public RelooperRecursor {
+    Analyzer(Relooper *Parent) : RelooperRecursor(Parent) {}
+
+    // Add a shape to the list of shapes in this Relooper calculation
+    void Notice(Shape *New) {
+      Parent->Shapes.push_back(New);
+    }
+
+    // Create a list of entries from a block. If LimitTo is provided, only results in that set
+    // will appear
+    void GetBlocksOut(Block *Source, BlockSet& Entries, BlockSet *LimitTo=NULL) {
+      for (BlockBranchMap::iterator iter = Source->BranchesOut.begin(); iter != Source->BranchesOut.end(); iter++) {
+        if (!LimitTo || contains(*LimitTo, iter->first)) {
+          Entries.insert(iter->first);
+        }
+      }
+    }
+
+    // Converts/processes all branchings to a specific target
+    void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, BlockSet &From) {
+      PrintDebug("Solipsizing branches into %d\n", Target->Id);
+      DebugDump(From, "  relevant to solipsize: ");
+      for (BlockSet::iterator iter = Target->BranchesIn.begin(); iter != Target->BranchesIn.end();) {
+        Block *Prior = *iter;
+        if (!contains(From, Prior)) {
+          iter++;
+          continue;
+        }
+        Branch *PriorOut = Prior->BranchesOut[Target];
+        PriorOut->Ancestor = Ancestor;
+        PriorOut->Type = Type;
+        if (MultipleShape *Multiple = Shape::IsMultiple(Ancestor)) {
+          Multiple->NeedLoop++; // We are breaking out of this Multiple, so need a loop
+        }
+        iter++; // carefully increment iter before erasing
+        Target->BranchesIn.erase(Prior);
+        Target->ProcessedBranchesIn.insert(Prior);
+        Prior->BranchesOut.erase(Target);
+        Prior->ProcessedBranchesOut[Target] = PriorOut;
+        PrintDebug("  eliminated branch from %d\n", Prior->Id);
+      }
+    }
+
+    Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
+      PrintDebug("creating simple block with block #%d\n", Inner->Id);
+      SimpleShape *Simple = new SimpleShape;
+      Notice(Simple);
+      Simple->Inner = Inner;
+      Inner->Parent = Simple;
+      if (Blocks.size() > 1) {
+        Blocks.erase(Inner);
+        GetBlocksOut(Inner, NextEntries, &Blocks);
+        BlockSet JustInner;
+        JustInner.insert(Inner);
+        for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) {
+          Solipsize(*iter, Branch::Direct, Simple, JustInner);
+        }
+      }
+      return Simple;
+    }
+
+    Shape *MakeLoop(BlockSet &Blocks, BlockSet& Entries, BlockSet &NextEntries) {
+      // Find the inner blocks in this loop. Proceed backwards from the entries until
+      // you reach a seen block, collecting as you go.
+      BlockSet InnerBlocks;
+      BlockSet Queue = Entries;
+      while (Queue.size() > 0) {
+        Block *Curr = *(Queue.begin());
+        Queue.erase(Queue.begin());
+        if (!contains(InnerBlocks, Curr)) {
+          // This element is new, mark it as inner and remove from outer
+          InnerBlocks.insert(Curr);
+          Blocks.erase(Curr);
+          // Add the elements prior to it
+          for (BlockSet::iterator iter = Curr->BranchesIn.begin(); iter != Curr->BranchesIn.end(); iter++) {
+            Queue.insert(*iter);
+          }
+#if 0
+          // Add elements it leads to, if they are dead ends. There is no reason not to hoist dead ends
+          // into loops, as it can avoid multiple entries after the loop
+          for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
+            Block *Target = iter->first;
+            if (Target->BranchesIn.size() <= 1 && Target->BranchesOut.size() == 0) {
+              Queue.insert(Target);
+            }
+          }
+#endif
+        }
+      }
+      assert(InnerBlocks.size() > 0);
+
+      for (BlockSet::iterator iter = InnerBlocks.begin(); iter != InnerBlocks.end(); iter++) {
+        Block *Curr = *iter;
+        for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
+          Block *Possible = iter->first;
+          if (!contains(InnerBlocks, Possible)) {
+            NextEntries.insert(Possible);
+          }
+        }
+      }
+
+#if 0
+      // We can avoid multiple next entries by hoisting them into the loop.
+      if (NextEntries.size() > 1) {
+        BlockBlockSetMap IndependentGroups;
+        FindIndependentGroups(NextEntries, IndependentGroups, &InnerBlocks);
+
+        while (IndependentGroups.size() > 0 && NextEntries.size() > 1) {
+          Block *Min = NULL;
+          int MinSize = 0;
+          for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) {
+            Block *Entry = iter->first;
+            BlockSet &Blocks = iter->second;
+            if (!Min || Blocks.size() < MinSize) { // TODO: code size, not # of blocks
+              Min = Entry;
+              MinSize = Blocks.size();
+            }
+          }
+          // check how many new entries this would cause
+          BlockSet &Hoisted = IndependentGroups[Min];
+          bool abort = false;
+          for (BlockSet::iterator iter = Hoisted.begin(); iter != Hoisted.end() && !abort; iter++) {
+            Block *Curr = *iter;
+            for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
+              Block *Target = iter->first;
+              if (Hoisted.find(Target) == Hoisted.end() && NextEntries.find(Target) == NextEntries.end()) {
+                // abort this hoisting
+                abort = true;
+                break;
+              }
+            }
+          }
+          if (abort) {
+            IndependentGroups.erase(Min);
+            continue;
+          }
+          // hoist this entry
+          PrintDebug("hoisting %d into loop\n", Min->Id);
+          NextEntries.erase(Min);
+          for (BlockSet::iterator iter = Hoisted.begin(); iter != Hoisted.end(); iter++) {
+            Block *Curr = *iter;
+            InnerBlocks.insert(Curr);
+            Blocks.erase(Curr);
+          }
+          IndependentGroups.erase(Min);
+        }
+      }
+#endif
+
+      PrintDebug("creating loop block:\n");
+      DebugDump(InnerBlocks, "  inner blocks:");
+      DebugDump(Entries, "  inner entries:");
+      DebugDump(Blocks, "  outer blocks:");
+      DebugDump(NextEntries, "  outer entries:");
+
+      LoopShape *Loop = new LoopShape();
+      Notice(Loop);
+
+      // Solipsize the loop, replacing with break/continue and marking branches as Processed (will not affect later calculations)
+      // A. Branches to the loop entries become a continue to this shape
+      for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) {
+        Solipsize(*iter, Branch::Continue, Loop, InnerBlocks);
+      }
+      // B. Branches to outside the loop (a next entry) become breaks on this shape
+      for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) {
+        Solipsize(*iter, Branch::Break, Loop, InnerBlocks);
+      }
+      // Finish up
+      Shape *Inner = Process(InnerBlocks, Entries, NULL);
+      Loop->Inner = Inner;
+      return Loop;
+    }
+
+    // For each entry, find the independent group reachable by it. The independent group is
+    // the entry itself, plus all the blocks it can reach that cannot be directly reached by another entry. Note that we
+    // ignore directly reaching the entry itself by another entry.
+    //   @param Ignore - previous blocks that are irrelevant
+    void FindIndependentGroups(BlockSet &Entries, BlockBlockSetMap& IndependentGroups, BlockSet *Ignore=NULL) {
+      typedef std::map<Block*, Block*> BlockBlockMap;
+
+      struct HelperClass {
+        BlockBlockSetMap& IndependentGroups;
+        BlockBlockMap Ownership; // For each block, which entry it belongs to. We have reached it from there.
+
+        HelperClass(BlockBlockSetMap& IndependentGroupsInit) : IndependentGroups(IndependentGroupsInit) {}
+        void InvalidateWithChildren(Block *New) { // TODO: rename New
+          BlockList ToInvalidate; // Being in the list means you need to be invalidated
+          ToInvalidate.push_back(New);
+          while (ToInvalidate.size() > 0) {
+            Block *Invalidatee = ToInvalidate.front();
+            ToInvalidate.pop_front();
+            Block *Owner = Ownership[Invalidatee];
+            if (contains(IndependentGroups, Owner)) { // Owner may have been invalidated, do not add to IndependentGroups!
+              IndependentGroups[Owner].erase(Invalidatee);
+            }
+            if (Ownership[Invalidatee]) { // may have been seen before and invalidated already
+              Ownership[Invalidatee] = NULL;
+              for (BlockBranchMap::iterator iter = Invalidatee->BranchesOut.begin(); iter != Invalidatee->BranchesOut.end(); iter++) {
+                Block *Target = iter->first;
+                BlockBlockMap::iterator Known = Ownership.find(Target);
+                if (Known != Ownership.end()) {
+                  Block *TargetOwner = Known->second;
+                  if (TargetOwner) {
+                    ToInvalidate.push_back(Target);
+                  }
+                }
+              }
+            }
+          }
+        }
+      };
+      HelperClass Helper(IndependentGroups);
+
+      // We flow out from each of the entries, simultaneously.
+      // When we reach a new block, we add it as belonging to the one we got to it from.
+      // If we reach a new block that is already marked as belonging to someone, it is reachable by
+      // two entries and is not valid for any of them. Remove it and all it can reach that have been
+      // visited.
+
+      BlockList Queue; // Being in the queue means we just added this item, and we need to add its children
+      for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) {
+        Block *Entry = *iter;
+        Helper.Ownership[Entry] = Entry;
+        IndependentGroups[Entry].insert(Entry);
+        Queue.push_back(Entry);
+      }
+      while (Queue.size() > 0) {
+        Block *Curr = Queue.front();
+        Queue.pop_front();
+        Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership map if we are in the queue
+        if (!Owner) continue; // we have been invalidated meanwhile after being reached from two entries
+        // Add all children
+        for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
+          Block *New = iter->first;
+          BlockBlockMap::iterator Known = Helper.Ownership.find(New);
+          if (Known == Helper.Ownership.end()) {
+            // New node. Add it, and put it in the queue
+            Helper.Ownership[New] = Owner;
+            IndependentGroups[Owner].insert(New);
+            Queue.push_back(New);
+            continue;
+          }
+          Block *NewOwner = Known->second;
+          if (!NewOwner) continue; // We reached an invalidated node
+          if (NewOwner != Owner) {
+            // Invalidate this and all reachable that we have seen - we reached this from two locations
+            Helper.InvalidateWithChildren(New);
+          }
+          // otherwise, we have the same owner, so do nothing
+        }
+      }
+
+      // Having processed all the interesting blocks, we remain with just one potential issue:
+      // If a->b, and a was invalidated, but then b was later reached by someone else, we must
+      // invalidate b. To check for this, we go over all elements in the independent groups,
+      // if an element has a parent which does *not* have the same owner, we must remove it
+      // and all its children.
+
+      for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) {
+        BlockSet &CurrGroup = IndependentGroups[*iter];
+        BlockList ToInvalidate;
+        for (BlockSet::iterator iter = CurrGroup.begin(); iter != CurrGroup.end(); iter++) {
+          Block *Child = *iter;
+          for (BlockSet::iterator iter = Child->BranchesIn.begin(); iter != Child->BranchesIn.end(); iter++) {
+            Block *Parent = *iter;
+            if (Ignore && contains(*Ignore, Parent)) continue;
+            if (Helper.Ownership[Parent] != Helper.Ownership[Child]) {
+              ToInvalidate.push_back(Child);
+            }
+          }
+        }
+        while (ToInvalidate.size() > 0) {
+          Block *Invalidatee = ToInvalidate.front();
+          ToInvalidate.pop_front();
+          Helper.InvalidateWithChildren(Invalidatee);
+        }
+      }
+
+      // Remove empty groups
+      for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) {
+        if (IndependentGroups[*iter].size() == 0) {
+          IndependentGroups.erase(*iter);
+        }
+      }
+
+#if DEBUG
+      PrintDebug("Investigated independent groups:\n");
+      for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) {
+        DebugDump(iter->second, " group: ");
+      }
+#endif
+    }
+
+    Shape *MakeMultiple(BlockSet &Blocks, BlockSet& Entries, BlockBlockSetMap& IndependentGroups, Shape *Prev, BlockSet &NextEntries) {
+      PrintDebug("creating multiple block with %d inner groups\n", IndependentGroups.size());
+      bool Fused = !!(Shape::IsSimple(Prev));
+      MultipleShape *Multiple = new MultipleShape();
+      Notice(Multiple);
+      BlockSet CurrEntries;
+      for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) {
+        Block *CurrEntry = iter->first;
+        BlockSet &CurrBlocks = iter->second;
+        PrintDebug("  multiple group with entry %d:\n", CurrEntry->Id);
+        DebugDump(CurrBlocks, "    ");
+        // Create inner block
+        CurrEntries.clear();
+        CurrEntries.insert(CurrEntry);
+        for (BlockSet::iterator iter = CurrBlocks.begin(); iter != CurrBlocks.end(); iter++) {
+          Block *CurrInner = *iter;
+          // Remove the block from the remaining blocks
+          Blocks.erase(CurrInner);
+          // Find new next entries and fix branches to them
+          for (BlockBranchMap::iterator iter = CurrInner->BranchesOut.begin(); iter != CurrInner->BranchesOut.end();) {
+            Block *CurrTarget = iter->first;
+            BlockBranchMap::iterator Next = iter;
+            Next++;
+            if (!contains(CurrBlocks, CurrTarget)) {
+              NextEntries.insert(CurrTarget);
+              Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); 
+            }
+            iter = Next; // increment carefully because Solipsize can remove us
+          }
+        }
+        Multiple->InnerMap[CurrEntry] = Process(CurrBlocks, CurrEntries, NULL);
+        // If we are not fused, then our entries will actually be checked
+        if (!Fused) {
+          CurrEntry->IsCheckedMultipleEntry = true;
+        }
+      }
+      DebugDump(Blocks, "  remaining blocks after multiple:");
+      // Add entries not handled as next entries, they are deferred
+      for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) {
+        Block *Entry = *iter;
+        if (!contains(IndependentGroups, Entry)) {
+          NextEntries.insert(Entry);
+        }
+      }
+      return Multiple;
+    }
+
+    // Main function.
+    // Process a set of blocks with specified entries, returns a shape
+    // The Make* functions receive a NextEntries. If they fill it with data, those are the entries for the
+    //   ->Next block on them, and the blocks are what remains in Blocks (which Make* modify). In this way
+    //   we avoid recursing on Next (imagine a long chain of Simples, if we recursed we could blow the stack).
+    Shape *Process(BlockSet &Blocks, BlockSet& InitialEntries, Shape *Prev) {
+      PrintDebug("Process() called\n");
+      BlockSet *Entries = &InitialEntries;
+      BlockSet TempEntries[2];
+      int CurrTempIndex = 0;
+      BlockSet *NextEntries;
+      Shape *Ret = NULL;
+      #define Make(call) \
+        Shape *Temp = call; \
+        if (Prev) Prev->Next = Temp; \
+        if (!Ret) Ret = Temp; \
+        if (!NextEntries->size()) { PrintDebug("Process() returning\n"); return Ret; } \
+        Prev = Temp; \
+        Entries = NextEntries; \
+        continue;
+      while (1) {
+        PrintDebug("Process() running\n");
+        DebugDump(Blocks, "  blocks : ");
+        DebugDump(*Entries, "  entries: ");
+
+        CurrTempIndex = 1-CurrTempIndex;
+        NextEntries = &TempEntries[CurrTempIndex];
+        NextEntries->clear();
+
+        if (Entries->size() == 0) return Ret;
+        if (Entries->size() == 1) {
+          Block *Curr = *(Entries->begin());
+          if (Curr->BranchesIn.size() == 0) {
+            // One entry, no looping ==> Simple
+            Make(MakeSimple(Blocks, Curr, *NextEntries));
+          }
+          // One entry, looping ==> Loop
+          Make(MakeLoop(Blocks, *Entries, *NextEntries));
+        }
+        // More than one entry, try to eliminate through a Multiple groups of
+        // independent blocks from an entry/ies. It is important to remove through
+        // multiples as opposed to looping since the former is more performant.
+        BlockBlockSetMap IndependentGroups;
+        FindIndependentGroups(*Entries, IndependentGroups);
+
+        PrintDebug("Independent groups: %d\n", IndependentGroups.size());
+
+        if (IndependentGroups.size() > 0) {
+          // We can handle a group in a multiple if its entry cannot be reached by another group.
+          // Note that it might be reachable by itself - a loop. But that is fine, we will create
+          // a loop inside the multiple block (which is the performant order to do it).
+          for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end();) {
+            Block *Entry = iter->first;
+            BlockSet &Group = iter->second;
+            BlockBlockSetMap::iterator curr = iter++; // iterate carefully, we may delete
+            for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); iterBranch != Entry->BranchesIn.end(); iterBranch++) {
+              Block *Origin = *iterBranch;
+              if (!contains(Group, Origin)) {
+                // Reached from outside the group, so we cannot handle this
+                PrintDebug("Cannot handle group with entry %d because of incoming branch from %d\n", Entry->Id, Origin->Id);
+                IndependentGroups.erase(curr);
+                break;
+              }
+            }
+          }
+
+          // As an optimization, if we have 2 independent groups, and one is a small dead end, we can handle only that dead end.
+          // The other then becomes a Next - without nesting in the code and recursion in the analysis.
+          // TODO: if the larger is the only dead end, handle that too
+          // TODO: handle >2 groups
+          // TODO: handle not just dead ends, but also that do not branch to the NextEntries. However, must be careful
+          //       there since we create a Next, and that Next can prevent eliminating a break (since we no longer
+          //       naturally reach the same place), which may necessitate a one-time loop, which makes the unnesting
+          //       pointless.
+          if (IndependentGroups.size() == 2) {
+            // Find the smaller one
+            BlockBlockSetMap::iterator iter = IndependentGroups.begin();
+            Block *SmallEntry = iter->first;
+            int SmallSize = iter->second.size();
+            iter++;
+            Block *LargeEntry = iter->first;
+            int LargeSize = iter->second.size();
+            if (SmallSize != LargeSize) { // ignore the case where they are identical - keep things symmetrical there
+              if (SmallSize > LargeSize) {
+                Block *Temp = SmallEntry;
+                SmallEntry = LargeEntry;
+                LargeEntry = Temp; // Note: we did not flip the Sizes too, they are now invalid. TODO: use the smaller size as a limit?
+              }
+              // Check if dead end
+              bool DeadEnd = true;
+              BlockSet &SmallGroup = IndependentGroups[SmallEntry];
+              for (BlockSet::iterator iter = SmallGroup.begin(); iter != SmallGroup.end(); iter++) {
+                Block *Curr = *iter;
+                for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
+                  Block *Target = iter->first;
+                  if (!contains(SmallGroup, Target)) {
+                    DeadEnd = false;
+                    break;
+                  }
+                }
+                if (!DeadEnd) break;
+              }
+              if (DeadEnd) {
+                PrintDebug("Removing nesting by not handling large group because small group is dead end\n");
+                IndependentGroups.erase(LargeEntry);
+              }
+            }
+          }
+
+          PrintDebug("Handleable independent groups: %d\n", IndependentGroups.size());
+
+          if (IndependentGroups.size() > 0) {
+            // Some groups removable ==> Multiple
+            Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, *NextEntries));
+          }
+        }
+        // No independent groups, must be loopable ==> Loop
+        Make(MakeLoop(Blocks, *Entries, *NextEntries));
+      }
+    }
+  };
+
+  // Main
+
+  BlockSet AllBlocks;
+  for (BlockSet::iterator iter = Pre.Live.begin(); iter != Pre.Live.end(); iter++) {
+    Block *Curr = *iter;
+    AllBlocks.insert(Curr);
+#if DEBUG
+    PrintDebug("Adding block %d (%s)\n", Curr->Id, Curr->Code);
+#endif
+  }
+
+  BlockSet Entries;
+  Entries.insert(Entry);
+  Root = Analyzer(this).Process(AllBlocks, Entries, NULL);
+  assert(Root);
+
+  // Post optimizations
+
+  struct PostOptimizer {
+    Relooper *Parent;
+    void *Closure;
+
+    PostOptimizer(Relooper *ParentInit) : Parent(ParentInit), Closure(NULL) {}
+
+    #define RECURSE_Multiple(shape, func) \
+      for (BlockShapeMap::iterator iter = shape->InnerMap.begin(); iter != shape->InnerMap.end(); iter++) { \
+        func(iter->second); \
+      }
+    #define RECURSE_Loop(shape, func) \
+      func(shape->Inner);
+    #define RECURSE(shape, func) RECURSE_##shape(shape, func);
+
+    #define SHAPE_SWITCH(var, simple, multiple, loop) \
+      if (SimpleShape *Simple = Shape::IsSimple(var)) { \
+        simple; \
+      } else if (MultipleShape *Multiple = Shape::IsMultiple(var)) { \
+        multiple; \
+      } else if (LoopShape *Loop = Shape::IsLoop(var)) { \
+        loop; \
+      }
+
+    // Find the blocks that natural control flow can get us directly to, or through a multiple that we ignore
+    void FollowNaturalFlow(Shape *S, BlockSet &Out) {
+      SHAPE_SWITCH(S, {
+        Out.insert(Simple->Inner);
+      }, {
+        for (BlockShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) {
+          FollowNaturalFlow(iter->second, Out);
+        }
+        FollowNaturalFlow(Multiple->Next, Out);
+      }, {
+        FollowNaturalFlow(Loop->Inner, Out);
+      });
+    }
+
+    void FindNaturals(Shape *Root, Shape *Otherwise=NULL) {
+      if (Root->Next) {
+        Root->Natural = Root->Next;
+        FindNaturals(Root->Next, Otherwise);
+      } else {
+        Root->Natural = Otherwise;
+      }
+
+      SHAPE_SWITCH(Root, {
+      }, {
+        for (BlockShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) {
+          FindNaturals(iter->second, Root->Natural);
+        }
+      }, {
+        FindNaturals(Loop->Inner, Loop->Inner);
+      });
+    }
+
+    // Remove unneeded breaks and continues.
+    // A flow operation is trivially unneeded if the shape we naturally get to by normal code
+    // execution is the same as the flow forces us to.
+    void RemoveUnneededFlows(Shape *Root, Shape *Natural=NULL, LoopShape *LastLoop=NULL) {
+      BlockSet NaturalBlocks;
+      FollowNaturalFlow(Natural, NaturalBlocks);
+      Shape *Next = Root;
+      while (Next) {
+        Root = Next;
+        Next = NULL;
+        SHAPE_SWITCH(Root, {
+          if (Simple->Inner->BranchVar) LastLoop = NULL; // a switch clears out the loop (TODO: only for breaks, not continue)
+
+          // If there is a next block, we already know at Simple creation time to make direct branches,
+          // and we can do nothing more. If there is no next however, then Natural is where we will
+          // go to by doing nothing, so we can potentially optimize some branches to direct.
+          if (Simple->Next) {
+            Next = Simple->Next;
+          } else {
+            for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) {
+              Block *Target = iter->first;
+              Branch *Details = iter->second;
+              if (Details->Type != Branch::Direct && contains(NaturalBlocks, Target)) { // note: cannot handle split blocks
+                Details->Type = Branch::Direct;
+                if (MultipleShape *Multiple = Shape::IsMultiple(Details->Ancestor)) {
+                  Multiple->NeedLoop--;
+                }
+              } else if (Details->Type == Branch::Break && LastLoop && LastLoop->Natural == Details->Ancestor->Natural) {
+                // it is important to simplify breaks, as simpler breaks enable other optimizations
+                Details->Labeled = false;
+                if (MultipleShape *Multiple = Shape::IsMultiple(Details->Ancestor)) {
+                  Multiple->NeedLoop--;
+                }
+              }
+            }
+          }
+        }, {
+          for (BlockShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) {
+            RemoveUnneededFlows(iter->second, Multiple->Next, Multiple->NeedLoop ? NULL : LastLoop);
+          }
+          Next = Multiple->Next;
+        }, {
+          RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop);
+          Next = Loop->Next;
+        });
+      }
+    }
+
+    // After we know which loops exist, we can calculate which need to be labeled
+    void FindLabeledLoops(Shape *Root) {
+      bool First = Closure == NULL;
+      if (First) {
+        Closure = (void*)(new std::stack<Shape*>);
+      }
+      std::stack<Shape*> &LoopStack = *((std::stack<Shape*>*)Closure);
+
+      Shape *Next = Root;
+      while (Next) {
+        Root = Next;
+        Next = NULL;
+
+        SHAPE_SWITCH(Root, {
+          MultipleShape *Fused = Shape::IsMultiple(Root->Next);
+          // If we are fusing a Multiple with a loop into this Simple, then visit it now
+          if (Fused && Fused->NeedLoop) {
+            LoopStack.push(Fused);
+          }
+          if (Simple->Inner->BranchVar) {
+            LoopStack.push(NULL); // a switch means breaks are now useless, push a dummy
+          }
+          if (Fused) {
+            RECURSE_Multiple(Fused, FindLabeledLoops);
+          }
+          for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) {
+            Block *Target = iter->first;
+            Branch *Details = iter->second;
+            if (Details->Type != Branch::Direct) {
+              assert(LoopStack.size() > 0);
+              if (Details->Ancestor != LoopStack.top() && Details->Labeled) {
+                LabeledShape *Labeled = Shape::IsLabeled(Details->Ancestor);
+                Labeled->Labeled = true;
+              } else {
+                Details->Labeled = false;
+              }
+            }
+          }
+          if (Simple->Inner->BranchVar) {
+            LoopStack.pop();
+          }
+          if (Fused && Fused->NeedLoop) {
+            LoopStack.pop();
+          }
+          if (Fused) {
+            Next = Fused->Next;
+          } else {
+            Next = Root->Next;
+          }
+        }, {
+          if (Multiple->NeedLoop) {
+            LoopStack.push(Multiple);
+          }
+          RECURSE(Multiple, FindLabeledLoops);
+          if (Multiple->NeedLoop) {
+            LoopStack.pop();
+          }
+          Next = Root->Next;
+        }, {
+          LoopStack.push(Loop);
+          RECURSE(Loop, FindLabeledLoops);
+          LoopStack.pop();
+          Next = Root->Next;
+        });
+      }
+
+      if (First) {
+        delete (std::stack<Shape*>*)Closure;
+      }
+    }
+
+    void Process(Shape *Root) {
+      FindNaturals(Root);
+      RemoveUnneededFlows(Root);
+      FindLabeledLoops(Root);
+    }
+  };
+
+  PrintDebug("=== Optimizing shapes ===\n");
+
+  PostOptimizer(this).Process(Root);
+}
+
+void Relooper::Render() {
+  OutputBuffer = OutputBufferRoot;
+  assert(Root);
+  Root->Render(false);
+}
+
+void Relooper::SetOutputBuffer(char *Buffer, int Size) {
+  OutputBufferRoot = OutputBuffer = Buffer;
+  OutputBufferSize = Size;
+}
+
+void Relooper::MakeOutputBuffer(int Size) {
+  OutputBufferRoot = OutputBuffer = (char*)malloc(Size);
+  OutputBufferSize = Size;
+}
+
+void Relooper::SetAsmJSMode(int On) {
+  AsmJS = On;
+}
+
+#if DEBUG
+// Debugging
+
+void Debugging::Dump(BlockSet &Blocks, const char *prefix) {
+  if (prefix) printf("%s ", prefix);
+  for (BlockSet::iterator iter = Blocks.begin(); iter != Blocks.end(); iter++) {
+    Block *Curr = *iter;
+    printf("%d:\n", Curr->Id);
+    for (BlockBranchMap::iterator iter2 = Curr->BranchesOut.begin(); iter2 != Curr->BranchesOut.end(); iter2++) {
+      Block *Other = iter2->first;
+      printf("  -> %d\n", Other->Id);
+      assert(contains(Other->BranchesIn, Curr));
+    }
+  }
+}
+
+void Debugging::Dump(Shape *S, const char *prefix) {
+  if (prefix) printf("%s ", prefix);
+  if (!S) {
+    printf(" (null)\n");
+    return;
+  }
+  printf(" %d ", S->Id);
+  SHAPE_SWITCH(S, {
+    printf("<< Simple with block %d\n", Simple->Inner->Id);
+  }, {
+    printf("<< Multiple\n");
+    for (BlockShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) {
+      printf("     with entry %d\n", iter->first->Id);
+    }
+  }, {
+    printf("<< Loop\n");
+  });
+}
+
+static void PrintDebug(const char *Format, ...) {
+  printf("// ");
+  va_list Args;
+  va_start(Args, Format);
+  vprintf(Format, Args);
+  va_end(Args);
+}
+#endif
+
+// C API - useful for binding to other languages
+
+typedef std::map<void*, int> VoidIntMap;
+VoidIntMap __blockDebugMap__; // maps block pointers in currently running code to block ids, for generated debug output
+
+extern "C" {
+
+void rl_set_output_buffer(char *buffer, int size) {
+#if DEBUG
+  printf("#include \"Relooper.h\"\n");
+  printf("int main() {\n");
+  printf("  char buffer[100000];\n");
+  printf("  rl_set_output_buffer(buffer);\n");
+#endif
+  Relooper::SetOutputBuffer(buffer, size);
+}
+
+void rl_make_output_buffer(int size) {
+  Relooper::SetOutputBuffer((char*)malloc(size), size);
+}
+
+void rl_set_asm_js_mode(int on) {
+  Relooper::SetAsmJSMode(on);
+}
+
+void *rl_new_block(const char *text, const char *branch_var) {
+  Block *ret = new Block(text, branch_var);
+#if DEBUG
+  printf("  void *b%d = rl_new_block(\"// code %d\");\n", ret->Id, ret->Id);
+  __blockDebugMap__[ret] = ret->Id;
+  printf("  block_map[%d] = b%d;\n", ret->Id, ret->Id);
+#endif
+  return ret;
+}
+
+void rl_delete_block(void *block) {
+#if DEBUG
+  printf("  rl_delete_block(block_map[%d]);\n", ((Block*)block)->Id);
+#endif
+  delete (Block*)block;
+}
+
+void rl_block_add_branch_to(void *from, void *to, const char *condition, const char *code) {
+#if DEBUG
+  printf("  rl_block_add_branch_to(block_map[%d], block_map[%d], %s%s%s, %s%s%s);\n", ((Block*)from)->Id, ((Block*)to)->Id, condition ? "\"" : "", condition ? condition : "NULL", condition ? "\"" : "", code ? "\"" : "", code ? code : "NULL", code ? "\"" : "");
+#endif
+  ((Block*)from)->AddBranchTo((Block*)to, condition, code);
+}
+
+void *rl_new_relooper() {
+#if DEBUG
+  printf("  void *block_map[10000];\n");
+  printf("  void *rl = rl_new_relooper();\n");
+#endif
+  return new Relooper;
+}
+
+void rl_delete_relooper(void *relooper) {
+  delete (Relooper*)relooper;
+}
+
+void rl_relooper_add_block(void *relooper, void *block) {
+#if DEBUG
+  printf("  rl_relooper_add_block(rl, block_map[%d]);\n", ((Block*)block)->Id);
+#endif
+  ((Relooper*)relooper)->AddBlock((Block*)block);
+}
+
+void rl_relooper_calculate(void *relooper, void *entry) {
+#if DEBUG
+  printf("  rl_relooper_calculate(rl, block_map[%d]);\n", ((Block*)entry)->Id);
+  printf("  rl_relooper_render(rl);\n");
+  printf("  rl_delete_relooper(rl);\n");
+  printf("  puts(buffer);\n");
+  printf("  return 0;\n");
+  printf("}\n");
+#endif
+  ((Relooper*)relooper)->Calculate((Block*)entry);
+}
+
+void rl_relooper_render(void *relooper) {
+  ((Relooper*)relooper)->Render();
+}
+
+}
+
diff --git a/lib/Target/CppBackend/Relooper.h b/lib/Target/CppBackend/Relooper.h
new file mode 100644
index 0000000000..f3dedf8c0e
--- /dev/null
+++ b/lib/Target/CppBackend/Relooper.h
@@ -0,0 +1,251 @@
+/*
+This is an optimized C++ implemention of the Relooper algorithm originally
+developed as part of Emscripten. This implementation includes optimizations
+added since the original academic paper [1] was published about it, and is
+written in an LLVM-friendly way with the goal of inclusion in upstream
+LLVM.
+
+[1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In Proceedings of the ACM international conference companion on Object oriented programming systems languages and applications companion (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 http://doi.acm.org/10.1145/2048147.2048224
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#ifdef __cplusplus
+
+#include <map>
+#include <deque>
+#include <set>
+
+struct Block;
+struct Shape;
+
+// Info about a branching from one block to another
+struct Branch {
+  enum FlowType {
+    Direct = 0, // We will directly reach the right location through other means, no need for continue or break
+    Break = 1,
+    Continue = 2
+  };
+  Shape *Ancestor; // If not NULL, this shape is the relevant one for purposes of getting to the target block. We break or continue on it
+  Branch::FlowType Type; // If Ancestor is not NULL, this says whether to break or continue
+  bool Labeled; // If a break or continue, whether we need to use a label
+  const char *Condition; // The condition for which we branch. For example, "my_var == 1". Conditions are checked one by one. One of the conditions should have NULL as the condition, in which case it is the default
+  const char *Code; // If provided, code that is run right before the branch is taken. This is useful for phis
+
+  Branch(const char *ConditionInit, const char *CodeInit=NULL);
+  ~Branch();
+
+  // Prints out the branch
+  void Render(Block *Target, bool SetLabel);
+};
+
+typedef std::set<Block*> BlockSet;
+typedef std::map<Block*, Branch*> BlockBranchMap;
+
+// Represents a basic block of code - some instructions that end with a
+// control flow modifier (a branch, return or throw).
+struct Block {
+  // Branches become processed after we finish the shape relevant to them. For example,
+  // when we recreate a loop, branches to the loop start become continues and are now
+  // processed. When we calculate what shape to generate from a set of blocks, we ignore
+  // processed branches.
+  // Blocks own the Branch objects they use, and destroy them when done.
+  BlockBranchMap BranchesOut;
+  BlockSet BranchesIn;
+  BlockBranchMap ProcessedBranchesOut;
+  BlockSet ProcessedBranchesIn;
+  Shape *Parent; // The shape we are directly inside
+  int Id; // A unique identifier
+  const char *Code; // The string representation of the code in this block. Owning pointer (we copy the input)
+  const char *BranchVar; // If we have more than one branch out, the variable whose value determines where we go
+  bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching us requires setting the label variable
+
+  Block(const char *CodeInit, const char *BranchVarInit);
+  ~Block();
+
+  void AddBranchTo(Block *Target, const char *Condition, const char *Code=NULL);
+
+  // Prints out the instructions code and branchings
+  void Render(bool InLoop);
+
+  // INTERNAL
+  static int IdCounter;
+};
+
+// Represents a structured control flow shape, one of
+//
+//  Simple: No control flow at all, just instructions. If several
+//          blocks, then 
+//
+//  Multiple: A shape with more than one entry. If the next block to
+//            be entered is among them, we run it and continue to
+//            the next shape, otherwise we continue immediately to the
+//            next shape.
+//
+//  Loop: An infinite loop.
+//
+//  Emulated: Control flow is managed by a switch in a loop. This
+//            is necessary in some cases, for example when control
+//            flow is not known until runtime (indirect branches,
+//            setjmp returns, etc.)
+//
+
+class SimpleShape;
+class LabeledShape;
+class MultipleShape;
+class LoopShape;
+
+struct Shape {
+  int Id; // A unique identifier. Used to identify loops, labels are Lx where x is the Id.
+  Shape *Next; // The shape that will appear in the code right after this one
+  Shape *Natural; // The shape that control flow gets to naturally (if there is Next, then this is Next)
+
+  enum ShapeType {
+    Simple,
+    Multiple,
+    Loop
+  };
+  ShapeType Type;
+
+  Shape(ShapeType TypeInit) : Id(Shape::IdCounter++), Next(NULL), Type(TypeInit) {}
+  virtual ~Shape() {}
+
+  virtual void Render(bool InLoop) = 0;
+
+  static SimpleShape *IsSimple(Shape *It) { return It && It->Type == Simple ? (SimpleShape*)It : NULL; }
+  static MultipleShape *IsMultiple(Shape *It) { return It && It->Type == Multiple ? (MultipleShape*)It : NULL; }
+  static LoopShape *IsLoop(Shape *It) { return It && It->Type == Loop ? (LoopShape*)It : NULL; }
+  static LabeledShape *IsLabeled(Shape *It) { return IsMultiple(It) || IsLoop(It) ? (LabeledShape*)It : NULL; }
+
+  // INTERNAL
+  static int IdCounter;
+};
+
+struct SimpleShape : public Shape {
+  Block *Inner;
+
+  SimpleShape() : Shape(Simple), Inner(NULL) {}
+  void Render(bool InLoop) {
+    Inner->Render(InLoop);
+    if (Next) Next->Render(InLoop);
+  }
+};
+
+typedef std::map<Block*, Shape*> BlockShapeMap;
+
+// A shape that may be implemented with a labeled loop.
+struct LabeledShape : public Shape {
+  bool Labeled; // If we have a loop, whether it needs to be labeled
+
+  LabeledShape(ShapeType TypeInit) : Shape(TypeInit), Labeled(false) {}
+};
+
+struct MultipleShape : public LabeledShape {
+  BlockShapeMap InnerMap; // entry block -> shape
+  int NeedLoop; // If we have branches, we need a loop. This is a counter of loop requirements,
+                // if we optimize it to 0, the loop is unneeded
+
+  MultipleShape() : LabeledShape(Multiple), NeedLoop(0) {}
+
+  void RenderLoopPrefix();
+  void RenderLoopPostfix();
+
+  void Render(bool InLoop);
+};
+
+struct LoopShape : public LabeledShape {
+  Shape *Inner;
+
+  LoopShape() : LabeledShape(Loop), Inner(NULL) {}
+  void Render(bool InLoop);
+};
+
+/*
+struct EmulatedShape : public Shape {
+  std::deque<Block*> Blocks;
+  void Render(bool InLoop);
+};
+*/
+
+// Implements the relooper algorithm for a function's blocks.
+//
+// Usage:
+//  1. Instantiate this struct.
+//  2. Call AddBlock with the blocks you have. Each should already
+//     have its branchings in specified (the branchings out will
+//     be calculated by the relooper).
+//  3. Call Render().
+//
+// Implementation details: The Relooper instance has
+// ownership of the blocks and shapes, and frees them when done.
+struct Relooper {
+  std::deque<Block*> Blocks;
+  std::deque<Shape*> Shapes;
+  Shape *Root;
+
+  Relooper();
+  ~Relooper();
+
+  void AddBlock(Block *New);
+
+  // Calculates the shapes
+  void Calculate(Block *Entry);
+
+  // Renders the result.
+  void Render();
+
+  // Sets the global buffer all printing goes to. Must call this or MakeOutputBuffer.
+  static void SetOutputBuffer(char *Buffer, int Size);
+
+  // Creates an output buffer. Must call this or SetOutputBuffer.
+  static void MakeOutputBuffer(int Size);
+
+  // Sets asm.js mode on or off (default is off)
+  static void SetAsmJSMode(int On);
+};
+
+typedef std::map<Block*, BlockSet> BlockBlockSetMap;
+
+#if DEBUG
+struct Debugging {
+  static void Dump(BlockSet &Blocks, const char *prefix=NULL);
+  static void Dump(Shape *S, const char *prefix=NULL);
+};
+#endif
+
+#endif // __cplusplus
+
+// C API - useful for binding to other languages
+
+#ifdef _WIN32
+  #ifdef RELOOPERDLL_EXPORTS
+    #define RELOOPERDLL_API __declspec(dllexport)
+  #else
+    #define RELOOPERDLL_API __declspec(dllimport)
+  #endif
+#else
+  #define RELOOPERDLL_API
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+RELOOPERDLL_API void  rl_set_output_buffer(char *buffer, int size);
+RELOOPERDLL_API void  rl_make_output_buffer(int size);
+RELOOPERDLL_API void  rl_set_asm_js_mode(int on);
+RELOOPERDLL_API void *rl_new_block(const char *text, const char *branch_var);
+RELOOPERDLL_API void  rl_delete_block(void *block);
+RELOOPERDLL_API void  rl_block_add_branch_to(void *from, void *to, const char *condition, const char *code);
+RELOOPERDLL_API void *rl_new_relooper();
+RELOOPERDLL_API void  rl_delete_relooper(void *relooper);
+RELOOPERDLL_API void  rl_relooper_add_block(void *relooper, void *block);
+RELOOPERDLL_API void  rl_relooper_calculate(void *relooper, void *entry);
+RELOOPERDLL_API void  rl_relooper_render(void *relooper);
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
index 1ca74a4895..339b36105c 100644
--- a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
+++ b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -20,8 +20,8 @@ static unsigned CppBackend_TripleMatchQuality(const std::string &TT) {
 }
 
 extern "C" void LLVMInitializeCppBackendTargetInfo() { 
-  TargetRegistry::RegisterTarget(TheCppBackendTarget, "cpp",    
-                                  "C++ backend",
+  TargetRegistry::RegisterTarget(TheCppBackendTarget, "js",
+                                  "JavaScript backend",
                                   &CppBackend_TripleMatchQuality);
 }