diff options
-rw-r--r-- | lib/Target/CppBackend/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/CppBackend/CPPBackend.cpp | 1010 | ||||
-rw-r--r-- | lib/Target/CppBackend/Relooper.cpp | 1287 | ||||
-rw-r--r-- | lib/Target/CppBackend/Relooper.h | 251 | ||||
-rw-r--r-- | lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp | 4 |
5 files changed, 2098 insertions, 455 deletions
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt index 515e1dd7e3..7e138b2695 100644 --- a/lib/Target/CppBackend/CMakeLists.txt +++ b/lib/Target/CppBackend/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_target(CppBackendCodeGen CPPBackend.cpp + Relooper.cpp ) add_subdirectory(TargetInfo) diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 3e69098edc..299db973f8 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -38,6 +38,10 @@ #include <set> using namespace llvm; +#define dump(x, ...) fprintf(stderr, x, __VA_ARGS__) + +#include <Relooper.h> + static cl::opt<std::string> FuncName("cppfname", cl::desc("Specify the name of the generated function"), cl::value_desc("function name")); @@ -85,7 +89,10 @@ namespace { typedef std::set<std::string> NameSet; typedef std::set<Type*> TypeSet; typedef std::set<const Value*> ValueSet; + typedef std::map<std::string, Type::TypeID> VarMap; typedef std::map<const Value*,std::string> ForwardRefMap; + typedef std::vector<unsigned char> HeapData; + typedef std::pair<unsigned, unsigned> Address; /// CppWriter - This class is the main chunk of code that converts an LLVM /// module to a C++ translation unit. @@ -98,14 +105,20 @@ namespace { NameSet UsedNames; TypeSet DefinedTypes; ValueSet DefinedValues; + VarMap UsedVars; ForwardRefMap ForwardRefs; bool is_inline; unsigned indent_level; + HeapData GlobalData8; + HeapData GlobalData32; + HeapData GlobalData64; + std::map<std::string, Address> GlobalAddresses; public: static char ID; explicit CppWriter(formatted_raw_ostream &o) : - ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} + ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){ + } virtual const char *getPassName() const { return "C++ backend"; } @@ -134,7 +147,28 @@ namespace { void printCallingConv(CallingConv::ID cc); void printEscapedString(const std::string& str); void printCFP(const ConstantFP* CFP); - + void printCommaSeparated(const HeapData v); + + void allocateConstant(const Constant* CV); + unsigned getGlobalAddress(const std::string &s) { + Address a = GlobalAddresses[s]; + switch (a.second) { + case 64: + return a.first + 8; + case 32: + return a.first + 8 + GlobalData64.size(); + case 8: + return a.first + 8 + GlobalData64.size() + GlobalData32.size(); + default: + assert(false); + } + } + std::string getPtrLoad(const Value* Ptr); + std::string getPtrUse(const Value* Ptr); + std::string getPtr(const Value* Ptr); + std::string getConstant(const Constant*); + std::string getValueAsStr(const Value*); + std::string getValueAsParenStr(const Value*); std::string getCppName(Type* val); inline void printCppName(Type* val); @@ -145,6 +179,10 @@ namespace { void printType(Type* Ty); void printTypes(const Module* M); + std::string getAssign(const StringRef &, const Type *); + std::string getCast(const StringRef &, const Type *); + std::string getParenCast(const StringRef &, const Type *); + void printConstant(const Constant *CPV); void printConstants(const Module* M); @@ -155,10 +193,14 @@ namespace { void printFunctionUses(const Function *F); void printFunctionHead(const Function *F); void printFunctionBody(const Function *F); - void printInstruction(const Instruction *I, const std::string& bbname); + std::string generateInstruction(const Instruction *I); std::string getOpName(const Value*); void printModuleBody(); + + unsigned stackAlign(unsigned x) { + return x + (x%4 != 0 ? 4 - x%4 : 0); + } }; } // end anonymous namespace. @@ -171,8 +213,8 @@ formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) { } static inline void sanitize(std::string &str) { - for (size_t i = 0; i < str.length(); ++i) - if (!isalnum(str[i]) && str[i] != '_') + for (size_t i = 1; i < str.length(); ++i) + if (!isalnum(str[i]) && str[i] != '_' && str[i] != '$') str[i] = '_'; } @@ -217,8 +259,6 @@ void CppWriter::printCFP(const ConstantFP *CFP) { APFloat APF = APFloat(CFP->getValueAPF()); // copy if (CFP->getType() == Type::getFloatTy(CFP->getContext())) APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "ConstantFP::get(mod->getContext(), "; - Out << "APFloat("; #if HAVE_PRINTF_A char Buffer[100]; sprintf(Buffer, "%A", APF.convertToDouble()); @@ -257,11 +297,9 @@ void CppWriter::printCFP(const ConstantFP *CFP) { << utohexstr((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue()) << "U) /* " << StrVal << " */"; - Out << ")"; #if HAVE_PRINTF_A } #endif - Out << ")"; } void CppWriter::printCallingConv(CallingConv::ID cc){ @@ -423,40 +461,46 @@ std::string CppWriter::getCppName(const Value* val) { std::string name; ValueMap::iterator I = ValueNames.find(val); if (I != ValueNames.end() && I->first == val) - return I->second; - - if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) { - name = std::string("gvar_") + - getTypePrefix(GV->getType()->getElementType()); - } else if (isa<Function>(val)) { - name = std::string("func_"); - } else if (const Constant* C = dyn_cast<Constant>(val)) { - name = std::string("const_") + getTypePrefix(C->getType()); - } else if (const Argument* Arg = dyn_cast<Argument>(val)) { - if (is_inline) { - unsigned argNum = std::distance(Arg->getParent()->arg_begin(), - Function::const_arg_iterator(Arg)) + 1; - name = std::string("arg_") + utostr(argNum); - NameSet::iterator NI = UsedNames.find(name); - if (NI != UsedNames.end()) - name += std::string("_") + utostr(uniqueNum++); - UsedNames.insert(name); - return ValueNames[val] = name; + return I->second; + + if (val->hasName()) { + if (dyn_cast<Function>(val)) { + name = std::string("_") + val->getName().str(); } else { - name = getTypePrefix(val->getType()); + name = std::string("$") + val->getName().str(); } + sanitize(name); } else { - name = getTypePrefix(val->getType()); - } - if (val->hasName()) - name += val->getName(); - else + if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) { + name = std::string("gvar_") + + getTypePrefix(GV->getType()->getElementType()); + } else if (isa<Function>(val)) { + name = std::string("func_"); + } else if (const Constant* C = dyn_cast<Constant>(val)) { + name = std::string("const_") + getTypePrefix(C->getType()); + } else if (const Argument* Arg = dyn_cast<Argument>(val)) { + if (is_inline) { + unsigned argNum = std::distance(Arg->getParent()->arg_begin(), + Function::const_arg_iterator(Arg)) + 1; + name = std::string("arg_") + utostr(argNum); + NameSet::iterator NI = UsedNames.find(name); + if (NI != UsedNames.end()) + name += std::string("_") + utostr(uniqueNum++); + UsedNames.insert(name); + return ValueNames[val] = name; + } else { + name = getTypePrefix(val->getType()); + } + } else { + name = getTypePrefix(val->getType()); + } name += utostr(uniqueNum++); - sanitize(name); - NameSet::iterator NI = UsedNames.find(name); - if (NI != UsedNames.end()) - name += std::string("_") + utostr(uniqueNum++); - UsedNames.insert(name); + sanitize(name); + NameSet::iterator NI = UsedNames.find(name); + if (NI != UsedNames.end()) + name += std::string("_") + utostr(uniqueNum++); + UsedNames.insert(name); + } return ValueNames[val] = name; } @@ -710,6 +754,29 @@ void CppWriter::printTypes(const Module* M) { } +std::string CppWriter::getAssign(const StringRef &s, const Type *t) { + UsedVars[s] = t->getTypeID(); + return (s + " = ").str(); +} + +std::string CppWriter::getCast(const StringRef &s, const Type *t) { + switch (t->getTypeID()) { + default: + assert(false && "Unsupported type"); + case Type::FloatTyID: + // TODO return ("Math_fround(" + s + ")").str(); + case Type::DoubleTyID: + return ("+" + s).str(); + case Type::IntegerTyID: + case Type::PointerTyID: + return (s + "|0").str(); + } +} + +std::string CppWriter::getParenCast(const StringRef &s, const Type *t) { + return getCast(("(" + s + ")").str(), t); +} + // printConstant - Print out a constant pool entry... void CppWriter::printConstant(const Constant *CV) { // First, if the constant is actually a GlobalValue (variable or function) @@ -721,20 +788,16 @@ void CppWriter::printConstant(const Constant *CV) { std::string constName(getCppName(CV)); std::string typeName(getCppName(CV->getType())); + //Out << "var " << constName << " = "; + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { std::string constValue = CI->getValue().toString(10, true); - Out << "ConstantInt* " << constName - << " = ConstantInt::get(mod->getContext(), APInt(" - << cast<IntegerType>(CI->getType())->getBitWidth() - << ", StringRef(\"" << constValue << "\"), 10));"; + Out << constValue << ";"; } else if (isa<ConstantAggregateZero>(CV)) { - Out << "ConstantAggregateZero* " << constName - << " = ConstantAggregateZero::get(" << typeName << ");"; + Out << "ConstantAggregateZero::get(" << typeName << ");"; } else if (isa<ConstantPointerNull>(CV)) { - Out << "ConstantPointerNull* " << constName - << " = ConstantPointerNull::get(" << typeName << ");"; + Out << "ConstantPointerNull::get(" << typeName << ");"; } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - Out << "ConstantFP* " << constName << " = "; printCFP(CFP); Out << ";"; } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) { @@ -779,20 +842,13 @@ void CppWriter::printConstant(const Constant *CV) { } else if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) { if (CDS->isString()) { - Out << "Constant *" << constName << - " = ConstantDataArray::getString(mod->getContext(), \""; + Out << "allocate(["; StringRef Str = CDS->getAsString(); - bool nullTerminate = false; - if (Str.back() == 0) { - Str = Str.drop_back(); - nullTerminate = true; + for (unsigned int i = 0; i < Str.size(); i++) { + Out << (unsigned int)(Str.data()[i]); + if (i < Str.size()-1) Out << ","; } - printEscapedString(Str); - // Determine if we want null termination or not. - if (nullTerminate) - Out << "\", true);"; - else - Out << "\", false);";// No null terminator + Out << "], 'i8', ALLOC_STATIC);"; } else { // TODO: Could generate more efficient code generating CDS calls instead. Out << "std::vector<Constant*> " << constName << "_elems;"; @@ -813,19 +869,12 @@ void CppWriter::printConstant(const Constant *CV) { } } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { if (CE->getOpcode() == Instruction::GetElementPtr) { - Out << "std::vector<Constant*> " << constName << "_indices;"; - nl(Out); - printConstant(CE->getOperand(0)); - for (unsigned i = 1; i < CE->getNumOperands(); ++i ) { - printConstant(CE->getOperand(i)); - Out << constName << "_indices.push_back(" - << getCppName(CE->getOperand(i)) << ");"; - nl(Out); + Out << "allocate(["; + for (unsigned i = 0; i < CE->getNumOperands(); ++i ) { + Out << getCppName(CE->getOperand(i)); + if (i < CE->getNumOperands()-1) Out << ","; } - Out << "Constant* " << constName - << " = ConstantExpr::getGetElementPtr(" - << getCppName(CE->getOperand(0)) << ", " - << constName << "_indices);"; + Out << "], 'i32', ALLOC_STATIC);"; } else if (CE->isCast()) { printConstant(CE->getOperand(0)); Out << "Constant* " << constName << " = ConstantExpr::getCast("; @@ -936,8 +985,11 @@ void CppWriter::printConstants(const Module* M) { // Traverse all the global variables looking for constant initializers for (Module::const_global_iterator I = TheModule->global_begin(), E = TheModule->global_end(); I != E; ++I) - if (I->hasInitializer()) - printConstant(I->getInitializer()); + if (I->hasInitializer()) { + const Constant *CV = I->getInitializer(); + allocateConstant(CV); + GlobalAddresses[I->getName().str()] = GlobalAddresses[getCppName(CV)]; + } // Traverse the LLVM functions looking for constants for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); @@ -949,7 +1001,7 @@ void CppWriter::printConstants(const Module* M) { ++I) { for (unsigned i = 0; i < I->getNumOperands(); ++i) { if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) { - printConstant(C); + allocateConstant(C); } } } @@ -958,91 +1010,19 @@ void CppWriter::printConstants(const Module* M) { } void CppWriter::printVariableUses(const GlobalVariable *GV) { - nl(Out) << "// Type Definitions"; - nl(Out); - printType(GV->getType()); - if (GV->hasInitializer()) { - const Constant *Init = GV->getInitializer(); - printType(Init->getType()); - if (const Function *F = dyn_cast<Function>(Init)) { - nl(Out)<< "/ Function Declarations"; nl(Out); - printFunctionHead(F); - } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { - nl(Out) << "// Global Variable Declarations"; nl(Out); - printVariableHead(gv); - - nl(Out) << "// Global Variable Definitions"; nl(Out); - printVariableBody(gv); - } else { - nl(Out) << "// Constant Definitions"; nl(Out); - printConstant(Init); - } - } } void CppWriter::printVariableHead(const GlobalVariable *GV) { - nl(Out) << "GlobalVariable* " << getCppName(GV); - if (is_inline) { - Out << " = mod->getGlobalVariable(mod->getContext(), "; - printEscapedString(GV->getName()); - Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)"; - nl(Out) << "if (!" << getCppName(GV) << ") {"; - in(); nl(Out) << getCppName(GV); - } - Out << " = new GlobalVariable(/*Module=*/*mod, "; - nl(Out) << "/*Type=*/"; - printCppName(GV->getType()->getElementType()); - Out << ","; - nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false"); - Out << ","; - nl(Out) << "/*Linkage=*/"; - printLinkageType(GV->getLinkage()); - Out << ","; - nl(Out) << "/*Initializer=*/0, "; - if (GV->hasInitializer()) { - Out << "// has initializer, specified below"; - } - nl(Out) << "/*Name=*/\""; - printEscapedString(GV->getName()); - Out << "\");"; - nl(Out); - - if (GV->hasSection()) { - printCppName(GV); - Out << "->setSection(\""; - printEscapedString(GV->getSection()); - Out << "\");"; - nl(Out); - } - if (GV->getAlignment()) { - printCppName(GV); - Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");"; - nl(Out); - } - if (GV->getVisibility() != GlobalValue::DefaultVisibility) { - printCppName(GV); - Out << "->setVisibility("; - printVisibilityType(GV->getVisibility()); - Out << ");"; - nl(Out); - } - if (GV->isThreadLocal()) { - printCppName(GV); - Out << "->setThreadLocalMode("; - printThreadLocalMode(GV->getThreadLocalMode()); - Out << ");"; - nl(Out); - } - if (is_inline) { - out(); Out << "}"; nl(Out); - } + Out << "var "; + printCppName(GV); + Out << ";\n"; } void CppWriter::printVariableBody(const GlobalVariable *GV) { if (GV->hasInitializer()) { printCppName(GV); - Out << "->setInitializer("; - Out << getCppName(GV->getInitializer()) << ");"; + Out << " = "; + Out << getCppName(GV->getInitializer()) << ";"; nl(Out); } } @@ -1091,9 +1071,98 @@ static StringRef ConvertAtomicSynchScope(SynchronizationScope SynchScope) { llvm_unreachable("Unknown synch scope"); } -// printInstruction - This member is called for each Instruction in a function. -void CppWriter::printInstruction(const Instruction *I, - const std::string& bbname) { +std::string CppWriter::getPtrLoad(const Value* Ptr) { + Type *t = cast<PointerType>(Ptr->getType())->getElementType(); + return getCast(getPtrUse(Ptr), t); +} + +std::string CppWriter::getPtrUse(const Value* Ptr) { + Type *t = cast<PointerType>(Ptr->getType())->getElementType(); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { + std::string text = ""; + unsigned Addr = getGlobalAddress(GV->getName().str()); + switch (t->getTypeID()) { + default: + assert(false && "Unsupported type"); + case Type::DoubleTyID: + return "HEAPF64[" + utostr(Addr >> 3) + "]"; + case Type::FloatTyID: + return "HEAPF32[" + utostr(Addr >> 2) + "]"; + case Type::IntegerTyID: + return "HEAP32[" + utostr(Addr >> 2) + "]"; + } + } else { + switch (t->getTypeID()) { + default: + assert(false && "Unsupported type"); + case Type::DoubleTyID: + return "HEAPF64[" + getOpName(Ptr) + ">>3]"; + case Type::FloatTyID: + return "HEAPF32[" + getOpName(Ptr) + ">>2]"; + case Type::IntegerTyID: + case Type::PointerTyID: + return "HEAP32[" + getOpName(Ptr) + ">>2]"; + } + } +} + +std::string CppWriter::getPtr(const Value* Ptr) { + Type *t = cast<PointerType>(Ptr->getType())->getElementType(); + if (const Constant *CV = dyn_cast<Constant>(Ptr)) { + std::string text = ""; + unsigned Addr = getGlobalAddress(CV->getName().str()); + switch (t->getTypeID()) { + default: + assert(false && "Unsupported type"); + case Type::DoubleTyID: + return utostr(Addr >> 3); + case Type::FloatTyID: + return utostr(Addr >> 2); + case Type::ArrayTyID: + case Type::StructTyID: + case Type::PointerTyID: + case Type::VectorTyID: + case Type::IntegerTyID: + return utostr(Addr >> 2); + } + } else { + return getOpName(Ptr) + "|0"; + } +} + +std::string CppWriter::getConstant(const Constant* CV) { + if (/* const PointerType *Ptr = */ dyn_cast<PointerType>(CV->getType())) { + return getPtr(CV); + } else { + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + std::string S = ftostr(CFP->getValueAPF()); + S = '+' + S; +// if (S.find('.') == S.npos) { TODO: do this when necessary, but it is necessary even for 0.0001 + return S; + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + return CI->getValue().toString(10, true); + } else { + assert(false); + } + } +} + +std::string CppWriter::getValueAsStr(const Value* V) { + if (const Constant *CV = dyn_cast<Constant>(V)) { + return getConstant(CV); + } else { + return getCast(getCppName(V), V->getType()); + } +} + +std::string CppWriter::getValueAsParenStr(const Value* V) { + return "(" + getValueAsStr(V) + ")"; +} + +// generateInstruction - This member is called for each Instruction in a function. +std::string CppWriter::generateInstruction(const Instruction *I) { + std::string text = "NYI: " + std::string(I->getOpcodeName()); + std::string bbname = "NO_BBNAME"; std::string iName(getCppName(I)); // Before we emit this instruction, we need to take care of generating any @@ -1110,24 +1179,18 @@ void CppWriter::printInstruction(const Instruction *I, case Instruction::Ret: { const ReturnInst* ret = cast<ReturnInst>(I); - Out << "ReturnInst::Create(mod->getContext(), " - << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");"; + Value *RV = ret->getReturnValue(); + text = "STACKTOP = sp;"; + text += "return"; + if (RV == NULL) { + text += ";"; + } else { + text += " " + getValueAsStr(RV) + ";"; + } break; } case Instruction::Br: { - const BranchInst* br = cast<BranchInst>(I); - Out << "BranchInst::Create(" ; - if (br->getNumOperands() == 3) { - Out << opNames[2] << ", " - << opNames[1] << ", " - << opNames[0] << ", "; - - } else if (br->getNumOperands() == 1) { - Out << opNames[0] << ", "; - } else { - error("Branch with 2 operands?"); - } - Out << bbname << ");"; + text = ""; break; } case Instruction::Switch: { @@ -1213,14 +1276,15 @@ void CppWriter::printInstruction(const Instruction *I, case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:{ - Out << "BinaryOperator* " << iName << " = BinaryOperator::Create("; + //Out << "BinaryOperator* " << iName << " = BinaryOperator::Create("; + text = getAssign(iName, Type::getInt32Ty(I->getContext())); switch (I->getOpcode()) { - case Instruction::Add: Out << "Instruction::Add"; break; + case Instruction::Add: text += getParenCast(getValueAsParenStr(I->getOperand(0)) + " + " + getValueAsParenStr(I->getOperand(1)), Type::getInt32Ty(I->getContext())) + ";"; break; case Instruction::FAdd: Out << "Instruction::FAdd"; break; - case Instruction::Sub: Out << "Instruction::Sub"; break; + case Instruction::Sub: text += getParenCast(getValueAsParenStr(I->getOperand(0)) + " - " + getValueAsParenStr(I->getOperand(1)), Type::getInt32Ty(I->getContext())) + ";"; break; case Instruction::FSub: Out << "Instruction::FSub"; break; case Instruction::Mul: Out << "Instruction::Mul"; break; - case Instruction::FMul: Out << "Instruction::FMul"; break; + case Instruction::FMul: text += getParenCast(getValueAsStr(I->getOperand(0)) + " * " + getValueAsStr(I->getOperand(1)), I->getType()) + ";"; break; case Instruction::UDiv:Out << "Instruction::UDiv"; break; case Instruction::SDiv:Out << "Instruction::SDiv"; break; case Instruction::FDiv:Out << "Instruction::FDiv"; break; @@ -1235,9 +1299,9 @@ void CppWriter::printInstruction(const Instruction *I, case Instruction::AShr:Out << "Instruction::AShr"; break; default: Out << "Instruction::BadOpCode"; break; } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\", " << bbname << ");"; + //Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + //printEscapedString(I->getName()); + //Out << "\", " << bbname << ");"; break; } case Instruction::FCmp: { @@ -1267,96 +1331,59 @@ void CppWriter::printInstruction(const Instruction *I, break; } case Instruction::ICmp: { - Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", "; + text = getAssign(iName, Type::getInt32Ty(I->getContext())) + "(" + getValueAsStr(I->getOperand(0)) + ")"; switch (cast<ICmpInst>(I)->getPredicate()) { - case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break; - case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break; - case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break; - case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break; - case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break; - case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break; - case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break; - case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break; - case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break; - case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break; - default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break; + case ICmpInst::ICMP_EQ: text += "=="; break; + case ICmpInst::ICMP_NE: text += "!="; break; + // TODO: handle signed and unsigned + case ICmpInst::ICMP_ULE: text += "<="; break; + case ICmpInst::ICMP_SLE: text += "<="; break; + case ICmpInst::ICMP_UGE: text += ">="; break; + case ICmpInst::ICMP_SGE: text += ">="; break; + case ICmpInst::ICMP_ULT: text += "<"; break; + case ICmpInst::ICMP_SLT: text += "<"; break; + case ICmpInst::ICMP_UGT: text += ">"; break; + case ICmpInst::ICMP_SGT: text += ">"; break; + default: text += "ICmpInst::BAD_ICMP_PREDICATE"; break; } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\");"; + text += "(" + getValueAsStr(I->getOperand(1)) + ")"; break; } case Instruction::Alloca: { const AllocaInst* allocaI = cast<AllocaInst>(I); - Out << "AllocaInst* " << iName << " = new AllocaInst(" - << getCppName(allocaI->getAllocatedType()) << ", "; - if (allocaI->isArrayAllocation()) - Out << opNames[0] << ", "; - Out << "\""; - printEscapedString(allocaI->getName()); - Out << "\", " << bbname << ");"; - if (allocaI->getAlignment()) - nl(Out) << iName << "->setAlignment(" - << allocaI->getAlignment() << ");"; + Type *t = allocaI->getAllocatedType(); + unsigned size; + if (ArrayType *AT = dyn_cast<ArrayType>(t)) { + size = AT->getElementType()->getScalarSizeInBits()/8 * AT->getNumElements(); + } else { + size = t->getScalarSizeInBits()/8; + } + text = getAssign(iName, Type::getInt32Ty(I->getContext())) + "STACKTOP; STACKTOP = STACKTOP + " + Twine(stackAlign(size)).str() + "|0;"; break; } case Instruction::Load: { - const LoadInst* load = cast<LoadInst>(I); - Out << "LoadInst* " << iName << " = new LoadInst(" - << opNames[0] << ", \""; - printEscapedString(load->getName()); - Out << "\", " << (load->isVolatile() ? "true" : "false" ) - << ", " << bbname << ");"; - if (load->getAlignment()) - nl(Out) << iName << "->setAlignment(" - << load->getAlignment() << ");"; - if (load->isAtomic()) { - StringRef Ordering = ConvertAtomicOrdering(load->getOrdering()); - StringRef CrossThread = ConvertAtomicSynchScope(load->getSynchScope()); - nl(Out) << iName << "->setAtomic(" - << Ordering << ", " << CrossThread << ");"; - } + const LoadInst *LI = cast<LoadInst>(I); + const Value *Ptr = LI->getPointerOperand(); + Type *t = cast<PointerType>(Ptr->getType())->getElementType(); + text = getAssign(iName, t) + getPtrLoad(Ptr) + ";"; break; } case Instruction::Store: { - const StoreInst* store = cast<StoreInst>(I); - Out << "StoreInst* " << iName << " = new StoreInst(" - << opNames[0] << ", " - << opNames[1] << ", " - << (store->isVolatile() ? "true" : "false") - << ", " << bbname << ");"; - if (store->getAlignment()) - nl(Out) << iName << "->setAlignment(" - << store->getAlignment() << ");"; - if (store->isAtomic()) { - StringRef Ordering = ConvertAtomicOrdering(store->getOrdering()); - StringRef CrossThread = ConvertAtomicSynchScope(store->getSynchScope()); - nl(Out) << iName << "->setAtomic(" - << Ordering << ", " << CrossThread << ");"; + const StoreInst *SI = cast<StoreInst>(I); + const Value *P = SI->getPointerOperand(); + const Value *V = SI->getValueOperand(); + std::string VS = getValueAsStr(V); + if (V->getType()->isDoubleTy() && SI->getAlignment() == 4) { + // only 4-byte aligned, copy carefully + std::string PS = getOpName(P); + text = "HEAPF64[tempDoublePtr>>3]=" + VS + ";HEAPF32[" + PS + ">>2]=HEAPF32[tempDoublePtr>>2];HEAPF32[" + PS + "+4>>2]=HEAPF32[tempDoublePtr+4>>2];"; + } else { + text = getPtrUse(P) + " = " + VS + ";"; } break; } case Instruction::GetElementPtr: { - const GetElementPtrInst* gep = cast<GetElementPtrInst>(I); - if (gep->getNumOperands() <= 2) { - Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create(" - << opNames[0]; - if (gep->getNumOperands() == 2) - Out << ", " << opNames[1]; - } else { - Out << "std::vector<Value*> " << iName << "_indices;"; - nl(Out); - for (unsigned i = 1; i < gep->getNumOperands(); ++i ) { - Out << iName << "_indices.push_back(" - << opNames[i] << ");"; - nl(Out); - } - Out << "Instruction* " << iName << " = GetElementPtrInst::Create(" - << opNames[0] << ", " << iName << "_indices"; - } - Out << ", \""; - printEscapedString(gep->getName()); - Out << "\", " << bbname << ");"; + assert(false && "Unhandled instruction"); break; } case Instruction::PHI: { @@ -1376,6 +1403,18 @@ void CppWriter::printInstruction(const Instruction *I, } break; } + case Instruction::PtrToInt: + text = getAssign(iName, Type::getInt32Ty(I->getContext())); + if (const Constant *CV = dyn_cast<Constant>(I->getOperand(0))) { + text += utostr(getGlobalAddress(CV->getName().str())); + } else { + text += getCast(opNames[0], Type::getInt32Ty(I->getContext())); + } + text += ";"; + break; + case Instruction::IntToPtr: + text = getAssign(iName, Type::getInt32Ty(I->getContext())) + opNames[0] + ";"; + break; case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -1385,71 +1424,45 @@ void CppWriter::printInstruction(const Instruction *I, case Instruction::FPToSI: case Instruction::UIToFP: case Instruction::SIToFP: - case Instruction::PtrToInt: - case Instruction::IntToPtr: case Instruction::BitCast: { - const CastInst* cst = cast<CastInst>(I); - Out << "CastInst* " << iName << " = new "; switch (I->getOpcode()) { case Instruction::Trunc: Out << "TruncInst"; break; case Instruction::ZExt: Out << "ZExtInst"; break; case Instruction::SExt: Out << "SExtInst"; break; case Instruction::FPTrunc: Out << "FPTruncInst"; break; - case Instruction::FPExt: Out << "FPExtInst"; break; + case Instruction::FPExt: text = getAssign(iName, Type::getFloatTy(I->getContext())) + opNames[0] + ";"; break; case Instruction::FPToUI: Out << "FPToUIInst"; break; case Instruction::FPToSI: Out << "FPToSIInst"; break; case Instruction::UIToFP: Out << "UIToFPInst"; break; - case Instruction::SIToFP: Out << "SIToFPInst"; break; case Instruction::PtrToInt: Out << "PtrToIntInst"; break; case Instruction::IntToPtr: Out << "IntToPtrInst"; break; - case Instruction::BitCast: Out << "BitCastInst"; break; + case Instruction::SIToFP: + case Instruction::BitCast: text = getAssign(iName, I->getOperand(0)->getType()) + getValueAsStr(I->getOperand(0)) + ";"; break; default: llvm_unreachable("Unreachable"); } - Out << "(" << opNames[0] << ", " - << getCppName(cst->getType()) << ", \""; - printEscapedString(cst->getName()); - Out << "\", " << bbname << ");"; break; } case Instruction::Call: { const CallInst* call = cast<CallInst>(I); - if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) { - Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get(" - << getCppName(ila->getFunctionType()) << ", \"" - << ila->getAsmString() << "\", \"" - << ila->getConstraintString() << "\"," - << (ila->hasSideEffects() ? "true" : "false") << ");"; - nl(Out); - } - if (call->getNumArgOperands() > 1) { - Out << "std::vector<Value*> " << iName << "_params;"; - nl(Out); - for (unsigned i = 0; i < call->getNumArgOperands(); ++i) { - Out << iName << "_params.push_back(" << opNames[i] << ");"; - nl(Out); + const int numArgs = call->getNumArgOperands(); + Type *RT = call->getCalledFunction()->getReturnType(); + text = opNames[numArgs] + "("; + for (int i = 0; i < numArgs; i++) { + Value *Arg = call->getArgOperand(i); + Type *t = dyn_cast<PointerType>(Arg->getType()); + const GlobalVariable *GV; + if (t && (GV = dyn_cast<GlobalVariable>(Arg))) { + text += utostr(getGlobalAddress(GV->getName().str())); + } else { + text += getValueAsStr(call->getArgOperand(i)); } - Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[call->getNumArgOperands()] << ", " - << iName << "_params, \""; - } else if (call->getNumArgOperands() == 1) { - Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \""; - } else { - Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[call->getNumArgOperands()] << ", \""; + if (i < numArgs - 1) text += ", "; } - printEscapedString(call->getName()); - Out << "\", " << bbname << ");"; - nl(Out) << iName << "->setCallingConv("; - printCallingConv(call->getCallingConv()); - Out << ");"; - nl(Out) << iName << "->setTailCall(" - << (call->isTailCall() ? "true" : "false"); - Out << ");"; - nl(Out); - printAttributes(call->getAttributes(), iName); - Out << iName << "->setAttributes(" << iName << "_PAL);"; - nl(Out); + text += ")"; + if (!RT->isVoidTy()) { + text = getAssign(iName, RT) + getCast(text, RT); + } + text += ";"; break; } case Instruction::Select: { @@ -1591,8 +1604,8 @@ void CppWriter::printInstruction(const Instruction *I, } } DefinedValues.insert(I); - nl(Out); delete [] opNames; + return text; } // Print out the types, constants and declarations needed by one function @@ -1654,42 +1667,6 @@ void CppWriter::printFunctionUses(const Function* F) { } } - // Print the function declarations for any functions encountered - nl(Out) << "// Function Declarations"; nl(Out); - for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (Function* Fun = dyn_cast<Function>(*I)) { - if (!is_inline || Fun != F) - printFunctionHead(Fun); - } - } - - // Print the global variable declarations for any variables encountered - nl(Out) << "// Global Variable Declarations"; nl(Out); - for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I)) - printVariableHead(F); - } - - // Print the constants found - nl(Out) << "// Constant Definitions"; nl(Out); - for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(), - E = consts.end(); I != E; ++I) { - printConstant(*I); - } - - // Process the global variables definitions now that all the constants have - // been emitted. These definitions just couple the gvars with their constant - // initializers. - if (GenerationType != GenFunction) { - nl(Out) << "// Global Variable Definitions"; nl(Out); - for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I)) - printVariableBody(GV); - } - } } void CppWriter::printFunctionHead(const Function* F) { @@ -1745,61 +1722,117 @@ void CppWriter::printFunctionHead(const Function* F) { } void CppWriter::printFunctionBody(const Function *F) { - if (F->isDeclaration()) - return; // external functions have no bodies. + assert(!F->isDeclaration()); // Clear the DefinedValues and ForwardRefs maps because we can't have // cross-function forward refs ForwardRefs.clear(); DefinedValues.clear(); + UsedVars.clear(); + // Create all the argument values if (!is_inline) { - if (!F->arg_empty()) { - Out << "Function::arg_iterator args = " << getCppName(F) - << "->arg_begin();"; - nl(Out); - } for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) { - Out << "Value* " << getCppName(AI) << " = args++;"; - nl(Out); if (AI->hasName()) { - Out << getCppName(AI) << "->setName(\""; - printEscapedString(AI->getName()); - Out << "\");"; - nl(Out); + //Out << getCppName(AI) << "->setName(\""; + //printEscapedString(AI->getName()); + //Out << "\");"; + //nl(Out); + } else { } } } - // Create all the basic blocks - nl(Out); + // Prepare relooper TODO: resize buffer as needed + #define RELOOPER_BUFFER 10*1024*1024 + static char *buffer = new char[RELOOPER_BUFFER]; + Relooper::SetOutputBuffer(buffer, RELOOPER_BUFFER); + Relooper R; + Block *Entry = NULL; + std::map<const BasicBlock*, Block*> LLVMToRelooper; + + // Create relooper blocks with their contents for (Function::const_iterator BI = F->begin(), BE = F->end(); BI != BE; ++BI) { - std::string bbname(getCppName(BI)); - Out << "BasicBlock* " << bbname << - " = BasicBlock::Create(mod->getContext(), \""; - if (BI->hasName()) - printEscapedString(BI->getName()); - Out << "\"," << getCppName(BI->getParent()) << ",0);"; - nl(Out); + std::string contents = ""; + for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); + I != E; ++I) { + contents += " " + generateInstruction(I) + "\n"; + } + Block *Curr = new Block(contents.c_str(), NULL); // TODO: use branch vars so we get switches + const BasicBlock *BB = &*BI; + LLVMToRelooper[BB] = Curr; + R.AddBlock(Curr); + if (!Entry) Entry = Curr; } - // Output all of its basic blocks... for the function + // Create branchings for (Function::const_iterator BI = F->begin(), BE = F->end(); BI != BE; ++BI) { - std::string bbname(getCppName(BI)); - nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")"; - nl(Out); + const TerminatorInst *TI = BI->getTerminator(); + switch (TI->getOpcode()) { + default: { + //error("Invalid branch instruction"); + break; + } + case Instruction::Br: { + const BranchInst* br = cast<BranchInst>(TI); + if (br->getNumOperands() == 3) { + BasicBlock *S0 = br->getSuccessor(0); + BasicBlock *S1 = br->getSuccessor(1); + LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*S0], NULL); + LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*S1], + getOpName(TI->getOperand(0)).c_str()); + } else if (br->getNumOperands() == 1) { + BasicBlock *S = br->getSuccessor(0); + LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*S], NULL); + } else { + error("Branch with 2 operands?"); + } + break; + } + } + } - // Output all of the instructions in the basic block... - for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); - I != E; ++I) { - printInstruction(I,bbname); + // Calculate relooping and print + R.Calculate(Entry); + R.Render(); + + // Emit local variables + UsedVars["sp"] = Type::getInt32Ty(F->getContext())->getTypeID(); + if (!UsedVars.empty()) { + Out << " var "; + for (VarMap::iterator VI = UsedVars.begin(); VI != UsedVars.end(); ++VI) { + if (VI != UsedVars.begin()) { + Out << ", "; + } + Out << VI->first << " = "; + switch (VI->second) { + default: + assert(false); + case Type::PointerTyID: + case Type::IntegerTyID: + Out << "0"; + break; + case Type::FloatTyID: + // TODO Out << "Math_fround(0)"; + case Type::DoubleTyID: + Out << "+0"; // FIXME + break; + } } + Out << ";"; + nl(Out); } + // Emit stack entry + Out << " " + getAssign("sp", Type::getInt32Ty(F->getContext())) + "STACKTOP;"; + + // Emit (relooped) code + nl(Out) << buffer; + // Loop over the ForwardRefs and resolve them now that all instructions // are generated. if (!ForwardRefs.empty()) { @@ -1846,116 +1879,187 @@ void CppWriter::printInline(const std::string& fname, } void CppWriter::printModuleBody() { - // Print out all the type definitions - nl(Out) << "// Type Definitions"; nl(Out); - printTypes(TheModule); + // Calculate the constants definitions. + printConstants(TheModule); - // Functions can call each other and global variables can reference them so - // define all the functions first before emitting their function bodies. - nl(Out) << "// Function Declarations"; nl(Out); + // Emit function bodies. + nl(Out) << "// EMSCRIPTEN_START_FUNCTIONS"; nl(Out); for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); - I != E; ++I) - printFunctionHead(I); + I != E; ++I) { + if (!I->isDeclaration()) { + Out << "function _" << I->getName() << "("; + for (Function::const_arg_iterator AI = I->arg_begin(), AE = I->arg_end(); + AI != AE; ++AI) { + if (AI != I->arg_begin()) Out << ","; + Out << getCppName(AI); + } + Out << ") {"; + nl(Out); + for (Function::const_arg_iterator AI = I->arg_begin(), AE = I->arg_end(); + AI != AE; ++AI) { + std::string name = getCppName(AI); + Out << " " << name << " = " << getCast(name, AI->getType()) << ";"; + nl(Out); + } + printFunctionBody(I); + Out << "}"; + nl(Out); + } + } + Out << "// EMSCRIPTEN_END_FUNCTIONS\n\n"; - // Process the global variables declarations. We can't initialze them until - // after the constants are printed so just print a header for each global - nl(Out) << "// Global Variable Declarations\n"; nl(Out); - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) { - printVariableHead(I); + // TODO fix commas + Out << "/* memory initializer */ allocate(["; + printCommaSeparated(GlobalData64); + if (GlobalData64.size() > 0 && GlobalData32.size() > 0) { + Out << ","; + } + printCommaSeparated(GlobalData32); + if (GlobalData32.size() > 0 && GlobalData8.size() > 0) { + Out << ","; } + printCommaSeparated(GlobalData8); + Out << "], \"i8\", ALLOC_NONE, Runtime.GLOBAL_BASE);"; - // Print out all the constants definitions. Constants don't recurse except - // through GlobalValues. All GlobalValues have been declared at this point - // so we can proceed to generate the constants. - nl(Out) << "// Constant Definitions"; nl(Out); - printConstants(TheModule); + // Emit metadata for emcc driver + Out << "\n\n// EMSCRIPTEN_METADATA\n"; + Out << "{\n"; - // Process the global variables definitions now that all the constants have - // been emitted. These definitions just couple the gvars with their constant - // initializers. - nl(Out) << "// Global Variable Definitions"; nl(Out); - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) { - printVariableBody(I); + Out << "\"declares\": ["; + bool first = true; + for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); + I != E; ++I) { + if (I->isDeclaration()) { + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"" + I->getName() + "\""; + } } - - // Finally, we can safely put out all of the function bodies. - nl(Out) << "// Function Definitions"; nl(Out); + Out << "],"; + Out << "\"implementedFunctions\": ["; + first = true; for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); I != E; ++I) { if (!I->isDeclaration()) { - nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I) - << ")"; - nl(Out) << "{"; - nl(Out,1); - printFunctionBody(I); - nl(Out,-1) << "}"; - nl(Out); + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"_" << I->getName() << '"'; } } + Out << "]"; + + Out << "\n}\n"; +} + +#include <iostream> + +void CppWriter::allocateConstant(const Constant* CV) { + if (isa<GlobalValue>(CV)) + return; + + std::string name = getCppName(CV); + if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(CV)) { + if (CDS->isString()) { + GlobalAddresses[name] = Address(GlobalData8.size(), 8); + StringRef Str = CDS->getAsString(); + for (unsigned int i = 0; i < Str.size(); i++) { + GlobalData8.push_back(Str.data()[i]); + } + } else { + assert(false); + } + } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + APFloat APF = CFP->getValueAPF(); + if (CFP->getType() == Type::getFloatTy(CFP->getContext())) { + GlobalAddresses[name] = Address(GlobalData32.size(), 32); + union flt { float f; unsigned char b[sizeof(float)]; } flt; + flt.f = APF.convertToFloat(); + for (unsigned i = 0; i < sizeof(float); ++i) + GlobalData32.push_back(flt.b[i]); + } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) { + GlobalAddresses[name] = Address(GlobalData64.size(), 64); + union dbl { double d; unsigned char b[sizeof(double)]; } dbl; + dbl.d = APF.convertToDouble(); + for (unsigned i = 0; i < sizeof(double); ++i) + GlobalData64.push_back(dbl.b[i]); + } else { + assert(false); + } + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + union { uint64_t i; unsigned char b[sizeof(uint64_t)]; } integer; + integer.i = *CI->getValue().getRawData(); + unsigned BitWidth = CI->getValue().getBitWidth(); + assert(BitWidth == 32 || BitWidth == 64); + HeapData *GlobalData = NULL; + switch (BitWidth) { + case 32: + GlobalData = &GlobalData32; + break; + case 64: + GlobalData = &GlobalData64; + break; + default: + assert(false); + } + // assuming compiler is little endian + GlobalAddresses[name] = Address(GlobalData->size(), BitWidth); + for (unsigned i = 0; i < BitWidth / 8; ++i) { + GlobalData->push_back(integer.b[i]); + } + } else if (/* const ConstantPointerNull *CPN = */ dyn_cast<ConstantPointerNull>(CV)) { + assert(false); + } else if (/* const ConstantAggregateZero *CAZ = */ dyn_cast<ConstantAggregateZero>(CV)) { + printf("Warning: ignoring CAZ\n"); + //Constant *C = CAZ->getSequentialElement(); + } else if (/* const ConstantArray *CA = */ dyn_cast<ConstantArray>(CV)) { + assert(false); + } else if (/* const ConstantStruct *CS = */ dyn_cast<ConstantStruct>(CV)) { + assert(false); + } else if (/* const ConstantVector *CVec = */ dyn_cast<ConstantVector>(CV)) { + assert(false); + } else if (/* const BlockAddress *BA = */ dyn_cast<BlockAddress>(CV)) { + assert(false); + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + assert(false && "Unhandled CE GEP"); + } else if (CE->isCast()) { + assert(false && "Unhandled cast"); + } else { + assert(false); + } + } else if (/* const UndefValue *UV = */ dyn_cast<UndefValue>(CV)) { + assert(false); + } else { + std::cout << getCppName(CV) << std::endl; + assert(false); + } +} + +void CppWriter::printCommaSeparated(const HeapData data) { + for (HeapData::const_iterator I = data.begin(); + I != data.end(); ++I) { + if (I != data.begin()) { + Out << ","; + } + Out << (int)*I; + } } void CppWriter::printProgram(const std::string& fname, const std::string& mName) { - Out << "#include <llvm/Pass.h>\n"; - Out << "#include <llvm/PassManager.h>\n"; - - Out << "#include <llvm/ADT/SmallVector.h>\n"; - Out << "#include <llvm/Analysis/Verifier.h>\n"; - Out << "#include <llvm/Assembly/PrintModulePass.h>\n"; - Out << "#include <llvm/IR/BasicBlock.h>\n"; - Out << "#include <llvm/IR/CallingConv.h>\n"; - Out << "#include <llvm/IR/Constants.h>\n"; - Out << "#include <llvm/IR/DerivedTypes.h>\n"; - Out << "#include <llvm/IR/Function.h>\n"; - Out << "#include <llvm/IR/GlobalVariable.h>\n"; - Out << "#include <llvm/IR/InlineAsm.h>\n"; - Out << "#include <llvm/IR/Instructions.h>\n"; - Out << "#include <llvm/IR/LLVMContext.h>\n"; - Out << "#include <llvm/IR/Module.h>\n"; - Out << "#include <llvm/Support/FormattedStream.h>\n"; - Out << "#include <llvm/Support/MathExtras.h>\n"; - Out << "#include <algorithm>\n"; - Out << "using namespace llvm;\n\n"; - Out << "Module* " << fname << "();\n\n"; - Out << "int main(int argc, char**argv) {\n"; - Out << " Module* Mod = " << fname << "();\n"; - Out << " verifyModule(*Mod, PrintMessageAction);\n"; - Out << " PassManager PM;\n"; - Out << " PM.add(createPrintModulePass(&outs()));\n"; - Out << " PM.run(*Mod);\n"; - Out << " return 0;\n"; - Out << "}\n\n"; printModule(fname,mName); } void CppWriter::printModule(const std::string& fname, const std::string& mName) { - nl(Out) << "Module* " << fname << "() {"; - nl(Out,1) << "// Module Construction"; - nl(Out) << "Module* mod = new Module(\""; - printEscapedString(mName); - Out << "\", getGlobalContext());"; - if (!TheModule->getTargetTriple().empty()) { - nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");"; - } - if (!TheModule->getTargetTriple().empty()) { - nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple() - << "\");"; - } - - if (!TheModule->getModuleInlineAsm().empty()) { - nl(Out) << "mod->setModuleInlineAsm(\""; - printEscapedString(TheModule->getModuleInlineAsm()); - Out << "\");"; - } - nl(Out); - printModuleBody(); - nl(Out) << "return mod;"; - nl(Out,-1) << "}"; - nl(Out); } void CppWriter::printContents(const std::string& fname, @@ -2032,7 +2136,7 @@ bool CppWriter::runOnModule(Module &M) { TheModule = &M; // Emit a header - Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n"; + Out << "//========================================\n\n"; // Get the name of the function we're supposed to generate std::string fname = FuncName.getValue(); diff --git a/lib/Target/CppBackend/Relooper.cpp b/lib/Target/CppBackend/Relooper.cpp new file mode 100644 index 0000000000..07b3631141 --- /dev/null +++ b/lib/Target/CppBackend/Relooper.cpp @@ -0,0 +1,1287 @@ + +#include "Relooper.h" + +#include <string.h> +#include <stdlib.h> +#include <list> +#include <stack> + +#if EMSCRIPTEN +#include "ministring.h" +#else +#include <string> +typedef std::string ministring; +#endif + +template <class T, class U> bool contains(const T& container, const U& contained) { + return container.find(contained) != container.end(); +} + +#if DEBUG +static void PrintDebug(const char *Format, ...); +#define DebugDump(x, ...) Debugging::Dump(x, __VA_ARGS__) +#else +#define PrintDebug(x, ...) +#define DebugDump(x, ...) +#endif + +#define INDENTATION 1 + +struct Indenter { + static int CurrIndent; + + static void Indent() { CurrIndent++; } + static void Unindent() { CurrIndent--; } +}; + +static void PrintIndented(const char *Format, ...); +static void PutIndented(const char *String); + +static char *OutputBufferRoot = NULL; +static char *OutputBuffer = NULL; +static int OutputBufferSize = 0; + +void PrintIndented(const char *Format, ...) { + assert(OutputBuffer); + assert(OutputBuffer + Indenter::CurrIndent*INDENTATION - OutputBufferRoot < OutputBufferSize); + for (int i = 0; i < Indenter::CurrIndent*INDENTATION; i++, OutputBuffer++) *OutputBuffer = ' '; + va_list Args; + va_start(Args, Format); + int left = OutputBufferSize - (OutputBuffer - OutputBufferRoot); + int written = vsnprintf(OutputBuffer, left, Format, Args); + assert(written < left); + OutputBuffer += written; + va_end(Args); +} + +void PutIndented(const char *String) { + assert(OutputBuffer); + assert(OutputBuffer + Indenter::CurrIndent*INDENTATION - OutputBufferRoot < OutputBufferSize); + for (int i = 0; i < Indenter::CurrIndent*INDENTATION; i++, OutputBuffer++) *OutputBuffer = ' '; + int left = OutputBufferSize - (OutputBuffer - OutputBufferRoot); + int needed = strlen(String)+1; + assert(needed < left); + strcpy(OutputBuffer, String); + OutputBuffer += strlen(String); + *OutputBuffer++ = '\n'; + *OutputBuffer = 0; +} + +static int AsmJS = 0; + +// Indenter + +#if EMSCRIPTEN +int Indenter::CurrIndent = 1; +#else +int Indenter::CurrIndent = 0; +#endif + +// Branch + +Branch::Branch(const char *ConditionInit, const char *CodeInit) : Ancestor(NULL), Labeled(true) { + Condition = ConditionInit ? strdup(ConditionInit) : NULL; + Code = CodeInit ? strdup(CodeInit) : NULL; +} + +Branch::~Branch() { + if (Condition) free((void*)Condition); + if (Code) free((void*)Code); +} + +void Branch::Render(Block *Target, bool SetLabel) { + if (Code) PrintIndented("%s\n", Code); + if (SetLabel) PrintIndented("label = %d;\n", Target->Id); + if (Ancestor) { + if (Type != Direct) { + if (Labeled) { + PrintIndented("%s L%d;\n", Type == Break ? "break" : "continue", Ancestor->Id); + } else { + PrintIndented("%s;\n", Type == Break ? "break" : "continue"); + } + } + } +} + +// Block + +int Block::IdCounter = 1; // 0 is reserved for clearings + +Block::Block(const char *CodeInit, const char *BranchVarInit) : Parent(NULL), Id(Block::IdCounter++), IsCheckedMultipleEntry(false) { + Code = strdup(CodeInit); + BranchVar = BranchVarInit ? strdup(BranchVarInit) : NULL; +} + +Block::~Block() { + if (Code) free((void*)Code); + if (BranchVar) free((void*)BranchVar); + for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin(); iter != ProcessedBranchesOut.end(); iter++) { + delete iter->second; + } + // XXX If not reachable, expected to have branches here. But need to clean them up to prevent leaks! +} + +void Block::AddBranchTo(Block *Target, const char *Condition, const char *Code) { + assert(!contains(BranchesOut, Target)); // cannot add more than one branch to the same target + BranchesOut[Target] = new Branch(Condition, Code); +} + +void Block::Render(bool InLoop) { + if (IsCheckedMultipleEntry && InLoop) { + PrintIndented("label = 0;\n"); + } + + if (Code) { + // Print code in an indented manner, even over multiple lines + char *Start = const_cast<char*>(Code); + while (*Start) { + char *End = strchr(Start, '\n'); + if (End) *End = 0; + PutIndented(Start); + if (End) *End = '\n'; else break; + Start = End+1; + } + } + + if (!ProcessedBranchesOut.size()) return; + + bool SetLabel = true; // in some cases it is clear we can avoid setting label, see later + + // A setting of the label variable (label = x) is necessary if it can + // cause an impact. The main case is where we set label to x, then elsewhere + // we check if label is equal to that value, i.e., that label is an entry + // in a multiple block. We also need to reset the label when we enter + // that block, so that each setting is a one-time action: consider + // + // while (1) { + // if (check) label = 1; + // if (label == 1) { label = 0 } + // } + // + // (Note that this case is impossible due to fusing, but that is not + // material here.) So setting to 0 is important just to clear the 1 for + // future iterations. + // TODO: When inside a loop, if necessary clear the label variable + // once on the top, and never do settings that are in effect clears + + // Fusing: If the next is a Multiple, we can fuse it with this block. Note + // that we must be the Inner of a Simple, so fusing means joining a Simple + // to a Multiple. What happens there is that all options in the Multiple + // *must* appear in the Simple (the Simple is the only one reaching the + // Multiple), so we can remove the Multiple and add its independent groups + // into the Simple's branches. + MultipleShape *Fused = Shape::IsMultiple(Parent->Next); + if (Fused) { + PrintDebug("Fusing Multiple to Simple\n"); + Parent->Next = Parent->Next->Next; + Fused->RenderLoopPrefix(); + + // When the Multiple has the same number of groups as we have branches, + // they will all be fused, so it is safe to not set the label at all + if (SetLabel && Fused->InnerMap.size() == ProcessedBranchesOut.size()) { + SetLabel = false; + } + } + + Block *DefaultTarget(NULL); // The block we branch to without checking the condition, if none of the other conditions held. + + // Find the default target, the one without a condition + for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin(); iter != ProcessedBranchesOut.end(); iter++) { + if (!iter->second->Condition) { + assert(!DefaultTarget); // Must be exactly one default + DefaultTarget = iter->first; + } + } + assert(DefaultTarget); // Since each block *must* branch somewhere, this must be set + + bool useSwitch = BranchVar != NULL; + + if (useSwitch) { + PrintIndented("switch (%s) {\n", BranchVar); + } + + ministring RemainingConditions; + bool First = !useSwitch; // when using a switch, there is no special first + for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin();; iter++) { + Block *Target; + Branch *Details; + if (iter != ProcessedBranchesOut.end()) { + Target = iter->first; + if (Target == DefaultTarget) continue; // done at the end + Details = iter->second; + assert(Details->Condition); // must have a condition if this is not the default target + } else { + Target = DefaultTarget; + Details = ProcessedBranchesOut[DefaultTarget]; + } + bool SetCurrLabel = SetLabel && Target->IsCheckedMultipleEntry; + bool HasFusedContent = Fused && contains(Fused->InnerMap, Target); + bool HasContent = SetCurrLabel || Details->Type != Branch::Direct || HasFusedContent || Details->Code; + if (iter != ProcessedBranchesOut.end()) { + // If there is nothing to show in this branch, omit the condition + if (useSwitch) { + PrintIndented("%s {\n", Details->Condition); + } else { + if (HasContent) { + PrintIndented("%sif (%s) {\n", First ? "" : "} else ", Details->Condition); + First = false; + } else { + if (RemainingConditions.size() > 0) RemainingConditions += " && "; + RemainingConditions += "!("; + if (BranchVar) { + RemainingConditions += BranchVar; + RemainingConditions += " == "; + } + RemainingConditions += Details->Condition; + RemainingConditions += ")"; + } + } + } else { + // this is the default + if (useSwitch) { + PrintIndented("default: {\n"); + } else { + if (HasContent) { + if (RemainingConditions.size() > 0) { + if (First) { + PrintIndented("if (%s) {\n", RemainingConditions.c_str()); + First = false; + } else { + PrintIndented("} else if (%s) {\n", RemainingConditions.c_str()); + } + } else if (!First) { + PrintIndented("} else {\n"); + } + } + } + } + if (!First) Indenter::Indent(); + Details->Render(Target, SetCurrLabel); + if (HasFusedContent) { + Fused->InnerMap.find(Target)->second->Render(InLoop); + } + if (useSwitch && iter != ProcessedBranchesOut.end()) { + PrintIndented("break;\n"); + } + if (!First) Indenter::Unindent(); + if (useSwitch) { + PrintIndented("}\n"); + } + if (iter == ProcessedBranchesOut.end()) break; + } + if (!First) PrintIndented("}\n"); + + if (Fused) { + Fused->RenderLoopPostfix(); + } +} + +// Shape + +int Shape::IdCounter = 0; + +// MultipleShape + +void MultipleShape::RenderLoopPrefix() { + if (NeedLoop) { + if (Labeled) { + PrintIndented("L%d: do {\n", Id); + } else { + PrintIndented("do {\n"); + } + Indenter::Indent(); + } +} + +void MultipleShape::RenderLoopPostfix() { + if (NeedLoop) { + Indenter::Unindent(); + PrintIndented("} while(0);\n"); + } +} + +void MultipleShape::Render(bool InLoop) { + RenderLoopPrefix(); + bool First = true; + for (BlockShapeMap::iterator iter = InnerMap.begin(); iter != InnerMap.end(); iter++) { + if (AsmJS) { + PrintIndented("%sif ((label|0) == %d) {\n", First ? "" : "else ", iter->first->Id); + } else { + PrintIndented("%sif (label == %d) {\n", First ? "" : "else ", iter->first->Id); + } + First = false; + Indenter::Indent(); + iter->second->Render(InLoop); + Indenter::Unindent(); + PrintIndented("}\n"); + } + RenderLoopPostfix(); + if (Next) Next->Render(InLoop); +}; + +// LoopShape + +void LoopShape::Render(bool InLoop) { + if (Labeled) { + PrintIndented("L%d: while(1) {\n", Id); + } else { + PrintIndented("while(1) {\n"); + } + Indenter::Indent(); + Inner->Render(true); + Indenter::Unindent(); + PrintIndented("}\n"); + if (Next) Next->Render(InLoop); +}; + +/* +// EmulatedShape + +void EmulatedShape::Render(bool InLoop) { + PrintIndented("while(1) {\n"); + Indenter::Indent(); + PrintIndented("switch(label) {\n"); + Indenter::Indent(); + for (int i = 0; i < Blocks.size(); i++) { + Block *Curr = Blocks[i]; + PrintIndented("case %d: {\n", Curr->Id); + Indenter::Indent(); + Curr->Render(InLoop); + PrintIndented("break;\n"); + Indenter::Unindent(); + PrintIndented("}\n"); + } + Indenter::Unindent(); + PrintIndented("}\n"); + Indenter::Unindent(); + PrintIndented("}\n"); + if (Next) Next->Render(InLoop); +}; +*/ + +// Relooper + +Relooper::Relooper() : Root(NULL) { +} + +Relooper::~Relooper() { + for (int i = 0; i < Blocks.size(); i++) delete Blocks[i]; + for (int i = 0; i < Shapes.size(); i++) delete Shapes[i]; +} + +void Relooper::AddBlock(Block *New) { + Blocks.push_back(New); +} + +struct RelooperRecursor { + Relooper *Parent; + RelooperRecursor(Relooper *ParentInit) : Parent(ParentInit) {} +}; + +typedef std::list<Block*> BlockList; + +void Relooper::Calculate(Block *Entry) { + // Scan and optimize the input + struct PreOptimizer : public RelooperRecursor { + PreOptimizer(Relooper *Parent) : RelooperRecursor(Parent) {} + BlockSet Live; + + void FindLive(Block *Root) { + BlockList ToInvestigate; + ToInvestigate.push_back(Root); + while (ToInvestigate.size() > 0) { + Block *Curr = ToInvestigate.front(); + ToInvestigate.pop_front(); + if (contains(Live, Curr)) continue; + Live.insert(Curr); + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + ToInvestigate.push_back(iter->first); + } + } + } + + // If a block has multiple entries but no exits, and it is small enough, it is useful to split it. + // A common example is a C++ function where everything ends up at a final exit block and does some + // RAII cleanup. Without splitting, we will be forced to introduce labelled loops to allow + // reaching the final block + void SplitDeadEnds() { + int TotalCodeSize = 0; + for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) { + Block *Curr = *iter; + TotalCodeSize += strlen(Curr->Code); + } + BlockSet Splits; + BlockSet Removed; + //DebugDump(Live, "before"); + for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) { + Block *Original = *iter; + if (Original->BranchesIn.size() <= 1 || Original->BranchesOut.size() > 0) continue; // only dead ends, for now + if (contains(Original->BranchesOut, Original)) continue; // cannot split a looping node + if (strlen(Original->Code)*(Original->BranchesIn.size()-1) > TotalCodeSize/5) continue; // if splitting increases raw code size by a significant amount, abort + // Split the node (for simplicity, we replace all the blocks, even though we could have reused the original) + PrintDebug("Splitting block %d\n", Original->Id); + for (BlockSet::iterator iter = Original->BranchesIn.begin(); iter != Original->BranchesIn.end(); iter++) { + Block *Prior = *iter; + Block *Split = new Block(Original->Code, Original->BranchVar); + Parent->Blocks.push_back(Split); + PrintDebug(" to %d\n", Split->Id); + Split->BranchesIn.insert(Prior); + Branch *Details = Prior->BranchesOut[Original]; + Prior->BranchesOut[Split] = new Branch(Details->Condition, Details->Code); + Prior->BranchesOut.erase(Original); + for (BlockBranchMap::iterator iter = Original->BranchesOut.begin(); iter != Original->BranchesOut.end(); iter++) { + Block *Post = iter->first; + Branch *Details = iter->second; + Split->BranchesOut[Post] = new Branch(Details->Condition, Details->Code); + Post->BranchesIn.insert(Split); + } + Splits.insert(Split); + Removed.insert(Original); + } + for (BlockBranchMap::iterator iter = Original->BranchesOut.begin(); iter != Original->BranchesOut.end(); iter++) { + Block *Post = iter->first; + Post->BranchesIn.erase(Original); + } + //DebugDump(Live, "mid"); + } + for (BlockSet::iterator iter = Splits.begin(); iter != Splits.end(); iter++) { + Live.insert(*iter); + } + for (BlockSet::iterator iter = Removed.begin(); iter != Removed.end(); iter++) { + Live.erase(*iter); + } + //DebugDump(Live, "after"); + } + }; + PreOptimizer Pre(this); + Pre.FindLive(Entry); + + // Add incoming branches from live blocks, ignoring dead code + for (int i = 0; i < Blocks.size(); i++) { + Block *Curr = Blocks[i]; + if (!contains(Pre.Live, Curr)) continue; + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + iter->first->BranchesIn.insert(Curr); + } + } + + Pre.SplitDeadEnds(); + + // Recursively process the graph + + struct Analyzer : public RelooperRecursor { + Analyzer(Relooper *Parent) : RelooperRecursor(Parent) {} + + // Add a shape to the list of shapes in this Relooper calculation + void Notice(Shape *New) { + Parent->Shapes.push_back(New); + } + + // Create a list of entries from a block. If LimitTo is provided, only results in that set + // will appear + void GetBlocksOut(Block *Source, BlockSet& Entries, BlockSet *LimitTo=NULL) { + for (BlockBranchMap::iterator iter = Source->BranchesOut.begin(); iter != Source->BranchesOut.end(); iter++) { + if (!LimitTo || contains(*LimitTo, iter->first)) { + Entries.insert(iter->first); + } + } + } + + // Converts/processes all branchings to a specific target + void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, BlockSet &From) { + PrintDebug("Solipsizing branches into %d\n", Target->Id); + DebugDump(From, " relevant to solipsize: "); + for (BlockSet::iterator iter = Target->BranchesIn.begin(); iter != Target->BranchesIn.end();) { + Block *Prior = *iter; + if (!contains(From, Prior)) { + iter++; + continue; + } + Branch *PriorOut = Prior->BranchesOut[Target]; + PriorOut->Ancestor = Ancestor; + PriorOut->Type = Type; + if (MultipleShape *Multiple = Shape::IsMultiple(Ancestor)) { + Multiple->NeedLoop++; // We are breaking out of this Multiple, so need a loop + } + iter++; // carefully increment iter before erasing + Target->BranchesIn.erase(Prior); + Target->ProcessedBranchesIn.insert(Prior); + Prior->BranchesOut.erase(Target); + Prior->ProcessedBranchesOut[Target] = PriorOut; + PrintDebug(" eliminated branch from %d\n", Prior->Id); + } + } + + Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { + PrintDebug("creating simple block with block #%d\n", Inner->Id); + SimpleShape *Simple = new SimpleShape; + Notice(Simple); + Simple->Inner = Inner; + Inner->Parent = Simple; + if (Blocks.size() > 1) { + Blocks.erase(Inner); + GetBlocksOut(Inner, NextEntries, &Blocks); + BlockSet JustInner; + JustInner.insert(Inner); + for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) { + Solipsize(*iter, Branch::Direct, Simple, JustInner); + } + } + return Simple; + } + + Shape *MakeLoop(BlockSet &Blocks, BlockSet& Entries, BlockSet &NextEntries) { + // Find the inner blocks in this loop. Proceed backwards from the entries until + // you reach a seen block, collecting as you go. + BlockSet InnerBlocks; + BlockSet Queue = Entries; + while (Queue.size() > 0) { + Block *Curr = *(Queue.begin()); + Queue.erase(Queue.begin()); + if (!contains(InnerBlocks, Curr)) { + // This element is new, mark it as inner and remove from outer + InnerBlocks.insert(Curr); + Blocks.erase(Curr); + // Add the elements prior to it + for (BlockSet::iterator iter = Curr->BranchesIn.begin(); iter != Curr->BranchesIn.end(); iter++) { + Queue.insert(*iter); + } +#if 0 + // Add elements it leads to, if they are dead ends. There is no reason not to hoist dead ends + // into loops, as it can avoid multiple entries after the loop + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + Block *Target = iter->first; + if (Target->BranchesIn.size() <= 1 && Target->BranchesOut.size() == 0) { + Queue.insert(Target); + } + } +#endif + } + } + assert(InnerBlocks.size() > 0); + + for (BlockSet::iterator iter = InnerBlocks.begin(); iter != InnerBlocks.end(); iter++) { + Block *Curr = *iter; + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + Block *Possible = iter->first; + if (!contains(InnerBlocks, Possible)) { + NextEntries.insert(Possible); + } + } + } + +#if 0 + // We can avoid multiple next entries by hoisting them into the loop. + if (NextEntries.size() > 1) { + BlockBlockSetMap IndependentGroups; + FindIndependentGroups(NextEntries, IndependentGroups, &InnerBlocks); + + while (IndependentGroups.size() > 0 && NextEntries.size() > 1) { + Block *Min = NULL; + int MinSize = 0; + for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) { + Block *Entry = iter->first; + BlockSet &Blocks = iter->second; + if (!Min || Blocks.size() < MinSize) { // TODO: code size, not # of blocks + Min = Entry; + MinSize = Blocks.size(); + } + } + // check how many new entries this would cause + BlockSet &Hoisted = IndependentGroups[Min]; + bool abort = false; + for (BlockSet::iterator iter = Hoisted.begin(); iter != Hoisted.end() && !abort; iter++) { + Block *Curr = *iter; + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + Block *Target = iter->first; + if (Hoisted.find(Target) == Hoisted.end() && NextEntries.find(Target) == NextEntries.end()) { + // abort this hoisting + abort = true; + break; + } + } + } + if (abort) { + IndependentGroups.erase(Min); + continue; + } + // hoist this entry + PrintDebug("hoisting %d into loop\n", Min->Id); + NextEntries.erase(Min); + for (BlockSet::iterator iter = Hoisted.begin(); iter != Hoisted.end(); iter++) { + Block *Curr = *iter; + InnerBlocks.insert(Curr); + Blocks.erase(Curr); + } + IndependentGroups.erase(Min); + } + } +#endif + + PrintDebug("creating loop block:\n"); + DebugDump(InnerBlocks, " inner blocks:"); + DebugDump(Entries, " inner entries:"); + DebugDump(Blocks, " outer blocks:"); + DebugDump(NextEntries, " outer entries:"); + + LoopShape *Loop = new LoopShape(); + Notice(Loop); + + // Solipsize the loop, replacing with break/continue and marking branches as Processed (will not affect later calculations) + // A. Branches to the loop entries become a continue to this shape + for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { + Solipsize(*iter, Branch::Continue, Loop, InnerBlocks); + } + // B. Branches to outside the loop (a next entry) become breaks on this shape + for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) { + Solipsize(*iter, Branch::Break, Loop, InnerBlocks); + } + // Finish up + Shape *Inner = Process(InnerBlocks, Entries, NULL); + Loop->Inner = Inner; + return Loop; + } + + // For each entry, find the independent group reachable by it. The independent group is + // the entry itself, plus all the blocks it can reach that cannot be directly reached by another entry. Note that we + // ignore directly reaching the entry itself by another entry. + // @param Ignore - previous blocks that are irrelevant + void FindIndependentGroups(BlockSet &Entries, BlockBlockSetMap& IndependentGroups, BlockSet *Ignore=NULL) { + typedef std::map<Block*, Block*> BlockBlockMap; + + struct HelperClass { + BlockBlockSetMap& IndependentGroups; + BlockBlockMap Ownership; // For each block, which entry it belongs to. We have reached it from there. + + HelperClass(BlockBlockSetMap& IndependentGroupsInit) : IndependentGroups(IndependentGroupsInit) {} + void InvalidateWithChildren(Block *New) { // TODO: rename New + BlockList ToInvalidate; // Being in the list means you need to be invalidated + ToInvalidate.push_back(New); + while (ToInvalidate.size() > 0) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Block *Owner = Ownership[Invalidatee]; + if (contains(IndependentGroups, Owner)) { // Owner may have been invalidated, do not add to IndependentGroups! + IndependentGroups[Owner].erase(Invalidatee); + } + if (Ownership[Invalidatee]) { // may have been seen before and invalidated already + Ownership[Invalidatee] = NULL; + for (BlockBranchMap::iterator iter = Invalidatee->BranchesOut.begin(); iter != Invalidatee->BranchesOut.end(); iter++) { + Block *Target = iter->first; + BlockBlockMap::iterator Known = Ownership.find(Target); + if (Known != Ownership.end()) { + Block *TargetOwner = Known->second; + if (TargetOwner) { + ToInvalidate.push_back(Target); + } + } + } + } + } + } + }; + HelperClass Helper(IndependentGroups); + + // We flow out from each of the entries, simultaneously. + // When we reach a new block, we add it as belonging to the one we got to it from. + // If we reach a new block that is already marked as belonging to someone, it is reachable by + // two entries and is not valid for any of them. Remove it and all it can reach that have been + // visited. + + BlockList Queue; // Being in the queue means we just added this item, and we need to add its children + for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { + Block *Entry = *iter; + Helper.Ownership[Entry] = Entry; + IndependentGroups[Entry].insert(Entry); + Queue.push_back(Entry); + } + while (Queue.size() > 0) { + Block *Curr = Queue.front(); + Queue.pop_front(); + Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership map if we are in the queue + if (!Owner) continue; // we have been invalidated meanwhile after being reached from two entries + // Add all children + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + Block *New = iter->first; + BlockBlockMap::iterator Known = Helper.Ownership.find(New); + if (Known == Helper.Ownership.end()) { + // New node. Add it, and put it in the queue + Helper.Ownership[New] = Owner; + IndependentGroups[Owner].insert(New); + Queue.push_back(New); + continue; + } + Block *NewOwner = Known->second; + if (!NewOwner) continue; // We reached an invalidated node + if (NewOwner != Owner) { + // Invalidate this and all reachable that we have seen - we reached this from two locations + Helper.InvalidateWithChildren(New); + } + // otherwise, we have the same owner, so do nothing + } + } + + // Having processed all the interesting blocks, we remain with just one potential issue: + // If a->b, and a was invalidated, but then b was later reached by someone else, we must + // invalidate b. To check for this, we go over all elements in the independent groups, + // if an element has a parent which does *not* have the same owner, we must remove it + // and all its children. + + for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { + BlockSet &CurrGroup = IndependentGroups[*iter]; + BlockList ToInvalidate; + for (BlockSet::iterator iter = CurrGroup.begin(); iter != CurrGroup.end(); iter++) { + Block *Child = *iter; + for (BlockSet::iterator iter = Child->BranchesIn.begin(); iter != Child->BranchesIn.end(); iter++) { + Block *Parent = *iter; + if (Ignore && contains(*Ignore, Parent)) continue; + if (Helper.Ownership[Parent] != Helper.Ownership[Child]) { + ToInvalidate.push_back(Child); + } + } + } + while (ToInvalidate.size() > 0) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Helper.InvalidateWithChildren(Invalidatee); + } + } + + // Remove empty groups + for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { + if (IndependentGroups[*iter].size() == 0) { + IndependentGroups.erase(*iter); + } + } + +#if DEBUG + PrintDebug("Investigated independent groups:\n"); + for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) { + DebugDump(iter->second, " group: "); + } +#endif + } + + Shape *MakeMultiple(BlockSet &Blocks, BlockSet& Entries, BlockBlockSetMap& IndependentGroups, Shape *Prev, BlockSet &NextEntries) { + PrintDebug("creating multiple block with %d inner groups\n", IndependentGroups.size()); + bool Fused = !!(Shape::IsSimple(Prev)); + MultipleShape *Multiple = new MultipleShape(); + Notice(Multiple); + BlockSet CurrEntries; + for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) { + Block *CurrEntry = iter->first; + BlockSet &CurrBlocks = iter->second; + PrintDebug(" multiple group with entry %d:\n", CurrEntry->Id); + DebugDump(CurrBlocks, " "); + // Create inner block + CurrEntries.clear(); + CurrEntries.insert(CurrEntry); + for (BlockSet::iterator iter = CurrBlocks.begin(); iter != CurrBlocks.end(); iter++) { + Block *CurrInner = *iter; + // Remove the block from the remaining blocks + Blocks.erase(CurrInner); + // Find new next entries and fix branches to them + for (BlockBranchMap::iterator iter = CurrInner->BranchesOut.begin(); iter != CurrInner->BranchesOut.end();) { + Block *CurrTarget = iter->first; + BlockBranchMap::iterator Next = iter; + Next++; + if (!contains(CurrBlocks, CurrTarget)) { + NextEntries.insert(CurrTarget); + Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); + } + iter = Next; // increment carefully because Solipsize can remove us + } + } + Multiple->InnerMap[CurrEntry] = Process(CurrBlocks, CurrEntries, NULL); + // If we are not fused, then our entries will actually be checked + if (!Fused) { + CurrEntry->IsCheckedMultipleEntry = true; + } + } + DebugDump(Blocks, " remaining blocks after multiple:"); + // Add entries not handled as next entries, they are deferred + for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { + Block *Entry = *iter; + if (!contains(IndependentGroups, Entry)) { + NextEntries.insert(Entry); + } + } + return Multiple; + } + + // Main function. + // Process a set of blocks with specified entries, returns a shape + // The Make* functions receive a NextEntries. If they fill it with data, those are the entries for the + // ->Next block on them, and the blocks are what remains in Blocks (which Make* modify). In this way + // we avoid recursing on Next (imagine a long chain of Simples, if we recursed we could blow the stack). + Shape *Process(BlockSet &Blocks, BlockSet& InitialEntries, Shape *Prev) { + PrintDebug("Process() called\n"); + BlockSet *Entries = &InitialEntries; + BlockSet TempEntries[2]; + int CurrTempIndex = 0; + BlockSet *NextEntries; + Shape *Ret = NULL; + #define Make(call) \ + Shape *Temp = call; \ + if (Prev) Prev->Next = Temp; \ + if (!Ret) Ret = Temp; \ + if (!NextEntries->size()) { PrintDebug("Process() returning\n"); return Ret; } \ + Prev = Temp; \ + Entries = NextEntries; \ + continue; + while (1) { + PrintDebug("Process() running\n"); + DebugDump(Blocks, " blocks : "); + DebugDump(*Entries, " entries: "); + + CurrTempIndex = 1-CurrTempIndex; + NextEntries = &TempEntries[CurrTempIndex]; + NextEntries->clear(); + + if (Entries->size() == 0) return Ret; + if (Entries->size() == 1) { + Block *Curr = *(Entries->begin()); + if (Curr->BranchesIn.size() == 0) { + // One entry, no looping ==> Simple + Make(MakeSimple(Blocks, Curr, *NextEntries)); + } + // One entry, looping ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + } + // More than one entry, try to eliminate through a Multiple groups of + // independent blocks from an entry/ies. It is important to remove through + // multiples as opposed to looping since the former is more performant. + BlockBlockSetMap IndependentGroups; + FindIndependentGroups(*Entries, IndependentGroups); + + PrintDebug("Independent groups: %d\n", IndependentGroups.size()); + + if (IndependentGroups.size() > 0) { + // We can handle a group in a multiple if its entry cannot be reached by another group. + // Note that it might be reachable by itself - a loop. But that is fine, we will create + // a loop inside the multiple block (which is the performant order to do it). + for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end();) { + Block *Entry = iter->first; + BlockSet &Group = iter->second; + BlockBlockSetMap::iterator curr = iter++; // iterate carefully, we may delete + for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); iterBranch != Entry->BranchesIn.end(); iterBranch++) { + Block *Origin = *iterBranch; + if (!contains(Group, Origin)) { + // Reached from outside the group, so we cannot handle this + PrintDebug("Cannot handle group with entry %d because of incoming branch from %d\n", Entry->Id, Origin->Id); + IndependentGroups.erase(curr); + break; + } + } + } + + // As an optimization, if we have 2 independent groups, and one is a small dead end, we can handle only that dead end. + // The other then becomes a Next - without nesting in the code and recursion in the analysis. + // TODO: if the larger is the only dead end, handle that too + // TODO: handle >2 groups + // TODO: handle not just dead ends, but also that do not branch to the NextEntries. However, must be careful + // there since we create a Next, and that Next can prevent eliminating a break (since we no longer + // naturally reach the same place), which may necessitate a one-time loop, which makes the unnesting + // pointless. + if (IndependentGroups.size() == 2) { + // Find the smaller one + BlockBlockSetMap::iterator iter = IndependentGroups.begin(); + Block *SmallEntry = iter->first; + int SmallSize = iter->second.size(); + iter++; + Block *LargeEntry = iter->first; + int LargeSize = iter->second.size(); + if (SmallSize != LargeSize) { // ignore the case where they are identical - keep things symmetrical there + if (SmallSize > LargeSize) { + Block *Temp = SmallEntry; + SmallEntry = LargeEntry; + LargeEntry = Temp; // Note: we did not flip the Sizes too, they are now invalid. TODO: use the smaller size as a limit? + } + // Check if dead end + bool DeadEnd = true; + BlockSet &SmallGroup = IndependentGroups[SmallEntry]; + for (BlockSet::iterator iter = SmallGroup.begin(); iter != SmallGroup.end(); iter++) { + Block *Curr = *iter; + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + Block *Target = iter->first; + if (!contains(SmallGroup, Target)) { + DeadEnd = false; + break; + } + } + if (!DeadEnd) break; + } + if (DeadEnd) { + PrintDebug("Removing nesting by not handling large group because small group is dead end\n"); + IndependentGroups.erase(LargeEntry); + } + } + } + + PrintDebug("Handleable independent groups: %d\n", IndependentGroups.size()); + + if (IndependentGroups.size() > 0) { + // Some groups removable ==> Multiple + Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, *NextEntries)); + } + } + // No independent groups, must be loopable ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + } + } + }; + + // Main + + BlockSet AllBlocks; + for (BlockSet::iterator iter = Pre.Live.begin(); iter != Pre.Live.end(); iter++) { + Block *Curr = *iter; + AllBlocks.insert(Curr); +#if DEBUG + PrintDebug("Adding block %d (%s)\n", Curr->Id, Curr->Code); +#endif + } + + BlockSet Entries; + Entries.insert(Entry); + Root = Analyzer(this).Process(AllBlocks, Entries, NULL); + assert(Root); + + // Post optimizations + + struct PostOptimizer { + Relooper *Parent; + void *Closure; + + PostOptimizer(Relooper *ParentInit) : Parent(ParentInit), Closure(NULL) {} + + #define RECURSE_Multiple(shape, func) \ + for (BlockShapeMap::iterator iter = shape->InnerMap.begin(); iter != shape->InnerMap.end(); iter++) { \ + func(iter->second); \ + } + #define RECURSE_Loop(shape, func) \ + func(shape->Inner); + #define RECURSE(shape, func) RECURSE_##shape(shape, func); + + #define SHAPE_SWITCH(var, simple, multiple, loop) \ + if (SimpleShape *Simple = Shape::IsSimple(var)) { \ + simple; \ + } else if (MultipleShape *Multiple = Shape::IsMultiple(var)) { \ + multiple; \ + } else if (LoopShape *Loop = Shape::IsLoop(var)) { \ + loop; \ + } + + // Find the blocks that natural control flow can get us directly to, or through a multiple that we ignore + void FollowNaturalFlow(Shape *S, BlockSet &Out) { + SHAPE_SWITCH(S, { + Out.insert(Simple->Inner); + }, { + for (BlockShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) { + FollowNaturalFlow(iter->second, Out); + } + FollowNaturalFlow(Multiple->Next, Out); + }, { + FollowNaturalFlow(Loop->Inner, Out); + }); + } + + void FindNaturals(Shape *Root, Shape *Otherwise=NULL) { + if (Root->Next) { + Root->Natural = Root->Next; + FindNaturals(Root->Next, Otherwise); + } else { + Root->Natural = Otherwise; + } + + SHAPE_SWITCH(Root, { + }, { + for (BlockShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) { + FindNaturals(iter->second, Root->Natural); + } + }, { + FindNaturals(Loop->Inner, Loop->Inner); + }); + } + + // Remove unneeded breaks and continues. + // A flow operation is trivially unneeded if the shape we naturally get to by normal code + // execution is the same as the flow forces us to. + void RemoveUnneededFlows(Shape *Root, Shape *Natural=NULL, LoopShape *LastLoop=NULL) { + BlockSet NaturalBlocks; + FollowNaturalFlow(Natural, NaturalBlocks); + Shape *Next = Root; + while (Next) { + Root = Next; + Next = NULL; + SHAPE_SWITCH(Root, { + if (Simple->Inner->BranchVar) LastLoop = NULL; // a switch clears out the loop (TODO: only for breaks, not continue) + + // If there is a next block, we already know at Simple creation time to make direct branches, + // and we can do nothing more. If there is no next however, then Natural is where we will + // go to by doing nothing, so we can potentially optimize some branches to direct. + if (Simple->Next) { + Next = Simple->Next; + } else { + for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) { + Block *Target = iter->first; + Branch *Details = iter->second; + if (Details->Type != Branch::Direct && contains(NaturalBlocks, Target)) { // note: cannot handle split blocks + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = Shape::IsMultiple(Details->Ancestor)) { + Multiple->NeedLoop--; + } + } else if (Details->Type == Branch::Break && LastLoop && LastLoop->Natural == Details->Ancestor->Natural) { + // it is important to simplify breaks, as simpler breaks enable other optimizations + Details->Labeled = false; + if (MultipleShape *Multiple = Shape::IsMultiple(Details->Ancestor)) { + Multiple->NeedLoop--; + } + } + } + } + }, { + for (BlockShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) { + RemoveUnneededFlows(iter->second, Multiple->Next, Multiple->NeedLoop ? NULL : LastLoop); + } + Next = Multiple->Next; + }, { + RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop); + Next = Loop->Next; + }); + } + } + + // After we know which loops exist, we can calculate which need to be labeled + void FindLabeledLoops(Shape *Root) { + bool First = Closure == NULL; + if (First) { + Closure = (void*)(new std::stack<Shape*>); + } + std::stack<Shape*> &LoopStack = *((std::stack<Shape*>*)Closure); + + Shape *Next = Root; + while (Next) { + Root = Next; + Next = NULL; + + SHAPE_SWITCH(Root, { + MultipleShape *Fused = Shape::IsMultiple(Root->Next); + // If we are fusing a Multiple with a loop into this Simple, then visit it now + if (Fused && Fused->NeedLoop) { + LoopStack.push(Fused); + } + if (Simple->Inner->BranchVar) { + LoopStack.push(NULL); // a switch means breaks are now useless, push a dummy + } + if (Fused) { + RECURSE_Multiple(Fused, FindLabeledLoops); + } + for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) { + Block *Target = iter->first; + Branch *Details = iter->second; + if (Details->Type != Branch::Direct) { + assert(LoopStack.size() > 0); + if (Details->Ancestor != LoopStack.top() && Details->Labeled) { + LabeledShape *Labeled = Shape::IsLabeled(Details->Ancestor); + Labeled->Labeled = true; + } else { + Details->Labeled = false; + } + } + } + if (Simple->Inner->BranchVar) { + LoopStack.pop(); + } + if (Fused && Fused->NeedLoop) { + LoopStack.pop(); + } + if (Fused) { + Next = Fused->Next; + } else { + Next = Root->Next; + } + }, { + if (Multiple->NeedLoop) { + LoopStack.push(Multiple); + } + RECURSE(Multiple, FindLabeledLoops); + if (Multiple->NeedLoop) { + LoopStack.pop(); + } + Next = Root->Next; + }, { + LoopStack.push(Loop); + RECURSE(Loop, FindLabeledLoops); + LoopStack.pop(); + Next = Root->Next; + }); + } + + if (First) { + delete (std::stack<Shape*>*)Closure; + } + } + + void Process(Shape *Root) { + FindNaturals(Root); + RemoveUnneededFlows(Root); + FindLabeledLoops(Root); + } + }; + + PrintDebug("=== Optimizing shapes ===\n"); + + PostOptimizer(this).Process(Root); +} + +void Relooper::Render() { + OutputBuffer = OutputBufferRoot; + assert(Root); + Root->Render(false); +} + +void Relooper::SetOutputBuffer(char *Buffer, int Size) { + OutputBufferRoot = OutputBuffer = Buffer; + OutputBufferSize = Size; +} + +void Relooper::MakeOutputBuffer(int Size) { + OutputBufferRoot = OutputBuffer = (char*)malloc(Size); + OutputBufferSize = Size; +} + +void Relooper::SetAsmJSMode(int On) { + AsmJS = On; +} + +#if DEBUG +// Debugging + +void Debugging::Dump(BlockSet &Blocks, const char *prefix) { + if (prefix) printf("%s ", prefix); + for (BlockSet::iterator iter = Blocks.begin(); iter != Blocks.end(); iter++) { + Block *Curr = *iter; + printf("%d:\n", Curr->Id); + for (BlockBranchMap::iterator iter2 = Curr->BranchesOut.begin(); iter2 != Curr->BranchesOut.end(); iter2++) { + Block *Other = iter2->first; + printf(" -> %d\n", Other->Id); + assert(contains(Other->BranchesIn, Curr)); + } + } +} + +void Debugging::Dump(Shape *S, const char *prefix) { + if (prefix) printf("%s ", prefix); + if (!S) { + printf(" (null)\n"); + return; + } + printf(" %d ", S->Id); + SHAPE_SWITCH(S, { + printf("<< Simple with block %d\n", Simple->Inner->Id); + }, { + printf("<< Multiple\n"); + for (BlockShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) { + printf(" with entry %d\n", iter->first->Id); + } + }, { + printf("<< Loop\n"); + }); +} + +static void PrintDebug(const char *Format, ...) { + printf("// "); + va_list Args; + va_start(Args, Format); + vprintf(Format, Args); + va_end(Args); +} +#endif + +// C API - useful for binding to other languages + +typedef std::map<void*, int> VoidIntMap; +VoidIntMap __blockDebugMap__; // maps block pointers in currently running code to block ids, for generated debug output + +extern "C" { + +void rl_set_output_buffer(char *buffer, int size) { +#if DEBUG + printf("#include \"Relooper.h\"\n"); + printf("int main() {\n"); + printf(" char buffer[100000];\n"); + printf(" rl_set_output_buffer(buffer);\n"); +#endif + Relooper::SetOutputBuffer(buffer, size); +} + +void rl_make_output_buffer(int size) { + Relooper::SetOutputBuffer((char*)malloc(size), size); +} + +void rl_set_asm_js_mode(int on) { + Relooper::SetAsmJSMode(on); +} + +void *rl_new_block(const char *text, const char *branch_var) { + Block *ret = new Block(text, branch_var); +#if DEBUG + printf(" void *b%d = rl_new_block(\"// code %d\");\n", ret->Id, ret->Id); + __blockDebugMap__[ret] = ret->Id; + printf(" block_map[%d] = b%d;\n", ret->Id, ret->Id); +#endif + return ret; +} + +void rl_delete_block(void *block) { +#if DEBUG + printf(" rl_delete_block(block_map[%d]);\n", ((Block*)block)->Id); +#endif + delete (Block*)block; +} + +void rl_block_add_branch_to(void *from, void *to, const char *condition, const char *code) { +#if DEBUG + printf(" rl_block_add_branch_to(block_map[%d], block_map[%d], %s%s%s, %s%s%s);\n", ((Block*)from)->Id, ((Block*)to)->Id, condition ? "\"" : "", condition ? condition : "NULL", condition ? "\"" : "", code ? "\"" : "", code ? code : "NULL", code ? "\"" : ""); +#endif + ((Block*)from)->AddBranchTo((Block*)to, condition, code); +} + +void *rl_new_relooper() { +#if DEBUG + printf(" void *block_map[10000];\n"); + printf(" void *rl = rl_new_relooper();\n"); +#endif + return new Relooper; +} + +void rl_delete_relooper(void *relooper) { + delete (Relooper*)relooper; +} + +void rl_relooper_add_block(void *relooper, void *block) { +#if DEBUG + printf(" rl_relooper_add_block(rl, block_map[%d]);\n", ((Block*)block)->Id); +#endif + ((Relooper*)relooper)->AddBlock((Block*)block); +} + +void rl_relooper_calculate(void *relooper, void *entry) { +#if DEBUG + printf(" rl_relooper_calculate(rl, block_map[%d]);\n", ((Block*)entry)->Id); + printf(" rl_relooper_render(rl);\n"); + printf(" rl_delete_relooper(rl);\n"); + printf(" puts(buffer);\n"); + printf(" return 0;\n"); + printf("}\n"); +#endif + ((Relooper*)relooper)->Calculate((Block*)entry); +} + +void rl_relooper_render(void *relooper) { + ((Relooper*)relooper)->Render(); +} + +} + diff --git a/lib/Target/CppBackend/Relooper.h b/lib/Target/CppBackend/Relooper.h new file mode 100644 index 0000000000..f3dedf8c0e --- /dev/null +++ b/lib/Target/CppBackend/Relooper.h @@ -0,0 +1,251 @@ +/* +This is an optimized C++ implemention of the Relooper algorithm originally +developed as part of Emscripten. This implementation includes optimizations +added since the original academic paper [1] was published about it, and is +written in an LLVM-friendly way with the goal of inclusion in upstream +LLVM. + +[1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In Proceedings of the ACM international conference companion on Object oriented programming systems languages and applications companion (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 http://doi.acm.org/10.1145/2048147.2048224 +*/ + +#include <assert.h> +#include <stdio.h> +#include <stdarg.h> + +#ifdef __cplusplus + +#include <map> +#include <deque> +#include <set> + +struct Block; +struct Shape; + +// Info about a branching from one block to another +struct Branch { + enum FlowType { + Direct = 0, // We will directly reach the right location through other means, no need for continue or break + Break = 1, + Continue = 2 + }; + Shape *Ancestor; // If not NULL, this shape is the relevant one for purposes of getting to the target block. We break or continue on it + Branch::FlowType Type; // If Ancestor is not NULL, this says whether to break or continue + bool Labeled; // If a break or continue, whether we need to use a label + const char *Condition; // The condition for which we branch. For example, "my_var == 1". Conditions are checked one by one. One of the conditions should have NULL as the condition, in which case it is the default + const char *Code; // If provided, code that is run right before the branch is taken. This is useful for phis + + Branch(const char *ConditionInit, const char *CodeInit=NULL); + ~Branch(); + + // Prints out the branch + void Render(Block *Target, bool SetLabel); +}; + +typedef std::set<Block*> BlockSet; +typedef std::map<Block*, Branch*> BlockBranchMap; + +// Represents a basic block of code - some instructions that end with a +// control flow modifier (a branch, return or throw). +struct Block { + // Branches become processed after we finish the shape relevant to them. For example, + // when we recreate a loop, branches to the loop start become continues and are now + // processed. When we calculate what shape to generate from a set of blocks, we ignore + // processed branches. + // Blocks own the Branch objects they use, and destroy them when done. + BlockBranchMap BranchesOut; + BlockSet BranchesIn; + BlockBranchMap ProcessedBranchesOut; + BlockSet ProcessedBranchesIn; + Shape *Parent; // The shape we are directly inside + int Id; // A unique identifier + const char *Code; // The string representation of the code in this block. Owning pointer (we copy the input) + const char *BranchVar; // If we have more than one branch out, the variable whose value determines where we go + bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching us requires setting the label variable + + Block(const char *CodeInit, const char *BranchVarInit); + ~Block(); + + void AddBranchTo(Block *Target, const char *Condition, const char *Code=NULL); + + // Prints out the instructions code and branchings + void Render(bool InLoop); + + // INTERNAL + static int IdCounter; +}; + +// Represents a structured control flow shape, one of +// +// Simple: No control flow at all, just instructions. If several +// blocks, then +// +// Multiple: A shape with more than one entry. If the next block to +// be entered is among them, we run it and continue to +// the next shape, otherwise we continue immediately to the +// next shape. +// +// Loop: An infinite loop. +// +// Emulated: Control flow is managed by a switch in a loop. This +// is necessary in some cases, for example when control +// flow is not known until runtime (indirect branches, +// setjmp returns, etc.) +// + +class SimpleShape; +class LabeledShape; +class MultipleShape; +class LoopShape; + +struct Shape { + int Id; // A unique identifier. Used to identify loops, labels are Lx where x is the Id. + Shape *Next; // The shape that will appear in the code right after this one + Shape *Natural; // The shape that control flow gets to naturally (if there is Next, then this is Next) + + enum ShapeType { + Simple, + Multiple, + Loop + }; + ShapeType Type; + + Shape(ShapeType TypeInit) : Id(Shape::IdCounter++), Next(NULL), Type(TypeInit) {} + virtual ~Shape() {} + + virtual void Render(bool InLoop) = 0; + + static SimpleShape *IsSimple(Shape *It) { return It && It->Type == Simple ? (SimpleShape*)It : NULL; } + static MultipleShape *IsMultiple(Shape *It) { return It && It->Type == Multiple ? (MultipleShape*)It : NULL; } + static LoopShape *IsLoop(Shape *It) { return It && It->Type == Loop ? (LoopShape*)It : NULL; } + static LabeledShape *IsLabeled(Shape *It) { return IsMultiple(It) || IsLoop(It) ? (LabeledShape*)It : NULL; } + + // INTERNAL + static int IdCounter; +}; + +struct SimpleShape : public Shape { + Block *Inner; + + SimpleShape() : Shape(Simple), Inner(NULL) {} + void Render(bool InLoop) { + Inner->Render(InLoop); + if (Next) Next->Render(InLoop); + } +}; + +typedef std::map<Block*, Shape*> BlockShapeMap; + +// A shape that may be implemented with a labeled loop. +struct LabeledShape : public Shape { + bool Labeled; // If we have a loop, whether it needs to be labeled + + LabeledShape(ShapeType TypeInit) : Shape(TypeInit), Labeled(false) {} +}; + +struct MultipleShape : public LabeledShape { + BlockShapeMap InnerMap; // entry block -> shape + int NeedLoop; // If we have branches, we need a loop. This is a counter of loop requirements, + // if we optimize it to 0, the loop is unneeded + + MultipleShape() : LabeledShape(Multiple), NeedLoop(0) {} + + void RenderLoopPrefix(); + void RenderLoopPostfix(); + + void Render(bool InLoop); +}; + +struct LoopShape : public LabeledShape { + Shape *Inner; + + LoopShape() : LabeledShape(Loop), Inner(NULL) {} + void Render(bool InLoop); +}; + +/* +struct EmulatedShape : public Shape { + std::deque<Block*> Blocks; + void Render(bool InLoop); +}; +*/ + +// Implements the relooper algorithm for a function's blocks. +// +// Usage: +// 1. Instantiate this struct. +// 2. Call AddBlock with the blocks you have. Each should already +// have its branchings in specified (the branchings out will +// be calculated by the relooper). +// 3. Call Render(). +// +// Implementation details: The Relooper instance has +// ownership of the blocks and shapes, and frees them when done. +struct Relooper { + std::deque<Block*> Blocks; + std::deque<Shape*> Shapes; + Shape *Root; + + Relooper(); + ~Relooper(); + + void AddBlock(Block *New); + + // Calculates the shapes + void Calculate(Block *Entry); + + // Renders the result. + void Render(); + + // Sets the global buffer all printing goes to. Must call this or MakeOutputBuffer. + static void SetOutputBuffer(char *Buffer, int Size); + + // Creates an output buffer. Must call this or SetOutputBuffer. + static void MakeOutputBuffer(int Size); + + // Sets asm.js mode on or off (default is off) + static void SetAsmJSMode(int On); +}; + +typedef std::map<Block*, BlockSet> BlockBlockSetMap; + +#if DEBUG +struct Debugging { + static void Dump(BlockSet &Blocks, const char *prefix=NULL); + static void Dump(Shape *S, const char *prefix=NULL); +}; +#endif + +#endif // __cplusplus + +// C API - useful for binding to other languages + +#ifdef _WIN32 + #ifdef RELOOPERDLL_EXPORTS + #define RELOOPERDLL_API __declspec(dllexport) + #else + #define RELOOPERDLL_API __declspec(dllimport) + #endif +#else + #define RELOOPERDLL_API +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +RELOOPERDLL_API void rl_set_output_buffer(char *buffer, int size); +RELOOPERDLL_API void rl_make_output_buffer(int size); +RELOOPERDLL_API void rl_set_asm_js_mode(int on); +RELOOPERDLL_API void *rl_new_block(const char *text, const char *branch_var); +RELOOPERDLL_API void rl_delete_block(void *block); +RELOOPERDLL_API void rl_block_add_branch_to(void *from, void *to, const char *condition, const char *code); +RELOOPERDLL_API void *rl_new_relooper(); +RELOOPERDLL_API void rl_delete_relooper(void *relooper); +RELOOPERDLL_API void rl_relooper_add_block(void *relooper, void *block); +RELOOPERDLL_API void rl_relooper_calculate(void *relooper, void *entry); +RELOOPERDLL_API void rl_relooper_render(void *relooper); + +#ifdef __cplusplus +} +#endif + diff --git a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp index 1ca74a4895..339b36105c 100644 --- a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp +++ b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp @@ -20,8 +20,8 @@ static unsigned CppBackend_TripleMatchQuality(const std::string &TT) { } extern "C" void LLVMInitializeCppBackendTargetInfo() { - TargetRegistry::RegisterTarget(TheCppBackendTarget, "cpp", - "C++ backend", + TargetRegistry::RegisterTarget(TheCppBackendTarget, "js", + "JavaScript backend", &CppBackend_TripleMatchQuality); } |