diff options
Diffstat (limited to 'lib/Bytecode')
-rw-r--r-- | lib/Bytecode/Makefile | 5 | ||||
-rw-r--r-- | lib/Bytecode/Reader/ConstantReader.cpp | 218 | ||||
-rw-r--r-- | lib/Bytecode/Reader/InstructionReader.cpp | 213 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Makefile | 7 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Reader.cpp | 478 | ||||
-rw-r--r-- | lib/Bytecode/Reader/ReaderInternals.h | 146 | ||||
-rw-r--r-- | lib/Bytecode/Writer/ConstantWriter.cpp | 154 | ||||
-rw-r--r-- | lib/Bytecode/Writer/InstructionWriter.cpp | 184 | ||||
-rw-r--r-- | lib/Bytecode/Writer/Makefile | 7 | ||||
-rw-r--r-- | lib/Bytecode/Writer/SlotCalculator.cpp | 195 | ||||
-rw-r--r-- | lib/Bytecode/Writer/SlotCalculator.h | 96 | ||||
-rw-r--r-- | lib/Bytecode/Writer/Writer.cpp | 182 | ||||
-rw-r--r-- | lib/Bytecode/Writer/WriterInternals.h | 74 |
13 files changed, 1959 insertions, 0 deletions
diff --git a/lib/Bytecode/Makefile b/lib/Bytecode/Makefile
new file mode 100644
index 0000000000..75d4f7cd10
--- /dev/null
+++ b/lib/Bytecode/Makefile
@@ -0,0 +1,5 @@
+LEVEL = ../..
+DIRS = Reader Writer
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Bytecode/Reader/ConstantReader.cpp b/lib/Bytecode/Reader/ConstantReader.cpp
new file mode 100644
index 0000000000..b85bd887ef
--- /dev/null
+++ b/lib/Bytecode/Reader/ConstantReader.cpp
@@ -0,0 +1,218 @@
+//===- ReadConst.cpp - Code to read constants and constant pools ------------===
+//
+// This file implements functionality to deserialize constants and entire
+// constant pools.
+//
+// Note that this library should be as fast as possible, reentrant, and
+// threadsafe!!
+//
+//===------------------------------------------------------------------------===
+
+#include "llvm/Module.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ConstPoolVals.h"
+#include "llvm/DerivedTypes.h"
+#include "ReaderInternals.h"
+
+// parseTypeConstant - Read one type description from the stream and store a
+// newly allocated ConstPoolType wrapping it in V.  The first VBR value is a
+// primitive type ID.  If it names a primitive type that is the whole encoding;
+// otherwise it selects one of the derived kinds handled below (method, array,
+// struct, pointer) and is followed by the type numbers of its components.
+//
+// Returns true on error (short read, unknown component type, or unknown kind),
+// false on success.
+//
+bool BytecodeParser::parseTypeConstant(const uchar *&Buf, const uchar *EndBuf,
+                                       ConstPoolVal *&V) {
+  const Type *Val = 0;
+
+  unsigned PrimType;
+  if (read_vbr(Buf, EndBuf, PrimType)) return true;
+
+  if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) {
+    V = new ConstPoolType(Val);    // It's just a primitive ID.
+    return false;
+  }
+
+  switch (PrimType) {
+  case Type::MethodTyID: {
+    // Encoding: [return type][param type]* [0]
+    unsigned Typ;
+    if (read_vbr(Buf, EndBuf, Typ)) return true;
+    const Type *RetType = getType(Typ);
+    if (RetType == 0) return true;
+
+    MethodType::ParamTypes Params;
+
+    if (read_vbr(Buf, EndBuf, Typ)) return true;
+    while (Typ) {           // List is terminated by void/0 typeid
+      const Type *Ty = getType(Typ);
+      if (Ty == 0) return true;
+      Params.push_back(Ty);
+
+      if (read_vbr(Buf, EndBuf, Typ)) return true;
+    }
+
+    Val = MethodType::getMethodType(RetType, Params);
+    break;
+  }
+  case Type::ArrayTyID: {
+    // Encoding: [element type][number of elements]
+    unsigned ElTyp;
+    if (read_vbr(Buf, EndBuf, ElTyp)) return true;
+    const Type *ElementType = getType(ElTyp);
+    if (ElementType == 0) return true;
+
+    int NumElements;
+    if (read_vbr(Buf, EndBuf, NumElements)) return true;
+    Val = ArrayType::getArrayType(ElementType, NumElements);
+    break;
+  }
+  case Type::StructTyID: {
+    // Encoding: [element type]* [0]
+    unsigned Typ;
+    StructType::ElementTypes Elements;
+
+    if (read_vbr(Buf, EndBuf, Typ)) return true;
+    while (Typ) {         // List is terminated by void/0 typeid
+      const Type *Ty = getType(Typ);
+      if (Ty == 0) return true;
+      Elements.push_back(Ty);
+
+      if (read_vbr(Buf, EndBuf, Typ)) return true;
+    }
+
+    Val = StructType::getStructType(Elements);
+    break;
+  }
+  case Type::PointerTyID: {
+    // Encoding: [pointed-to type]
+    unsigned ElTyp;
+    if (read_vbr(Buf, EndBuf, ElTyp)) return true;
+    const Type *ElementType = getType(ElTyp);
+    if (ElementType == 0) return true;
+    Val = PointerType::getPointerType(ElementType);
+    break;
+  }
+
+  default:
+    cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to deserialize"
+         << " primitive Type " << PrimType << "\n";
+    return true;
+  }
+
+  V = new ConstPoolType(Val);
+  return false;
+}
+
+// parseConstPoolValue - Read a single constant of type Ty from the stream and
+// store the newly allocated constant in V.  Scalars are stored inline as VBR
+// values; array and struct constants are stored as a list of slot numbers that
+// refer to previously read constants of the element types.
+//
+// Returns true on error (short read, value out of range for Ty, element slot
+// that is missing or is not a constant, unsupported type), false on success.
+// V is only written on success.
+//
+bool BytecodeParser::parseConstPoolValue(const uchar *&Buf,
+                                         const uchar *EndBuf,
+                                         const Type *Ty, ConstPoolVal *&V) {
+  switch (Ty->getPrimitiveID()) {
+  case Type::BoolTyID: {
+    unsigned Val;
+    if (read_vbr(Buf, EndBuf, Val)) return true;
+    if (Val != 0 && Val != 1) return true;
+    V = new ConstPoolBool(Val == 1);
+    break;
+  }
+
+  case Type::UByteTyID:   // Unsigned integer types...
+  case Type::UShortTyID:
+  case Type::UIntTyID: {
+    unsigned Val;
+    if (read_vbr(Buf, EndBuf, Val)) return true;
+    if (!ConstPoolUInt::isValueValidForType(Ty, Val)) return true;
+    V = new ConstPoolUInt(Ty, Val);
+    break;
+  }
+
+  case Type::ULongTyID: {
+    uint64_t Val;
+    if (read_vbr(Buf, EndBuf, Val)) return true;
+    V = new ConstPoolUInt(Ty, Val);
+    break;
+  }
+
+  case Type::SByteTyID:   // Signed integer types...
+  case Type::ShortTyID:
+  case Type::IntTyID: {
+    int Val;
+    if (read_vbr(Buf, EndBuf, Val)) return true;
+    // An out of range value is an error: report it (true) instead of
+    // returning "success" with V left unset.
+    if (!ConstPoolSInt::isValueValidForType(Ty, Val)) return true;
+    V = new ConstPoolSInt(Ty, Val);
+    break;
+  }
+
+  case Type::LongTyID: {
+    int64_t Val;
+    if (read_vbr(Buf, EndBuf, Val)) return true;
+    V = new ConstPoolSInt(Ty, Val);
+    break;
+  }
+
+  case Type::TypeTyID:
+    if (parseTypeConstant(Buf, EndBuf, V)) return true;
+    break;
+
+  case Type::ArrayTyID: {
+    const ArrayType *AT = (const ArrayType*)Ty;
+    unsigned NumElements;
+    if (AT->isSized())          // Sized array, # elements stored in type!
+      NumElements = (unsigned)AT->getNumElements();
+    else                        // Unsized array, # elements stored in stream!
+      if (read_vbr(Buf, EndBuf, NumElements)) return true;
+
+    vector<ConstPoolVal *> Elements;
+    while (NumElements--) {   // Read all of the elements of the constant.
+      unsigned Slot;
+      if (read_vbr(Buf, EndBuf, Slot)) return true;
+      Value *Elt = getValue(AT->getElementType(), Slot, false);
+      if (!Elt || Elt->getValueType() != Value::ConstantVal)
+        return true;
+      Elements.push_back((ConstPoolVal*)Elt);
+    }
+    V = new ConstPoolArray(AT, Elements);
+    break;
+  }
+
+  case Type::StructTyID: {
+    const StructType *ST = (const StructType*)Ty;
+    const StructType::ElementTypes &ET = ST->getElementTypes();
+
+    vector<ConstPoolVal *> Elements;
+    for (unsigned i = 0; i < ET.size(); ++i) {
+      unsigned Slot;
+      if (read_vbr(Buf, EndBuf, Slot)) return true;
+      Value *Elt = getValue(ET[i], Slot, false);
+      if (!Elt || Elt->getValueType() != Value::ConstantVal)
+        return true;
+      Elements.push_back((ConstPoolVal*)Elt);
+    }
+
+    V = new ConstPoolStruct(ST, Elements);
+    break;
+  }
+
+  default:
+    cerr << __FILE__ << ":" << __LINE__
+         << ": Don't know how to deserialize constant value of type '"
+         << Ty->getName() << "'\n";
+    return true;
+  }
+  return false;
+}
+
+// ParseConstantPool - Read constant pool planes until the end of the block.
+// Each plane is encoded as [num entries][type id] followed by that many
+// constants of the given type.  Every constant read is entered into the value
+// table Tab and into the constant pool CP.
+//
+// Returns true on error, including the case where parsing ran past EndBuf.
+//
+bool BytecodeParser::ParseConstantPool(const uchar *&Buf, const uchar *EndBuf,
+                                       SymTabValue::ConstantPoolType &CP,
+                                       ValueTable &Tab) {
+  while (Buf < EndBuf) {
+    unsigned NumEntries, Typ;
+
+    if (read_vbr(Buf, EndBuf, NumEntries) ||
+        read_vbr(Buf, EndBuf, Typ)) return true;
+    const Type *Ty = getType(Typ);
+    if (Ty == 0) return true;
+
+    for (unsigned i = 0; i < NumEntries; i++) {
+      ConstPoolVal *I;
+      if (parseConstPoolValue(Buf, EndBuf, Ty, I)) return true;
+#if 0
+      cerr << "  Read const value: <" << I->getType()->getName()
+           << ">: " << I->getStrValue() << endl;
+#endif
+      insertValue(I, Tab);
+      CP.insert(I);
+    }
+  }
+
+  return Buf > EndBuf;
+}
diff --git a/lib/Bytecode/Reader/InstructionReader.cpp b/lib/Bytecode/Reader/InstructionReader.cpp
new file mode 100644
index 0000000000..667e144673
--- /dev/null
+++ b/lib/Bytecode/Reader/InstructionReader.cpp
@@ -0,0 +1,213 @@
+//===- ReadInst.cpp - Code to read an instruction from bytecode -------------===
+//
+// This file defines the mechanism to read an instruction
from a bytecode
+// stream.
+//
+// Note that this library should be as fast as possible, reentrant, and
+// threadsafe!!
+//
+// TODO: Change from getValue(Raw.Arg1) etc, to getArg(Raw, 1)
+//       Make it check type, so that casts are checked.
+//
+//===------------------------------------------------------------------------===
+
+#include "llvm/iOther.h"
+#include "llvm/iTerminators.h"
+#include "llvm/iMemory.h"
+#include "llvm/DerivedTypes.h"
+#include "ReaderInternals.h"
+
+// ParseRawInst - Decode the raw fields of one instruction into Result.
+//
+// The first 32 bit word selects the encoding through its top two bits:
+//   1: [6 bit opcode][12 bit type][12 bit arg1]   (arg1 == 4095 => 0 operands)
+//   2: [6 bit opcode][ 8 bit type][ 8 bit arg1][ 8 bit arg2]
+//   3: [6 bit opcode][ 6 bit type][ 6 bit arg1][ 6 bit arg2][ 6 bit arg3]
+//   0: the word is re-read as a sequence of VBR values:
+//      [opcode][type][num operands][operand]*  followed by 32 bit alignment.
+//
+// In the VBR form with more than three operands, operands 3..N are stored in
+// a heap allocated vector (Result.VarArgs) that the caller must delete.
+//
+// Returns true on error, false on success.
+//
+bool BytecodeParser::ParseRawInst(const uchar *&Buf, const uchar *EndBuf,
+                                  RawInst &Result) {
+  unsigned Op, Typ;
+  if (read(Buf, EndBuf, Op)) return true;
+
+  Result.NumOperands =  Op >> 30;
+  Result.Opcode      = (Op >> 24) & 63;
+
+  switch (Result.NumOperands) {
+  case 1:
+    Result.Ty   = getType((Op >> 12) & 4095);
+    Result.Arg1 = Op & 4095;
+    if (Result.Arg1 == 4095)    // Handle special encoding for 0 operands...
+      Result.NumOperands = 0;
+    break;
+  case 2:
+    Result.Ty   = getType((Op >> 16) & 255);
+    Result.Arg1 = (Op >> 8 ) & 255;
+    Result.Arg2 = (Op >> 0 ) & 255;
+    break;
+  case 3:
+    Result.Ty   = getType((Op >> 18) & 63);
+    Result.Arg1 = (Op >> 12) & 63;
+    Result.Arg2 = (Op >> 6 ) & 63;
+    Result.Arg3 = (Op >> 0 ) & 63;
+    break;
+  case 0:
+    Buf -= 4;  // Hrm, try this again...
+    if (read_vbr(Buf, EndBuf, Result.Opcode)) return true;
+    if (read_vbr(Buf, EndBuf, Typ)) return true;
+    Result.Ty = getType(Typ);
+    if (read_vbr(Buf, EndBuf, Result.NumOperands)) return true;
+
+    switch (Result.NumOperands) {
+    case 0:
+      cerr << "Zero Arg instr found!\n";
+      return true;  // This encoding is invalid!
+    case 1:
+      if (read_vbr(Buf, EndBuf, Result.Arg1)) return true;
+      break;
+    case 2:
+      if (read_vbr(Buf, EndBuf, Result.Arg1) ||
+          read_vbr(Buf, EndBuf, Result.Arg2)) return true;
+      break;
+    case 3:
+      if (read_vbr(Buf, EndBuf, Result.Arg1) ||
+          read_vbr(Buf, EndBuf, Result.Arg2) ||
+          read_vbr(Buf, EndBuf, Result.Arg3)) return true;
+      break;
+    default:
+      if (read_vbr(Buf, EndBuf, Result.Arg1) ||
+          read_vbr(Buf, EndBuf, Result.Arg2)) return true;
+
+      // Allocate a vector to hold arguments 3, 4, 5, 6 ...
+      Result.VarArgs = new vector<unsigned>(Result.NumOperands-2);
+      for (unsigned a = 0; a < Result.NumOperands-2; a++)
+        if (read_vbr(Buf, EndBuf, (*Result.VarArgs)[a])) {
+          delete Result.VarArgs;     // Don't leak the vector on a short read
+          Result.VarArgs = 0;
+          return true;
+        }
+      break;
+    }
+    if (align32(Buf, EndBuf)) {
+      if (Result.NumOperands > 3) {  // VarArgs only exists in this case
+        delete Result.VarArgs;
+        Result.VarArgs = 0;
+      }
+      return true;
+    }
+    break;
+  }
+
+  //cerr << "NO: "  << Result.NumOperands   << " opcode: " << Result.Opcode
+  //     << " Ty: " << Result.Ty->getName() << " arg1: "   << Result.Arg1 << endl;
+  return false;
+}
+
+
+// ParseInstruction - Read one instruction from the stream and build the
+// corresponding Instruction object in Res.  Operands are looked up by slot
+// number with getValue().
+//
+// Returns true on error (malformed encoding or unrecognized opcode), false on
+// success.
+//
+bool BytecodeParser::ParseInstruction(const uchar *&Buf, const uchar *EndBuf,
+                                      Instruction *&Res) {
+  RawInst Raw;
+  if (ParseRawInst(Buf, EndBuf, Raw)) return true;
+
+  if (Raw.Opcode >= Instruction::FirstUnaryOp &&
+      Raw.Opcode <  Instruction::NumUnaryOps  && Raw.NumOperands == 1) {
+    Res = Instruction::getUnaryOperator(Raw.Opcode, getValue(Raw.Ty, Raw.Arg1));
+    return false;
+  } else if (Raw.Opcode >= Instruction::FirstBinaryOp &&
+             Raw.Opcode <  Instruction::NumBinaryOps  && Raw.NumOperands == 2) {
+    Res = Instruction::getBinaryOperator(Raw.Opcode, getValue(Raw.Ty, Raw.Arg1),
+                                         getValue(Raw.Ty, Raw.Arg2));
+    return false;
+  } else if (Raw.Opcode == Instruction::PHINode) {
+    PHINode *PN = new PHINode(Raw.Ty);
+    switch (Raw.NumOperands) {
+    case 0: cerr << "Invalid phi node encountered!\n";
+            delete PN;
+            return true;
+    case 1: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1)); break;
+    case 2: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1));
+            PN->addIncoming(getValue(Raw.Ty, Raw.Arg2)); break;
+    case 3: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1));
+            PN->addIncoming(getValue(Raw.Ty, Raw.Arg2));
+            PN->addIncoming(getValue(Raw.Ty, Raw.Arg3)); break;
+    default:
+      PN->addIncoming(getValue(Raw.Ty, Raw.Arg1));
+      PN->addIncoming(getValue(Raw.Ty, Raw.Arg2));
+      {
+        vector<unsigned> &args = *Raw.VarArgs;
+        for (unsigned i = 0; i < args.size(); i++)
+          PN->addIncoming(getValue(Raw.Ty, args[i]));
+      }
+      delete Raw.VarArgs;
+    }
+    Res = PN;
+    return false;
+  } else if (Raw.Opcode == Instruction::Ret) {
+    if (Raw.NumOperands == 0) {
+      Res = new ReturnInst(); return false;
+    } else if (Raw.NumOperands == 1) {
+      Res = new ReturnInst(getValue(Raw.Ty, Raw.Arg1)); return false;
+    }
+  } else if (Raw.Opcode == Instruction::Br) {
+    if (Raw.NumOperands == 1) {
+      Res = new BranchInst((BasicBlock*)getValue(Type::LabelTy, Raw.Arg1));
+      return false;
+    } else if (Raw.NumOperands == 3) {
+      Res = new BranchInst((BasicBlock*)getValue(Type::LabelTy, Raw.Arg1),
+                           (BasicBlock*)getValue(Type::LabelTy, Raw.Arg2),
+                                        getValue(Type::BoolTy , Raw.Arg3));
+      return false;
+    }
+  } else if (Raw.Opcode == Instruction::Switch) {
+    SwitchInst *I =
+      new SwitchInst(getValue(Raw.Ty, Raw.Arg1),
+                     (BasicBlock*)getValue(Type::LabelTy, Raw.Arg2));
+    Res = I;
+    if (Raw.NumOperands < 3) return false;  // No destinations?  Weird.
+
+    // The destinations come in (value, label) pairs after the first two
+    // operands, so the number of trailing operands has to be even.
+    if (Raw.NumOperands == 3 || Raw.VarArgs->size() & 1) {
+      cerr << "Switch statement with odd number of arguments!\n";
+      if (Raw.NumOperands > 3) delete Raw.VarArgs;  // Only allocated if > 3
+      delete I;
+      Res = 0;                  // Don't hand back a pointer to freed memory
+      return true;
+    }
+
+    vector<unsigned> &args = *Raw.VarArgs;
+    for (unsigned i = 0; i < args.size(); i += 2)
+      I->dest_push_back((ConstPoolVal*)getValue(Raw.Ty, args[i]),
+                        (BasicBlock*)getValue(Type::LabelTy, args[i+1]));
+
+    delete Raw.VarArgs;
+    return false;
+  } else if (Raw.Opcode == Instruction::Call) {
+    // Gather the slot numbers of the actual arguments (operands 2..N) first.
+    // This releases VarArgs on every path and lets all arguments go through
+    // the same bounds checked loop below.
+    vector<unsigned> ArgSlots;
+    if (Raw.NumOperands >= 2) ArgSlots.push_back(Raw.Arg2);
+    if (Raw.NumOperands == 3) {
+      ArgSlots.push_back(Raw.Arg3);
+    } else if (Raw.NumOperands > 3) {
+      ArgSlots.insert(ArgSlots.end(), Raw.VarArgs->begin(), Raw.VarArgs->end());
+      delete Raw.VarArgs;
+    }
+
+    Method *M = (Method*)getValue(Raw.Ty, Raw.Arg1);
+    if (M == 0) return true;
+
+    if (Raw.NumOperands == 0) {
+      cerr << "Invalid call instruction encountered!\n";
+      return true;
+    }
+
+    const MethodType::ParamTypes &PL = M->getMethodType()->getParamTypes();
+    MethodType::ParamTypes::const_iterator It = PL.begin();
+
+    vector<Value *> Params;
+    for (unsigned i = 0; i < ArgSlots.size(); i++) {
+      if (It == PL.end()) return true;   // More arguments than parameters
+      Params.push_back(getValue(*It++, ArgSlots[i]));
+    }
+    if (It != PL.end()) return true;     // Fewer arguments than parameters
+
+    Res = new CallInst(M, Params);
+    return false;
+  } else if (Raw.Opcode == Instruction::Malloc) {
+    if (Raw.NumOperands > 2) return true;
+    Value *Sz = (Raw.NumOperands == 2) ? getValue(Type::UIntTy, Raw.Arg2) : 0;
+    Res = new MallocInst((ConstPoolType*)getValue(Type::TypeTy, Raw.Arg1), Sz);
+    return false;
+  } else if (Raw.Opcode == Instruction::Alloca) {
+    if (Raw.NumOperands > 2) return true;
+    Value *Sz = (Raw.NumOperands == 2) ? getValue(Type::UIntTy, Raw.Arg2) : 0;
+    Res = new AllocaInst((ConstPoolType*)getValue(Type::TypeTy, Raw.Arg1), Sz);
+    return false;
+  } else if (Raw.Opcode == Instruction::Free) {
+    Value *Val = getValue(Raw.Ty, Raw.Arg1);
+    // getValue returns 0 on failure, so check before looking at the type.
+    if (Val == 0 || !Val->getType()->isPointerType()) return true;
+    Res = new FreeInst(Val);
+    return false;
+  }
+
+  cerr << "Unrecognized instruction! " << Raw.Opcode << endl;
+  return true;
+}
diff --git a/lib/Bytecode/Reader/Makefile b/lib/Bytecode/Reader/Makefile
new file mode 100644
index 0000000000..2c79d15104
--- /dev/null
+++ b/lib/Bytecode/Reader/Makefile
@@ -0,0 +1,7 @@
+
+LEVEL = ../../..
+
+LIBRARYNAME = bcreader
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp
new file mode 100644
index 0000000000..c3f4c907fe
--- /dev/null
+++ b/lib/Bytecode/Reader/Reader.cpp
@@ -0,0 +1,478 @@
+//===- Reader.cpp - Code to read bytecode files -----------------------------===
+//
+// This library implements the functionality defined in llvm/Bytecode/Reader.h
+//
+// Note that this library should be as fast as possible, reentrant, and
+// threadsafe!!
+//
+// TODO: Make error message outputs be configurable depending on an option?
+// TODO: Allow passing in an option to ignore the symbol table
+//
+//===------------------------------------------------------------------------===
+
+#include "llvm/Bytecode/Reader.h"
+#include "llvm/Bytecode/Format.h"
+#include "llvm/Module.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ConstPoolVals.h"
+#include "llvm/iOther.h"
+#include "ReaderInternals.h"
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+
+// getTypeSlot - Compute the type plane number for Ty and store it in Slot.
+// Primitive types use their primitive ID; other types are looked up in
+// TypeMap.  Returns true if the type is not known, false on success.
+//
+bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) {
+  if (Ty->isPrimitiveType()) {
+    Slot = Ty->getPrimitiveID();
+  } else {
+    TypeMapType::iterator I = TypeMap.find(Ty);
+    if (I == TypeMap.end()) return true;   // Didn't find type!
+    Slot = I->second;
+  }
+  //cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << endl;
+  return false;
+}
+
+// getType - Map a type number from the stream to a Type.  Primitive IDs are
+// resolved directly; anything else is looked up as a value in the 'type'
+// plane.  Returns 0 if no such type has been read.
+//
+const Type *BytecodeParser::getType(unsigned ID) {
+  const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID);
+  if (T) return T;
+
+  //cerr << "Looking up Type ID: " << ID << endl;
+
+  const Value *D = getValue(Type::TypeTy, ID, false);
+  if (D == 0) return 0;
+
+  assert(D->getType() == Type::TypeTy &&
+         D->getValueType() == Value::ConstantVal);
+
+  return ((const ConstPoolType*)D)->getValue();
+}
+
+// insertValue - Append Def to the plane of ValueTab that corresponds to its
+// type, growing the table if necessary.  When Def is a type constant, the type
+// it names is also registered in TypeMap (first definition wins) so that
+// getTypeSlot can find it later.
+//
+// Returns true if the type of Def has no slot, false on success.
+//
+bool BytecodeParser::insertValue(Value *Def, vector<ValueList> &ValueTab) {
+  unsigned type;
+  if (getTypeSlot(Def->getType(), type)) return true;
+
+  if (ValueTab.size() <= type)
+    ValueTab.resize(type+1, ValueList());
+
+  //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size()
+  //     << "] = " << Def << endl;
+
+  if (type == Type::TypeTyID && Def->getValueType() == Value::ConstantVal) {
+    const Type *Ty = ((const ConstPoolType*)Def)->getValue();
+    unsigned ValueOffset = FirstDerivedTyID;
+
+    // Take into consideration module level types.  ModuleValues may not have
+    // a 'type' plane at all, so check before indexing into it.
+    if (&ValueTab == &Values && ModuleValues.size() > type)
+      ValueOffset += ModuleValues[type].size();
+
+    if (TypeMap.find(Ty) == TypeMap.end())
+      TypeMap[Ty] = ValueTab[type].size()+ValueOffset;
+  }
+
+  ValueTab[type].push_back(Def);
+
+  return false;
+}
+
+// getValue - Look up value number oNum in the plane for type Ty.  Module level
+// values are numbered first, followed by the values local to the current
+// method.  If the value does not exist yet and Create is true, a placeholder
+// is created, queued for later resolution, and returned; if Create is false, 0
+// is returned instead.  0 is also returned if Ty has no type slot.
+//
+Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) {
+  unsigned Num = oNum;
+  unsigned type;   // The type plane it lives in...
+
+  if (getTypeSlot(Ty, type)) return 0; // TODO: true
+
+  if (type == Type::TypeTyID) {  // The 'type' plane has implicit values
+    const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num);
+    if (T) return (Value*)T;   // Asked for a primitive type...
+
+    // Otherwise, derived types need offset...
+    Num -= FirstDerivedTyID;
+  }
+
+  if (ModuleValues.size() > type) {
+    if (ModuleValues[type].size() > Num)
+      return ModuleValues[type][Num];
+    Num -= ModuleValues[type].size();
+  }
+
+  if (Values.size() > type && Values[type].size() > Num)
+    return Values[type][Num];
+
+  if (!Create) return 0;  // Do not create a placeholder?
+
+  Value *d = 0;
+  switch (Ty->getPrimitiveID()) {
+  case Type::LabelTyID: d = new    BBPHolder(Ty, oNum); break;
+  case Type::MethodTyID:
+    cerr << "Creating method pholder! : " << type << ":" << oNum << " "
+         << Ty->getName() << endl;
+    d = new MethPHolder(Ty, oNum);
+    insertValue(d, LateResolveModuleValues);
+    return d;
+  default:                   d = new   DefPHolder(Ty, oNum); break;
+  }
+
+  assert(d != 0 && "How did we not make something?");
+  if (insertValue(d, LateResolveValues)) return 0;
+  return d;
+}
+
+// postResolveValues - Replace every placeholder queued in ValTab with the real
+// value it stands for, emptying the table in the process.  Returns true if any
+// placeholder could not be resolved.
+//
+bool BytecodeParser::postResolveValues(ValueTable &ValTab) {
+  bool Error = false;
+  for (unsigned ty = 0; ty < ValTab.size(); ty++) {
+    ValueList &DL = ValTab[ty];
+    unsigned Size;
+    while ((Size = DL.size())) {
+      unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]);
+
+      Value *D = DL[Size-1];
+      DL.pop_back();
+
+      Value *NewDef = getValue(D->getType(), IDNumber, false);
+      if (NewDef == 0) {
+        // If we couldn't find a def (error case), then leak a little memory,
+        // 'cause otherwise we can't remove all uses!
+        Error = true;  // Unresolved thinger
+        cerr << "Unresolvable reference found: <" << D->getType()->getName()
+             << ">:" << IDNumber << "!\n";
+      } else {
+        // Fixup all of the uses of this placeholder def...
+        D->replaceAllUsesWith(NewDef);
+
+        // Now that all the uses are gone, delete the placeholder...
+        delete D;
+      }
+    }
+  }
+
+  return Error;
+}
+
+// ParseBasicBlock - Read instructions until EndBuf and collect them in a newly
+// allocated basic block stored in BB.  Every instruction is also entered into
+// the method local value table.  On error the block is deleted and true is
+// returned; BB must not be used in that case.
+//
+bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf,
+                                     BasicBlock *&BB) {
+  BB = new BasicBlock();
+
+  while (Buf < EndBuf) {
+    Instruction *Def;
+    if (ParseInstruction(Buf, EndBuf, Def)) {
+      delete BB;
+      return true;
+    }
+
+    if (Def == 0) { delete BB; return true; }
+    if (insertValue(Def, Values)) {
+      delete Def;       // Not owned by BB yet, so it has to be freed here
+      delete BB;
+      return true;
+    }
+
+    BB->getInstList().push_back(Def);
+  }
+
+  return false;
+}
+
+// ParseSymbolTable - Read symbol table planes until EndBuf and apply the names
+// found to the values they refer to.  Returns true on error, including a
+// reference to a value that does not exist.
+//
+bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf) {
+  while (Buf < EndBuf) {
+    // Symtab block header: [num entries][type id number]
+    unsigned NumEntries, Typ;
+    if (read_vbr(Buf, EndBuf, NumEntries) ||
+        read_vbr(Buf, EndBuf, Typ)) return true;
+    const Type *Ty = getType(Typ);
+    if (Ty == 0) return true;
+
+    for (unsigned i = 0; i < NumEntries; i++) {
+      // Symtab entry: [def slot #][name]
+      unsigned slot;
+      if (read_vbr(Buf, EndBuf, slot)) return true;
+      string Name;
+      if (read(Buf, EndBuf, Name, false))  // Not aligned...
+        return true;
+
+      Value *D = getValue(Ty, slot, false); // Find mapping...
+      if (D == 0) return true;
+      D->setName(Name);
+    }
+  }
+
+  return Buf > EndBuf;
+}
+
+
+// ParseMethod - Read one method block.  The signature and module slot of the
+// method are taken from the front of MethodSignatureList, which was filled in
+// by ParseModuleGlobalInfo.  On success the method is appended to the method
+// list of C and takes the place of its placeholder in ModuleValues.
+//
+// Returns true on error; the partially built method is deleted in that case.
+//
+bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf,
+                                 Module *C) {
+  // Clear out the local values table...
+  Values.clear();
+  if (MethodSignatureList.empty()) return true;  // Unexpected method!
+
+  const MethodType *MTy = MethodSignatureList.front().first;
+  unsigned MethSlot = MethodSignatureList.front().second;
+  MethodSignatureList.pop_front();
+  Method *M = new Method(MTy);
+
+  const MethodType::ParamTypes &Params = MTy->getParamTypes();
+  for (MethodType::ParamTypes::const_iterator It = Params.begin();
+       It != Params.end(); It++) {
+    MethodArgument *MA = new MethodArgument(*It);
+    if (insertValue(MA, Values)) {
+      delete MA;        // Not owned by M yet
+      delete M;
+      return true;
+    }
+    M->getArgumentList().push_back(MA);
+  }
+
+  while (Buf < EndBuf) {
+    unsigned Type, Size;
+    const uchar *OldBuf = Buf;
+    if (readBlock(Buf, EndBuf, Type, Size)) { delete M; return true; }
+
+    switch (Type) {
+    case BytecodeFormat::ConstantPool:
+      if (ParseConstantPool(Buf, Buf+Size, M->getConstantPool(), Values)) {
+        cerr << "Error reading constant pool!\n";
+        delete M; return true;
+      }
+      break;
+
+    case BytecodeFormat::BasicBlock: {
+      BasicBlock *BB;
+      if (ParseBasicBlock(Buf, Buf+Size, BB)) {   // Deletes BB on failure
+        cerr << "Error parsing basic block!\n";
+        delete M; return true;                    // Parse error... :(
+      }
+      if (insertValue(BB, Values)) {
+        cerr << "Error parsing basic block!\n";
+        delete BB;                                // Not owned by M yet
+        delete M; return true;
+      }
+
+      M->getBasicBlocks().push_back(BB);
+      break;
+    }
+
+    case BytecodeFormat::SymbolTable:
+      if (ParseSymbolTable(Buf, Buf+Size)) {
+        cerr << "Error reading method symbol table!\n";
+        delete M; return true;
+      }
+      break;
+
+    default:
+      Buf += Size;
+      if (OldBuf > Buf) { delete M; return true; }  // Wrap around!
+      break;
+    }
+    if (align32(Buf, EndBuf)) {
+      delete M;    // Malformed bc file, read past end of block.
+      return true;
+    }
+  }
+
+  if (postResolveValues(LateResolveValues) ||
+      postResolveValues(LateResolveModuleValues)) {
+    delete M; return true;     // Unresolvable references!
+  }
+
+  Value *MethPHolder = getValue(MTy, MethSlot, false);
+  assert(MethPHolder && "Something is broken no placeholder found!");
+  assert(MethPHolder->getValueType() == Value::MethodVal && "Not a method?");
+
+  // The lookup is repeated outside of the assert so that 'type' is still set
+  // when assertions are compiled out.
+  unsigned type;  // Type slot
+  assert(!getTypeSlot(MTy, type) && "How can meth type not exist?");
+  getTypeSlot(MTy, type);
+
+  C->getMethodList().push_back(M);
+
+  // Replace placeholder with the real method pointer...
+  ModuleValues[type][MethSlot] = M;
+
+  // If anyone is using the placeholder make them use the real method instead
+  MethPHolder->replaceAllUsesWith(M);
+
+  // We don't need the placeholder anymore!
+  delete MethPHolder;
+
+  return false;
+}
+
+// ParseModuleGlobalInfo - Read the list of method signatures for the methods
+// that follow in the module.  For each one a placeholder is inserted into
+// ModuleValues and the (type, slot) pair is queued on MethodSignatureList for
+// ParseMethod to consume.  Returns true on error.
+//
+bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End,
+                                           Module *C) {
+
+  if (!MethodSignatureList.empty()) return true;  // Two ModuleGlobal blocks?
+
+  // Read the method signatures for all of the methods that are coming, and
+  // create fillers in the Value tables.
+  unsigned MethSignature;
+  if (read_vbr(Buf, End, MethSignature)) return true;
+  while (MethSignature != Type::VoidTyID) { // List is terminated by Void
+    const Type *Ty = getType(MethSignature);
+    if (!Ty || !Ty->isMethodType()) {
+      cerr << "Method not meth type! ";
+      if (Ty) cerr << Ty->getName(); else cerr << MethSignature; cerr << endl;
+      return true;
+    }
+
+    // When the ModuleGlobalInfo section is read, we load the type of each method
+    // and the 'ModuleValues' slot that it lands in.  We then load a placeholder
+    // into its slot to reserve it.  When the method is loaded, this placeholder
+    // is replaced.
+
+    // Insert the placeholder...
+    Value *Def = new MethPHolder(Ty, 0);
+    if (insertValue(Def, ModuleValues)) { delete Def; return true; }
+
+    // Figure out which entry of its typeslot it went into...
+    unsigned TypeSlot;
+    if (getTypeSlot(Def->getType(), TypeSlot)) return true;
+
+    unsigned SlotNo = ModuleValues[TypeSlot].size()-1;
+
+    // Keep track of this information in a linked list that is emptied as
+    // methods are loaded...
+    //
+    MethodSignatureList.push_back(make_pair((const MethodType*)Ty, SlotNo));
+    if (read_vbr(Buf, End, MethSignature)) return true;
+  }
+
+  if (align32(Buf, End)) return true;
+
+  // This is for future proofing... in the future extra fields may be added that
+  // we don't understand, so we transparently ignore them.
+  //
+  Buf = End;
+  return false;
+}
+
+// ParseModule - Read the outermost module block and everything nested in it.
+// On success the newly allocated module is stored in C and false is returned.
+// On error true is returned; any module that was allocated has been deleted
+// and C must not be used.
+//
+bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf,
+                                 Module *&C) {
+
+  unsigned Type, Size;
+  if (readBlock(Buf, EndBuf, Type, Size)) return true;
+  if (Type != BytecodeFormat::Module || Buf+Size != EndBuf)
+    return true;                      // Hrm, not a class?
+
+  MethodSignatureList.clear();                 // Just in case...
+
+  // Read into instance variables...
+  if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return true;
+  if (align32(Buf, EndBuf)) return true;
+
+  C = new Module();
+
+  while (Buf < EndBuf) {
+    const uchar *OldBuf = Buf;
+    if (readBlock(Buf, EndBuf, Type, Size)) { delete C; return true; }
+    switch (Type) {
+    case BytecodeFormat::ModuleGlobalInfo:
+      if (ParseModuleGlobalInfo(Buf, Buf+Size, C)) {
+        cerr << "Error reading class global info section!\n";
+        delete C; return true;
+      }
+      break;
+
+    case BytecodeFormat::ConstantPool:
+      if (ParseConstantPool(Buf, Buf+Size, C->getConstantPool(), ModuleValues)) {
+        cerr << "Error reading class constant pool!\n";
+        delete C; return true;
+      }
+      break;
+
+    case BytecodeFormat::Method: {
+      if (ParseMethod(Buf, Buf+Size, C)) {
+        delete C; return true;               // Error parsing method
+      }
+      break;
+    }
+
+    case BytecodeFormat::SymbolTable:
+      if (ParseSymbolTable(Buf, Buf+Size)) {
+        cerr << "Error reading class symbol table!\n";
+        delete C; return true;
+      }
+      break;
+
+    default:
+      cerr << "Unknown class block: " << Type << endl;
+      Buf += Size;
+      if (OldBuf > Buf) { delete C; return true; }  // Wrap around!
+      break;
+    }
+    if (align32(Buf, EndBuf)) { delete C; return true; }
+  }
+
+  if (!MethodSignatureList.empty()) {     // Expected more methods!
+    delete C;
+    return true;
+  }
+  return false;
+}
+
+// ParseBytecode - Check the 'llvm' signature at the start of the buffer and
+// parse the module that follows it.  Returns the module, or 0 on error.
+//
+Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) {
+  LateResolveValues.clear();
+  unsigned Sig;
+  // Read and check signature...
+  if (read(Buf, EndBuf, Sig) ||
+      Sig != ('l' | ('l' << 8) | ('v' << 16) | 'm' << 24))
+    return 0;                         // Invalid signature!
+
+  Module *Result;
+  if (ParseModule(Buf, EndBuf, Result)) return 0;
+  return Result;
+}
+
+
+// ParseBytecodeBuffer - Parse a module out of an in-memory buffer of Length
+// bytes.  Returns 0 on error.
+//
+Module *ParseBytecodeBuffer(const uchar *Buffer, unsigned Length) {
+  BytecodeParser Parser;
+  return Parser.ParseBytecode(Buffer, Buffer+Length);
+}
+
+// Parse and return a class file...
+//
+// If Filename is "-" the bytecode is read from stdin, otherwise the named file
+// is mapped into memory and parsed from there.  Returns 0 if the input cannot
+// be read, is empty, or does not parse.
+//
+Module *ParseBytecodeFile(const string &Filename) {
+  struct stat StatBuf;
+  Module *Result = 0;
+
+  if (Filename != string("-")) {        // Read from a file...
+    // open() needs a NUL terminated string, which only c_str() guarantees.
+    int FD = open(Filename.c_str(), O_RDONLY);
+    if (FD == -1) return 0;
+
+    if (fstat(FD, &StatBuf) == -1) { close(FD); return 0; }
+
+    size_t Length = StatBuf.st_size;
+    if (Length == 0) { close(FD); return 0; }
+    uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ,
+                                 MAP_PRIVATE, FD, 0);
+    if (Buffer == (uchar*)MAP_FAILED) { close(FD); return 0; }
+
+    BytecodeParser Parser;
+    Result = Parser.ParseBytecode(Buffer, Buffer+Length);
+
+    munmap((char*)Buffer, Length);
+    close(FD);
+  } else {                              // Read from stdin
+    size_t FileSize = 0;
+    int BlockSize;
+    uchar Buffer[4096], *FileData = 0;
+    while ((BlockSize = read(0, Buffer, sizeof(Buffer)))) {
+      if (BlockSize == -1) { free(FileData); return 0; }
+
+      // realloc leaves the old block alone on failure, so keep the old
+      // pointer around until we know that the new one is good.
+      uchar *NewData = (uchar*)realloc(FileData, FileSize+BlockSize);
+      if (NewData == 0) { free(FileData); return 0; }
+      FileData = NewData;
+      memcpy(FileData+FileSize, Buffer, BlockSize);
+      FileSize += BlockSize;
+    }
+
+    if (FileSize == 0) { free(FileData); return 0; }
+
+#define ALIGN_PTRS 1
+#if ALIGN_PTRS
+    uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE,
+                              MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if (Buf == (uchar*)MAP_FAILED) { free(FileData); return 0; }
+    memcpy(Buf, FileData, FileSize);    // Copy first, *then* free the source
+    free(FileData);
+#else
+    uchar *Buf = FileData;
+#endif
+
+    BytecodeParser Parser;
+    Result = Parser.ParseBytecode(Buf, Buf+FileSize);
+
+#if ALIGN_PTRS
+    munmap((char*)Buf, FileSize);   // Free mmap'd data area
+#else
+    free(FileData);          // Free realloc'd block of memory
+#endif
+  }
+
+  return Result;
+}
diff --git a/lib/Bytecode/Reader/ReaderInternals.h b/lib/Bytecode/Reader/ReaderInternals.h
new file mode 100644
index 0000000000..3bb0472674
--- /dev/null
+++ b/lib/Bytecode/Reader/ReaderInternals.h
@@ -0,0 +1,146 @@
+//===-- ReaderInternals.h - Definitions internal to the reader ---*- C++ -*--=//
+//
+//  This header file defines various stuff that is used by the bytecode reader.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef READER_INTERNALS_H
+#define READER_INTERNALS_H
+
+#include "llvm/Bytecode/Primitives.h"
+#include "llvm/SymTabValue.h"
+#include "llvm/Method.h"
+#include "llvm/Instruction.h"
+#include <map>
+#include <utility>
+
+class BasicBlock;
+class Method;
+class Module;
+class Type;
+
+typedef unsigned char uchar;
+
+struct RawInst {       // The raw fields out of the bytecode stream...
+  unsigned NumOperands;
+  unsigned Opcode;
+  const Type *Ty; |