diff options
author | Chris Lattner <sabre@nondot.org> | 2001-06-06 20:29:01 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2001-06-06 20:29:01 +0000 |
commit | 009505452b713ed2e3a8e99c5545a6e721c65495 (patch) | |
tree | 136a71c5b87bdf534d1f20a67558b49226b5a4d6 /lib/Bytecode/Reader/Reader.cpp | |
parent | 8d0afd3d32d1d67f9aa5df250a1d6955aa8f1ac9 (diff) |
Initial revision
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@2 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Bytecode/Reader/Reader.cpp')
-rw-r--r-- | lib/Bytecode/Reader/Reader.cpp | 478 |
1 files changed, 478 insertions, 0 deletions
diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp new file mode 100644 index 0000000000..c3f4c907fe --- /dev/null +++ b/lib/Bytecode/Reader/Reader.cpp @@ -0,0 +1,478 @@ +//===- Reader.cpp - Code to read bytecode files -----------------------------=== +// +// This library implements the functionality defined in llvm/Bytecode/Reader.h +// +// Note that this library should be as fast as possible, reentrant, and +// threadsafe!! +// +// TODO: Make error message outputs be configurable depending on an option? +// TODO: Allow passing in an option to ignore the symbol table +// +//===------------------------------------------------------------------------=== + +#include "llvm/Bytecode/Reader.h" +#include "llvm/Bytecode/Format.h" +#include "llvm/Module.h" +#include "llvm/BasicBlock.h" +#include "llvm/DerivedTypes.h" +#include "llvm/ConstPoolVals.h" +#include "llvm/iOther.h" +#include "ReaderInternals.h" +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <algorithm> + +bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) { + if (Ty->isPrimitiveType()) { + Slot = Ty->getPrimitiveID(); + } else { + TypeMapType::iterator I = TypeMap.find(Ty); + if (I == TypeMap.end()) return true; // Didn't find type! + Slot = I->second; + } + //cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << endl; + return false; +} + +const Type *BytecodeParser::getType(unsigned ID) { + const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID); + if (T) return T; + + //cerr << "Looking up Type ID: " << ID << endl; + + const Value *D = getValue(Type::TypeTy, ID, false); + if (D == 0) return 0; + + assert(D->getType() == Type::TypeTy && + D->getValueType() == Value::ConstantVal); + + + return ((const ConstPoolType*)D)->getValue();; +} + +bool BytecodeParser::insertValue(Value *Def, vector<ValueList> &ValueTab) { + unsigned type; + if (getTypeSlot(Def->getType(), type)) return true; + + if (ValueTab.size() <= type) + ValueTab.resize(type+1, ValueList()); + + //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size() + // << "] = " << Def << endl; + + if (type == Type::TypeTyID && Def->getValueType() == Value::ConstantVal) { + const Type *Ty = ((const ConstPoolType*)Def)->getValue(); + unsigned ValueOffset = FirstDerivedTyID; + + if (&ValueTab == &Values) // Take into consideration module level types + ValueOffset += ModuleValues[type].size(); + + if (TypeMap.find(Ty) == TypeMap.end()) + TypeMap[Ty] = ValueTab[type].size()+ValueOffset; + } + + ValueTab[type].push_back(Def); + + return false; +} + +Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) { + unsigned Num = oNum; + unsigned type; // The type plane it lives in... + + if (getTypeSlot(Ty, type)) return 0; // TODO: true + + if (type == Type::TypeTyID) { // The 'type' plane has implicit values + const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num); + if (T) return (Value*)T; // Asked for a primitive type... + + // Otherwise, derived types need offset... + Num -= FirstDerivedTyID; + } + + if (ModuleValues.size() > type) { + if (ModuleValues[type].size() > Num) + return ModuleValues[type][Num]; + Num -= ModuleValues[type].size(); + } + + if (Values.size() > type && Values[type].size() > Num) + return Values[type][Num]; + + if (!Create) return 0; // Do not create a placeholder? + + Value *d = 0; + switch (Ty->getPrimitiveID()) { + case Type::LabelTyID: d = new BBPHolder(Ty, oNum); break; + case Type::MethodTyID: + cerr << "Creating method pholder! : " << type << ":" << oNum << " " + << Ty->getName() << endl; + d = new MethPHolder(Ty, oNum); + insertValue(d, LateResolveModuleValues); + return d; + default: d = new DefPHolder(Ty, oNum); break; + } + + assert(d != 0 && "How did we not make something?"); + if (insertValue(d, LateResolveValues)) return 0; + return d; +} + +bool BytecodeParser::postResolveValues(ValueTable &ValTab) { + bool Error = false; + for (unsigned ty = 0; ty < ValTab.size(); ty++) { + ValueList &DL = ValTab[ty]; + unsigned Size; + while ((Size = DL.size())) { + unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]); + + Value *D = DL[Size-1]; + DL.pop_back(); + + Value *NewDef = getValue(D->getType(), IDNumber, false); + if (NewDef == 0) { + Error = true; // Unresolved thinger + cerr << "Unresolvable reference found: <" << D->getType()->getName() + << ">:" << IDNumber << "!\n"; + } else { + // Fixup all of the uses of this placeholder def... + D->replaceAllUsesWith(NewDef); + + // Now that all the uses are gone, delete the placeholder... + // If we couldn't find a def (error case), then leak a little + delete D; // memory, 'cause otherwise we can't remove all uses! + } + } + } + + return Error; +} + +bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf, + BasicBlock *&BB) { + BB = new BasicBlock(); + + while (Buf < EndBuf) { + Instruction *Def; + if (ParseInstruction(Buf, EndBuf, Def)) { + delete BB; + return true; + } + + if (Def == 0) { delete BB; return true; } + if (insertValue(Def, Values)) { delete BB; return true; } + + BB->getInstList().push_back(Def); + } + + return false; +} + +bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf) { + while (Buf < EndBuf) { + // Symtab block header: [num entries][type id number] + unsigned NumEntries, Typ; + if (read_vbr(Buf, EndBuf, NumEntries) || + read_vbr(Buf, EndBuf, Typ)) return true; + const Type *Ty = getType(Typ); + if (Ty == 0) return true; + + for (unsigned i = 0; i < NumEntries; i++) { + // Symtab entry: [def slot #][name] + unsigned slot; + if (read_vbr(Buf, EndBuf, slot)) return true; + string Name; + if (read(Buf, EndBuf, Name, false)) // Not aligned... + return true; + + Value *D = getValue(Ty, slot, false); // Find mapping... + if (D == 0) return true; + D->setName(Name); + } + } + + return Buf > EndBuf; +} + + +bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf, + Module *C) { + // Clear out the local values table... + Values.clear(); + if (MethodSignatureList.empty()) return true; // Unexpected method! + + const MethodType *MTy = MethodSignatureList.front().first; + unsigned MethSlot = MethodSignatureList.front().second; + MethodSignatureList.pop_front(); + Method *M = new Method(MTy); + + const MethodType::ParamTypes &Params = MTy->getParamTypes(); + for (MethodType::ParamTypes::const_iterator It = Params.begin(); + It != Params.end(); It++) { + MethodArgument *MA = new MethodArgument(*It); + if (insertValue(MA, Values)) { delete M; return true; } + M->getArgumentList().push_back(MA); + } + + while (Buf < EndBuf) { + unsigned Type, Size; + const uchar *OldBuf = Buf; + if (readBlock(Buf, EndBuf, Type, Size)) { delete M; return true; } + + switch (Type) { + case BytecodeFormat::ConstantPool: + if (ParseConstantPool(Buf, Buf+Size, M->getConstantPool(), Values)) { + cerr << "Error reading constant pool!\n"; + delete M; return true; + } + break; + + case BytecodeFormat::BasicBlock: { + BasicBlock *BB; + if (ParseBasicBlock(Buf, Buf+Size, BB) || + insertValue(BB, Values)) { + cerr << "Error parsing basic block!\n"; + delete M; return true; // Parse error... :( + } + + M->getBasicBlocks().push_back(BB); + break; + } + + case BytecodeFormat::SymbolTable: + if (ParseSymbolTable(Buf, Buf+Size)) { + cerr << "Error reading method symbol table!\n"; + delete M; return true; + } + break; + + default: + Buf += Size; + if (OldBuf > Buf) return true; // Wrap around! + break; + } + if (align32(Buf, EndBuf)) { + delete M; // Malformed bc file, read past end of block. + return true; + } + } + + if (postResolveValues(LateResolveValues) || + postResolveValues(LateResolveModuleValues)) { + delete M; return true; // Unresolvable references! + } + + Value *MethPHolder = getValue(MTy, MethSlot, false); + assert(MethPHolder && "Something is broken no placeholder found!"); + assert(MethPHolder->getValueType() == Value::MethodVal && "Not a method?"); + + unsigned type; // Type slot + assert(!getTypeSlot(MTy, type) && "How can meth type not exist?"); + getTypeSlot(MTy, type); + + C->getMethodList().push_back(M); + + // Replace placeholder with the real method pointer... + ModuleValues[type][MethSlot] = M; + + // If anyone is using the placeholder make them use the real method instead + MethPHolder->replaceAllUsesWith(M); + + // We don't need the placeholder anymore! + delete MethPHolder; + + return false; +} + +bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End, + Module *C) { + + if (!MethodSignatureList.empty()) return true; // Two ModuleGlobal blocks? + + // Read the method signatures for all of the methods that are coming, and + // create fillers in the Value tables. + unsigned MethSignature; + if (read_vbr(Buf, End, MethSignature)) return true; + while (MethSignature != Type::VoidTyID) { // List is terminated by Void + const Type *Ty = getType(MethSignature); + if (!Ty || !Ty->isMethodType()) { + cerr << "Method not meth type! "; + if (Ty) cerr << Ty->getName(); else cerr << MethSignature; cerr << endl; + return true; + } + + // When the ModuleGlobalInfo section is read, we load the type of each method + // and the 'ModuleValues' slot that it lands in. We then load a placeholder + // into its slot to reserve it. When the method is loaded, this placeholder + // is replaced. + + // Insert the placeholder... + Value *Def = new MethPHolder(Ty, 0); + insertValue(Def, ModuleValues); + + // Figure out which entry of its typeslot it went into... + unsigned TypeSlot; + if (getTypeSlot(Def->getType(), TypeSlot)) return true; + + unsigned SlotNo = ModuleValues[TypeSlot].size()-1; + + // Keep track of this information in a linked list that is emptied as + // methods are loaded... + // + MethodSignatureList.push_back(make_pair((const MethodType*)Ty, SlotNo)); + if (read_vbr(Buf, End, MethSignature)) return true; + } + + if (align32(Buf, End)) return true; + + // This is for future proofing... in the future extra fields may be added that + // we don't understand, so we transparently ignore them. + // + Buf = End; + return false; +} + +bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf, + Module *&C) { + + unsigned Type, Size; + if (readBlock(Buf, EndBuf, Type, Size)) return true; + if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) + return true; // Hrm, not a class? + + MethodSignatureList.clear(); // Just in case... + + // Read into instance variables... + if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return true; + if (align32(Buf, EndBuf)) return true; + + C = new Module(); + + while (Buf < EndBuf) { + const uchar *OldBuf = Buf; + if (readBlock(Buf, EndBuf, Type, Size)) { delete C; return true; } + switch (Type) { + case BytecodeFormat::ModuleGlobalInfo: + if (ParseModuleGlobalInfo(Buf, Buf+Size, C)) { + cerr << "Error reading class global info section!\n"; + delete C; return true; + } + break; + + case BytecodeFormat::ConstantPool: + if (ParseConstantPool(Buf, Buf+Size, C->getConstantPool(), ModuleValues)) { + cerr << "Error reading class constant pool!\n"; + delete C; return true; + } + break; + + case BytecodeFormat::Method: { + if (ParseMethod(Buf, Buf+Size, C)) { + delete C; return true; // Error parsing method + } + break; + } + + case BytecodeFormat::SymbolTable: + if (ParseSymbolTable(Buf, Buf+Size)) { + cerr << "Error reading class symbol table!\n"; + delete C; return true; + } + break; + + default: + cerr << "Unknown class block: " << Type << endl; + Buf += Size; + if (OldBuf > Buf) return true; // Wrap around! + break; + } + if (align32(Buf, EndBuf)) { delete C; return true; } + } + + if (!MethodSignatureList.empty()) // Expected more methods! + return true; + return false; +} + +Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) { + LateResolveValues.clear(); + unsigned Sig; + // Read and check signature... + if (read(Buf, EndBuf, Sig) || + Sig != ('l' | ('l' << 8) | ('v' << 16) | 'm' << 24)) + return 0; // Invalid signature! + + Module *Result; + if (ParseModule(Buf, EndBuf, Result)) return 0; + return Result; +} + + +Module *ParseBytecodeBuffer(const uchar *Buffer, unsigned Length) { + BytecodeParser Parser; + return Parser.ParseBytecode(Buffer, Buffer+Length); +} + +// Parse and return a class file... +// +Module *ParseBytecodeFile(const string &Filename) { + struct stat StatBuf; + Module *Result = 0; + + if (Filename != string("-")) { // Read from a file... + int FD = open(Filename.data(), O_RDONLY); + if (FD == -1) return 0; + + if (fstat(FD, &StatBuf) == -1) { close(FD); return 0; } + + int Length = StatBuf.st_size; + if (Length == 0) { close(FD); return 0; } + uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ, + MAP_PRIVATE, FD, 0); + if (Buffer == (uchar*)-1) { close(FD); return 0; } + + BytecodeParser Parser; + Result = Parser.ParseBytecode(Buffer, Buffer+Length); + + munmap((char*)Buffer, Length); + close(FD); + } else { // Read from stdin + size_t FileSize = 0; + int BlockSize; + uchar Buffer[4096], *FileData = 0; + while ((BlockSize = read(0, Buffer, 4))) { + if (BlockSize == -1) { free(FileData); return 0; } + + FileData = (uchar*)realloc(FileData, FileSize+BlockSize); + memcpy(FileData+FileSize, Buffer, BlockSize); + FileSize += BlockSize; + } + + if (FileSize == 0) { free(FileData); return 0; } + +#define ALIGN_PTRS 1 +#if ALIGN_PTRS + uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + assert((Buf != (uchar*)-1) && "mmap returned error!"); + free(FileData); + memcpy(Buf, FileData, FileSize); +#else + uchar *Buf = FileData; +#endif + + BytecodeParser Parser; + Result = Parser.ParseBytecode(Buf, Buf+FileSize); + +#if ALIGN_PTRS + munmap((char*)Buf, FileSize); // Free mmap'd data area +#else + free(FileData); // Free realloc'd block of memory +#endif + } + + return Result; +} |