diff options
author | Reid Spencer <rspencer@reidspencer.com> | 2004-06-10 08:09:13 +0000 |
---|---|---|
committer | Reid Spencer <rspencer@reidspencer.com> | 2004-06-10 08:09:13 +0000 |
commit | 00c28a7481324a4804badbdbabb555c0d94e66f3 (patch) | |
tree | 92aa0212394ef44bc0a170cd319f3887db44ff30 /lib/Bytecode/Analyzer/Parser.cpp | |
parent | 43f38677f565a485e80e9254ad08e955e1bf0b73 (diff) |
Implemented the bulk of the functionality. Cleaned up the code.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@14113 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Bytecode/Analyzer/Parser.cpp')
-rw-r--r-- | lib/Bytecode/Analyzer/Parser.cpp | 510 |
1 files changed, 335 insertions, 175 deletions
diff --git a/lib/Bytecode/Analyzer/Parser.cpp b/lib/Bytecode/Analyzer/Parser.cpp index 743db6c315..80800e75a6 100644 --- a/lib/Bytecode/Analyzer/Parser.cpp +++ b/lib/Bytecode/Analyzer/Parser.cpp @@ -1,4 +1,4 @@ -//===- Reader.cpp - Code to read bytecode files ---------------------------===// +//===- Parser.cpp - Code to parse bytecode files --------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This library implements the functionality defined in llvm/Bytecode/Reader.h +// This library implements the functionality defined in llvm/Bytecode/Parser.h // // Note that this library should be as fast as possible, reentrant, and // threadsafe!! @@ -17,7 +17,6 @@ //===----------------------------------------------------------------------===// #include "AnalyzerInternals.h" -#include "ReaderPrimitives.h" #include "llvm/Module.h" #include "llvm/Bytecode/Format.h" #include "Support/StringExtras.h" @@ -37,40 +36,128 @@ using namespace llvm; #define BCR_TRACE(n, X) #endif -#define PARSE_ERROR(inserters) \ - { \ +#define PARSE_ERROR(inserters) { \ std::ostringstream errormsg; \ errormsg << inserters; \ if ( ! handler->handleError( errormsg.str() ) ) \ throw std::string(errormsg.str()); \ } +inline bool AbstractBytecodeParser::moreInBlock() { + return At < BlockEnd; +} -inline void AbstractBytecodeParser::readBlock(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned &Type, unsigned &Size) -{ - Type = read(Buf, EndBuf); - Size = read(Buf, EndBuf); +inline void AbstractBytecodeParser::checkPastBlockEnd(const char * block_name) { + if ( At > BlockEnd ) + PARSE_ERROR("Attempt to read past the end of " << block_name << " block."); } -const Type *AbstractBytecodeParser::getType(unsigned ID) { - //cerr << "Looking up Type ID: " << ID << "\n"; +inline void AbstractBytecodeParser::align32() { + BufPtr Save = At; + At = (const unsigned char *)((unsigned long)(At+3) & (~3UL)); + if ( reportAlignment && At > Save ) handler->handleAlignment( At - Save ); + if (At > BlockEnd) + throw std::string("Ran out of data while aligning!"); +} + +inline unsigned AbstractBytecodeParser::read_uint() { + if (At+4 > BlockEnd) + throw std::string("Ran out of data reading uint!"); + At += 4; + return At[-4] | (At[-3] << 8) | (At[-2] << 16) | (At[-1] << 24); +} + +inline unsigned AbstractBytecodeParser::read_vbr_uint() { + unsigned Shift = 0; + unsigned Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + throw std::string("Ran out of data reading vbr_uint!"); + Result |= (unsigned)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + if (reportVBR) + handler->handleVBR32(At-Save); + return Result; +} + +inline uint64_t AbstractBytecodeParser::read_vbr_uint64() { + unsigned Shift = 0; + uint64_t Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + throw std::string("Ran out of data reading vbr_uint64!"); + Result |= (uint64_t)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + if (reportVBR) + handler->handleVBR64(At-Save); + return Result; +} - if (ID < Type::FirstDerivedTyID) - if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) - return T; // Asked for a primitive type... +inline int64_t AbstractBytecodeParser::read_vbr_int64() { + uint64_t R = read_vbr_uint64(); + if (R & 1) { + if (R != 1) + return -(int64_t)(R >> 1); + else // There is no such thing as -0 with integers. "-0" really means + // 0x8000000000000000. + return 1LL << 63; + } else + return (int64_t)(R >> 1); +} - // Otherwise, derived types need offset... - ID -= Type::FirstDerivedTyID; +inline std::string AbstractBytecodeParser::read_str() { + unsigned Size = read_vbr_uint(); + const unsigned char *OldAt = At; + At += Size; + if (At > BlockEnd) // Size invalid? + throw std::string("Ran out of data reading a string!"); + return std::string((char*)OldAt, Size); +} + +inline void AbstractBytecodeParser::read_data(void *Ptr, void *End) { + unsigned char *Start = (unsigned char *)Ptr; + unsigned Amount = (unsigned char *)End - Start; + if (At+Amount > BlockEnd) + throw std::string("Ran out of data!"); + std::copy(At, At+Amount, Start); + At += Amount; +} - if (!CompactionTypeTable.empty()) { - if (ID >= CompactionTypeTable.size()) - PARSE_ERROR("Type ID out of range for compaction table!"); - return CompactionTypeTable[ID]; +inline void AbstractBytecodeParser::readBlock(unsigned &Type, unsigned &Size) { + Type = read_uint(); + Size = read_uint(); + BlockStart = At; + if ( At + Size > BlockEnd ) + throw std::string("Attempt to size a block past end of memory"); + BlockEnd = At + Size; + if ( reportBlocks ) { + handler->handleBlock( Type, BlockStart, Size ); } +} + +const Type *AbstractBytecodeParser::getType(unsigned ID) { +//cerr << "Looking up Type ID: " << ID << "\n"; + +if (ID < Type::FirstDerivedTyID) + if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) + return T; // Asked for a primitive type... + +// Otherwise, derived types need offset... +ID -= Type::FirstDerivedTyID; - // Is it a module-level type? +if (!CompactionTypeTable.empty()) { + if (ID >= CompactionTypeTable.size()) + PARSE_ERROR("Type ID out of range for compaction table!"); + return CompactionTypeTable[ID]; +} + +// Is it a module-level type? if (ID < ModuleTypes.size()) return ModuleTypes[ID].get(); @@ -83,12 +170,12 @@ const Type *AbstractBytecodeParser::getType(unsigned ID) { return Type::VoidTy; } -bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, - std::vector<unsigned> &Operands) { +bool AbstractBytecodeParser::ParseInstruction(std::vector<unsigned> &Operands) { + BufPtr SaveAt = At; Operands.clear(); unsigned iType = 0; unsigned Opcode = 0; - unsigned Op = read(Buf, EndBuf); + unsigned Op = read_uint(); // bits Instruction format: Common to all formats // -------------------------- @@ -134,61 +221,56 @@ bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, Operands[2] = (Op >> 26) & 63; break; case 0: - Buf -= 4; // Hrm, try this again... - Opcode = read_vbr_uint(Buf, EndBuf); + At -= 4; // Hrm, try this again... + Opcode = read_vbr_uint(); Opcode >>= 2; - iType = read_vbr_uint(Buf, EndBuf); + iType = read_vbr_uint(); - unsigned NumOperands = read_vbr_uint(Buf, EndBuf); + unsigned NumOperands = read_vbr_uint(); Operands.resize(NumOperands); if (NumOperands == 0) PARSE_ERROR("Zero-argument instruction found; this is invalid."); for (unsigned i = 0; i != NumOperands; ++i) - Operands[i] = read_vbr_uint(Buf, EndBuf); - align32(Buf, EndBuf); + Operands[i] = read_vbr_uint(); + align32(); break; } - return handler->handleInstruction(Opcode, getType(iType), Operands); + return handler->handleInstruction(Opcode, getType(iType), Operands, At-SaveAt); } /// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one /// basicblock at a time. This method reads in one of the basicblock packets. -void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf, - BufPtr EndBuf, - unsigned BlockNo) { +void AbstractBytecodeParser::ParseBasicBlock( unsigned BlockNo) { handler->handleBasicBlockBegin( BlockNo ); std::vector<unsigned> Args; bool is_terminating = false; - while (Buf < EndBuf) - is_terminating = ParseInstruction(Buf, EndBuf, Args); + while ( moreInBlock() ) + is_terminating = ParseInstruction(Args); if ( ! is_terminating ) - PARSE_ERROR( - "Failed to recognize instruction as terminating at end of block"); + PARSE_ERROR("Non-terminated basic block found!"); handler->handleBasicBlockEnd( BlockNo ); } - /// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the /// body of a function. In post 1.0 bytecode files, we no longer emit basic /// block individually, in order to avoid per-basic-block overhead. -unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, - BufPtr EndBuf) { +unsigned AbstractBytecodeParser::ParseInstructionList() { unsigned BlockNo = 0; std::vector<unsigned> Args; - while (Buf < EndBuf) { + while ( moreInBlock() ) { handler->handleBasicBlockBegin( BlockNo ); // Read instructions into this basic block until we get to a terminator bool is_terminating = false; - while (Buf < EndBuf && !is_terminating ) - is_terminating = ParseInstruction(Buf, EndBuf, Args ) ; + while (moreInBlock() && !is_terminating ) + is_terminating = ParseInstruction(Args ) ; if (!is_terminating) PARSE_ERROR( "Non-terminated basic block found!"); @@ -199,36 +281,34 @@ unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, return BlockNo; } -void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) { +void AbstractBytecodeParser::ParseSymbolTable() { handler->handleSymbolTableBegin(); - while (Buf < EndBuf) { + while ( moreInBlock() ) { // Symtab block header: [num entries][type id number] - unsigned NumEntries = read_vbr_uint(Buf, EndBuf); - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = read_vbr_uint(); const Type *Ty = getType(Typ); handler->handleSymbolTablePlane( Typ, NumEntries, Ty ); for (unsigned i = 0; i != NumEntries; ++i) { // Symtab entry: [def slot #][name] - unsigned slot = read_vbr_uint(Buf, EndBuf); - std::string Name = read_str(Buf, EndBuf); + unsigned slot = read_vbr_uint(); + std::string Name = read_str(); if (Typ == Type::TypeTyID) handler->handleSymbolTableType( i, slot, Name ); else - handler->handleSymbolTableValue( i, slot, Name ); + handler->handleSymbolTableValue( i, slot, Name ); } } - - if (Buf > EndBuf) - PARSE_ERROR("Tried to read past end of buffer while reading symbol table."); + checkPastBlockEnd("Symbol Table"); handler->handleSymbolTableEnd(); } -void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { +void AbstractBytecodeParser::ParseFunctionLazily() { if (FunctionSignatureList.empty()) throw std::string("FunctionSignatureList empty!"); @@ -236,9 +316,10 @@ void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { FunctionSignatureList.pop_back(); // Save the information for future reading of the function - LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf); + LazyFunctionLoadMap[FType] = LazyFunctionInfo(BlockStart, BlockEnd); + // Pretend we've `parsed' this function - Buf = EndBuf; + At = BlockEnd; } void AbstractBytecodeParser::ParseNextFunction(Type* FType) { @@ -251,21 +332,20 @@ void AbstractBytecodeParser::ParseNextFunction(Type* FType) { return; } - BufPtr Buf = Fi->second.Buf; - BufPtr EndBuf = Fi->second.EndBuf; + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.Buf; assert(Fi->first == FType); LazyFunctionLoadMap.erase(Fi); - this->ParseFunctionBody( FType, Buf, EndBuf ); + this->ParseFunctionBody( FType ); } -void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, - BufPtr &Buf, BufPtr EndBuf ) { +void AbstractBytecodeParser::ParseFunctionBody(const Type* FType ) { GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage; - unsigned LinkageType = read_vbr_uint(Buf, EndBuf); + unsigned LinkageType = read_vbr_uint(); switch (LinkageType) { case 0: Linkage = GlobalValue::ExternalLinkage; break; case 1: Linkage = GlobalValue::WeakLinkage; break; @@ -284,43 +364,45 @@ void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, unsigned BlockNum = 0; bool InsertedArguments = false; - while (Buf < EndBuf) { + BufPtr MyEnd = BlockEnd; + while ( At < MyEnd ) { unsigned Type, Size; - BufPtr OldBuf = Buf; - readBlock(Buf, EndBuf, Type, Size); + BufPtr OldAt = At; + readBlock(Type, Size); switch (Type) { case BytecodeFormat::ConstantPool: - ParseConstantPool(Buf, Buf+Size, FunctionTypes ); + ParseConstantPool(FunctionTypes ); break; case BytecodeFormat::CompactionTable: - ParseCompactionTable(Buf, Buf+Size); + ParseCompactionTable(); break; case BytecodeFormat::BasicBlock: - ParseBasicBlock(Buf, Buf+Size, BlockNum++); + ParseBasicBlock(BlockNum++); break; case BytecodeFormat::InstructionList: if (BlockNum) - PARSE_ERROR("InstructionList must come before basic blocks!"); - BlockNum = ParseInstructionList(Buf, Buf+Size); + PARSE_ERROR("InstructionList must come before basic blocks!"); + BlockNum = ParseInstructionList(); break; case BytecodeFormat::SymbolTable: - ParseSymbolTable(Buf, Buf+Size ); + ParseSymbolTable(); break; default: - Buf += Size; - if (OldBuf > Buf) - PARSE_ERROR("Wrapped around reading bytecode"); + At += Size; + if (OldAt > At) + PARSE_ERROR("Wrapped around reading bytecode"); break; } + BlockEnd = MyEnd; // Malformed bc file if read past end of block. - align32(Buf, EndBuf); + align32(); } handler->handleFunctionEnd(FType); @@ -336,21 +418,24 @@ void AbstractBytecodeParser::ParseAllFunctionBodies() { while ( Fi != Fe ) { const Type* FType = Fi->first; - this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf); + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.EndBuf; + this->ParseFunctionBody(FType); + ++Fi; } } -void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) { +void AbstractBytecodeParser::ParseCompactionTable() { handler->handleCompactionTableBegin(); - while (Buf != End) { - unsigned NumEntries = read_vbr_uint(Buf, End); + while ( moreInBlock() ) { + unsigned NumEntries = read_vbr_uint(); unsigned Ty; if ((NumEntries & 3) == 3) { NumEntries >>= 2; - Ty = read_vbr_uint(Buf, End); + Ty = read_vbr_uint(); } else { Ty = NumEntries >> 2; NumEntries &= 3; @@ -360,25 +445,24 @@ void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) { if (Ty == Type::TypeTyID) { for (unsigned i = 0; i != NumEntries; ++i) { - unsigned TypeSlot = read_vbr_uint(Buf,End); + unsigned TypeSlot = read_vbr_uint(); const Type *Typ = getGlobalTableType(TypeSlot); - handler->handleCompactionTableType( i, TypeSlot, Typ ); + handler->handleCompactionTableType( i, TypeSlot, Typ ); } } else { const Type *Typ = getType(Ty); // Push the implicit zero for (unsigned i = 0; i != NumEntries; ++i) { - unsigned ValSlot = read_vbr_uint(Buf, End); - handler->handleCompactionTableValue( i, ValSlot, Typ ); + unsigned ValSlot = read_vbr_uint(); + handler->handleCompactionTableValue( i, ValSlot, Typ ); } } } handler->handleCompactionTableEnd(); } -const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, - const unsigned char *EndBuf) { - unsigned PrimType = read_vbr_uint(Buf, EndBuf); +const Type *AbstractBytecodeParser::ParseTypeConstant() { + unsigned PrimType = read_vbr_uint(); const Type *Val = 0; if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) @@ -386,13 +470,13 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, switch (PrimType) { case Type::FunctionTyID: { - const Type *RetType = getType(read_vbr_uint(Buf, EndBuf)); + const Type *RetType = getType(read_vbr_uint()); - unsigned NumParams = read_vbr_uint(Buf, EndBuf); + unsigned NumParams = read_vbr_uint(); std::vector<const Type*> Params; while (NumParams--) - Params.push_back(getType(read_vbr_uint(Buf, EndBuf))); + Params.push_back(getType(read_vbr_uint())); bool isVarArg = Params.size() && Params.back() == Type::VoidTy; if (isVarArg) Params.pop_back(); @@ -402,10 +486,10 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, return result; } case Type::ArrayTyID: { - unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + unsigned ElTyp = read_vbr_uint(); const Type *ElementType = getType(ElTyp); - unsigned NumElements = read_vbr_uint(Buf, EndBuf); + unsigned NumElements = read_vbr_uint(); BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size=" << NumElements << "\n"); @@ -415,10 +499,10 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, } case Type::StructTyID: { std::vector<const Type*> Elements; - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(); while (Typ) { // List is terminated by void/0 typeid Elements.push_back(getType(Typ)); - Typ = read_vbr_uint(Buf, EndBuf); + Typ = read_vbr_uint(); } Type* result = StructType::get(Elements); @@ -426,7 +510,7 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, return result; } case Type::PointerTyID: { - unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + unsigned ElTyp = read_vbr_uint(); BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n"); Type* result = PointerType::get(getType(ElTyp)); handler->handleType( result ); @@ -455,10 +539,9 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, // something and when we reread the type later, we can replace the opaque type // with a new resolved concrete type. // -void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, - const unsigned char *EndBuf, - TypeListTy &Tab, - unsigned NumEntries) { +void AbstractBytecodeParser::ParseTypeConstants( + TypeListTy &Tab, unsigned NumEntries +) { assert(Tab.size() == 0 && "should not have read type constants in before!"); // Insert a bunch of opaque types to be resolved later... @@ -470,7 +553,7 @@ void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, // opaque types just inserted. // for (unsigned i = 0; i != NumEntries; ++i) { - const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get(); + const Type *NewTy = ParseTypeConstant(), *OldTy = Tab[i].get(); if (NewTy == 0) throw std::string("Couldn't parse type!"); BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy << "' Replacing: " << OldTy << "\n"); @@ -497,18 +580,16 @@ void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, } -void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned TypeID) { +void AbstractBytecodeParser::ParseConstantValue(unsigned TypeID) { // We must check for a ConstantExpr before switching by type because // a ConstantExpr can be of any type, and has no explicit value. // // 0 if not expr; numArgs if is expr - unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf); + unsigned isExprNumArgs = read_vbr_uint(); if (isExprNumArgs) { - unsigned Opcode = read_vbr_uint(Buf, EndBuf); + unsigned Opcode = read_vbr_uint(); const Type* Typ = getType(TypeID); // FIXME: Encoding of constant exprs could be much more compact! @@ -517,8 +598,8 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, // Read the slot number and types of each of the arguments for (unsigned i = 0; i != isExprNumArgs; ++i) { - unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf); - unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf); + unsigned ArgValSlot = read_vbr_uint(); + unsigned ArgTypeSlot = read_vbr_uint(); BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot) << "' slot: " << ArgValSlot << "\n"); @@ -534,7 +615,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, const Type *Ty = getType(TypeID); switch (Ty->getPrimitiveID()) { case Type::BoolTyID: { - unsigned Val = read_vbr_uint(Buf, EndBuf); + unsigned Val = read_vbr_uint(); if (Val != 0 && Val != 1) PARSE_ERROR("Invalid boolean value read."); @@ -545,7 +626,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, case Type::UByteTyID: // Unsigned integer types... case Type::UShortTyID: case Type::UIntTyID: { - unsigned Val = read_vbr_uint(Buf, EndBuf); + unsigned Val = read_vbr_uint(); if (!ConstantUInt::isValueValidForType(Ty, Val)) throw std::string("Invalid unsigned byte/short/int read."); handler->handleConstantValue( ConstantUInt::get(Ty, Val) ); @@ -553,7 +634,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, } case Type::ULongTyID: { - handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) ); + handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64()) ); break; } @@ -561,7 +642,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, case Type::ShortTyID: case Type::IntTyID: { case Type::LongTyID: - int64_t Val = read_vbr_int64(Buf, EndBuf); + int64_t Val = read_vbr_int64(); if (!ConstantSInt::isValueValidForType(Ty, Val)) throw std::string("Invalid signed byte/short/int/long read."); handler->handleConstantValue( ConstantSInt::get(Ty, Val) ); @@ -570,14 +651,14 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, case Type::FloatTyID: { float F; - input_data(Buf, EndBuf, &F, &F+1); + read_data(&F, &F+1); handler->handleConstantValue( ConstantFP::get(Ty, F) ); break; } case Type::DoubleTyID: { double Val; - input_data(Buf, EndBuf, &Val, &Val+1); + read_data(&Val, &Val+1); handler->handleConstantValue( ConstantFP::get(Ty, Val) ); break; } @@ -592,7 +673,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, std::vector<unsigned> Elements; Elements.reserve(NumElements); while (NumElements--) // Read all of the elements of the constant. - Elements.push_back(read_vbr_uint(Buf, EndBuf)); + Elements.push_back(read_vbr_uint()); handler->handleConstantArray( AT, Elements ); break; @@ -603,15 +684,16 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, std::vector<unsigned> Elements; Elements.reserve(ST->getNumElements()); for (unsigned i = 0; i != ST->getNumElements(); ++i) - Elements.push_back(read_vbr_uint(Buf, EndBuf)); - + Elements.push_back(read_vbr_uint()); handler->handleConstantStruct( ST, Elements ); + break; } case Type::PointerTyID: { // ConstantPointerRef value... const PointerType *PT = cast<PointerType>(Ty); - unsigned Slot = read_vbr_uint(Buf, EndBuf); + unsigned Slot = read_vbr_uint(); handler->handleConstantPointer( PT, Slot ); + break; } default: @@ -620,16 +702,13 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, } } -void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf, - const unsigned char *EndBuf) { - ParseConstantPool(Buf, EndBuf, ModuleTypes); +void AbstractBytecodeParser::ParseGlobalTypes() { + ParseConstantPool(ModuleTypes); } -void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned NumEntries ){ +void AbstractBytecodeParser::ParseStringConstants(unsigned NumEntries ){ for (; NumEntries; --NumEntries) { - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(); const Type *Ty = getType(Typ); if (!isa<ArrayType>(Ty)) throw std::string("String constant data invalid!"); @@ -641,7 +720,7 @@ void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, // Read character data. The type tells us how long the string is. char Data[ATy->getNumElements()]; - input_data(Buf, EndBuf, Data, Data+ATy->getNumElements()); + read_data(Data, Data+ATy->getNumElements()); std::vector<Constant*> Elements(ATy->getNumElements()); if (ATy->getElementType() == Type::SByteTy) @@ -658,35 +737,33 @@ void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, } -void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf, - const unsigned char *EndBuf, - TypeListTy &TypeTab) { - while (Buf < EndBuf) { - unsigned NumEntries = read_vbr_uint(Buf, EndBuf); - unsigned Typ = read_vbr_uint(Buf, EndBuf); +void AbstractBytecodeParser::ParseConstantPool( TypeListTy &TypeTab) { + while ( moreInBlock() ) { + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = read_vbr_uint(); if (Typ == Type::TypeTyID) { - ParseTypeConstants(Buf, EndBuf, TypeTab, NumEntries); + ParseTypeConstants(TypeTab, NumEntries); } else if (Typ == Type::VoidTyID) { - ParseStringConstants(Buf, EndBuf, NumEntries); + ParseStringConstants(NumEntries); } else { BCR_TRACE(3, "Type: '" << *getType(Typ) << "' NumEntries: " << NumEntries << "\n"); for (unsigned i = 0; i < NumEntries; ++i) { - ParseConstantValue(Buf, EndBuf, Typ); + ParseConstantValue(Typ); } } } - if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer."); + checkPastBlockEnd("Constant Pool"); } -void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { +void AbstractBytecodeParser::ParseModuleGlobalInfo() { handler->handleModuleGlobalsBegin(); // Read global variables... - unsigned VarType = read_vbr_uint(Buf, End); + unsigned VarType = read_vbr_uint(); while (VarType != Type::VoidTyID) { // List is terminated by Void // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 = // Linkage, bit4+ = slot# @@ -721,17 +798,17 @@ void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { // Create the global variable... if (hasInitializer) { - unsigned initSlot = read_vbr_uint(Buf,End); + unsigned initSlot = read_vbr_uint(); handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot ); } else handler->handleGlobalVariable( ElTy, isConstant, Linkage ); // Get next item - VarType = read_vbr_uint(Buf, End); + VarType = read_vbr_uint(); } // Read the function objects for all of the functions that are coming - unsigned FnSignature = read_vbr_uint(Buf, End); + unsigned FnSignature = read_vbr_uint(); while (FnSignature != Type::VoidTyID) { // List is terminated by Void const Type *Ty = getType(FnSignature); if (!isa<PointerType>(Ty) || @@ -750,22 +827,26 @@ void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { handler->handleFunctionDeclaration(Ty); // Get Next function signature - FnSignature = read_vbr_uint(Buf, End); + FnSignature = read_vbr_uint(); } if (hasInconsistentModuleGlobalInfo) - align32(Buf, End); + align32(); + + // Now that the function signature list is set up, reverse it so that we can + // remove elements efficiently from the back of the vector. + std::reverse(FunctionSignatureList.begin(), FunctionSignatureList.end()); // This is for future proofing... in the future extra fields may be added that // we don't understand, so we transparently ignore them. // - Buf = End; + At = BlockEnd; handler->handleModuleGlobalsEnd(); } -void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) { - unsigned Version = read_vbr_uint(Buf, EndBuf); +void AbstractBytecodeParser::ParseVersionInfo() { + unsigned Version = read_vbr_uint(); // Unpack version number: low four bits are for flags, top bits = version Module::Endianness Endianness; @@ -814,85 +895,164 @@ void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) { handler->handleVersionInfo(RevisionNum, Endianness, PointerSize ); } -void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) { +void AbstractBytecodeParser::ParseModule() { unsigned Type, Size; - readBlock(Buf, EndBuf, Type, Size); - if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) - // Hrm, not a class? - PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) << - ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf))); + + FunctionSignatureList.clear(); // Just in case... // Read into instance variables... - ParseVersionInfo(Buf, EndBuf); - align32(Buf, EndBuf); + ParseVersionInfo(); + align32(); /// FIXME: Is this redundant? VI is first and 4 bytes! bool SeenModuleGlobalInfo = false; bool SeenGlobalTypePlane = false; - while (Buf < EndBuf) { - BufPtr OldBuf = Buf; - readBlock(Buf, EndBuf, Type, Size); + BufPtr MyEnd = BlockEnd; + while (At < MyEnd) { + BufPtr OldAt = At; + readBlock(Type, Size); switch (Type) { case BytecodeFormat::GlobalTypePlane: if ( SeenGlobalTypePlane ) - PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); + PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); - ParseGlobalTypes(Buf, Buf+Size); + ParseGlobalTypes(); SeenGlobalTypePlane = true; break; case BytecodeFormat::ModuleGlobalInfo: if ( SeenModuleGlobalInfo ) - PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); - ParseModuleGlobalInfo(Buf, Buf+Size); + PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); + ParseModuleGlobalInfo(); SeenModuleGlobalInfo = true; break; case BytecodeFormat::ConstantPool: - ParseConstantPool(Buf, Buf+Size, ModuleTypes); + ParseConstantPool(ModuleTypes); break; case BytecodeFormat::Function: - ParseFunctionLazily(Buf, Buf+Size); + ParseFunctionLazily(); break; case BytecodeFormat::SymbolTable: - ParseSymbolTable(Buf, Buf+Size ); + ParseSymbolTable(); break; default: - Buf += Size; - if (OldBuf > Buf) - { - PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); + At += Size; + if (OldAt > At) { + PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); } break; } - align32(Buf, EndBuf); + BlockEnd = MyEnd; + align32(); } + + /// Make sure we pulled them all out. If we didn't then there's a declaration + /// but a missing body. That's not allowed. + if (!FunctionSignatureList.empty()) + throw std::string( + "Function declared, but bytecode stream ended before definition"); } void AbstractBytecodeParser::ParseBytecode( - BufPtr Buf, unsigned Length, + BufPtr b, unsigned Length, const std::string &ModuleID) { + At = MemStart = BlockStart = b; + MemEnd = BlockEnd = b + Length; handler->handleStart(); - unsigned char *EndBuf = (unsigned char*)(Buf + Length); // Read and check signature... - unsigned Sig = read(Buf, EndBuf); + unsigned Sig = read_uint(); if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { PARSE_ERROR("Invalid bytecode signature: " << Sig); } handler->handleModuleBegin(ModuleID); - this->ParseModule(Buf, EndBuf); + unsigned Type, Size; + readBlock(Type, Size); + if ( Type != BytecodeFormat::Module ) { + PARSE_ERROR("Expected Module Block! At: " << unsigned(intptr_t(At)) + << ", Type:" << Type << ", Size:" << Size); + } + if ( At + Size != MemEnd ) { + PARSE_ERROR("Invalid Top Level Block Length! At: " + << unsigned(intptr_t(At)) << ", Type:" << Type << ", Size:" << Size); + } + this->ParseModule(); handler->handleModuleEnd(ModuleID); handler->handleFinish(); } +//===----------------------------------------------------------------------===// +//=== Default Implementations of Handler Methods +//===----------------------------------------------------------------------===// + +bool BytecodeHandler::handleError(const std::string& str ) { return false; } +void BytecodeHandler::handleStart() { } +void BytecodeHandler::handleFinish() { } +void BytecodeHandler::handleModuleBegin(const std::string& id) { } +void BytecodeHandler::handleModuleEnd(const std::string& id) { } +void BytecodeHandler::handleVersionInfo( unsigned char RevisionNum, + Module::Endianness Endianness, Module::PointerSize PointerSize) { } +void BytecodeHandler::handleModuleGlobalsBegin() { } +void BytecodeHandler::handleGlobalVariable( + const Type* ElemType, bool isConstant, GlobalValue::LinkageTypes ) { } +void BytecodeHandler::handleInitializedGV( + const Type* ElemType, bool isConstant, GlobalValue::LinkageTypes, + unsigned initSlot) {} +void BytecodeHandler::handleType( const Type* Ty ) {} +void BytecodeHandler::handleFunctionDeclaration( + const Type* FuncType) {} +void BytecodeHandler::handleModuleGlobalsEnd() { } +void BytecodeHandler::handleCompactionTableBegin() { } +void BytecodeHandler::handleCompactionTablePlane( unsigned Ty, + unsigned NumEntries) {} +void BytecodeHandler::handleCompactionTableType( unsigned i, unsigned TypSlot, + const Type* ) {} +void BytecodeHandler::handleCompactionTableValue( unsigned i, unsigned ValSlot, + const Type* ) {} +void BytecodeHandler::handleCompactionTableEnd() { } +void BytecodeHandler::handleSymbolTableBegin() { } +void BytecodeHandler::handleSymbolTablePlane( unsigned Ty, unsigned NumEntries, + const Type* Typ) { } +void BytecodeHandler::handleSymbolTableType( unsigned i, unsigned slot, + const std::string& name ) { } +void BytecodeHandler::handleSymbolTableValue( unsigned i, unsigned slot, + const std::string& name ) { } +void BytecodeHandler::handleSymbolTableEnd() { } < |