diff options
Diffstat (limited to 'lib/Bytecode/Analyzer')
-rw-r--r-- | lib/Bytecode/Analyzer/Analyzer.cpp | 87 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/AnalyzerWrappers.cpp | 137 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/BytecodeHandler.cpp | 220 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/Dumper.cpp | 3 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/Parser.cpp | 510 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/Parser.h | 92 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/ReaderPrimitives.h | 101 |
7 files changed, 590 insertions, 560 deletions
diff --git a/lib/Bytecode/Analyzer/Analyzer.cpp b/lib/Bytecode/Analyzer/Analyzer.cpp index eb710711d9..133c1fbaa6 100644 --- a/lib/Bytecode/Analyzer/Analyzer.cpp +++ b/lib/Bytecode/Analyzer/Analyzer.cpp @@ -30,13 +30,13 @@ public: bool handleError(const std::string& str ) { - std::cerr << "Analysis Error: " << str; return false; } void handleStart() { bca.ModuleId.clear(); + bca.numBlocks = 0; bca.numTypes = 0; bca.numValues = 0; bca.numFunctions = 0; @@ -49,16 +49,38 @@ public: bca.numSymTab = 0; bca.maxTypeSlot = 0; bca.maxValueSlot = 0; - bca.density = 0.0; + bca.numAlignment = 0; + bca.fileDensity = 0.0; + bca.globalsDensity = 0.0; + bca.functionDensity = 0.0; + bca.vbrCount32 = 0; + bca.vbrCount64 = 0; + bca.vbrCompBytes = 0; + bca.vbrExpdBytes = 0; bca.FunctionInfo.clear(); bca.BytecodeDump.clear(); + bca.BlockSizes[BytecodeFormat::Module] = 0; + bca.BlockSizes[BytecodeFormat::Function] = 0; + bca.BlockSizes[BytecodeFormat::ConstantPool] = 0; + bca.BlockSizes[BytecodeFormat::SymbolTable] = 0; + bca.BlockSizes[BytecodeFormat::ModuleGlobalInfo] = 0; + bca.BlockSizes[BytecodeFormat::GlobalTypePlane] = 0; + bca.BlockSizes[BytecodeFormat::BasicBlock] = 0; + bca.BlockSizes[BytecodeFormat::InstructionList] = 0; + bca.BlockSizes[BytecodeFormat::CompactionTable] = 0; } void handleFinish() { - bca.density = bca.numTypes + bca.numFunctions + bca.numConstants + - bca.numGlobalVars + bca.numInstructions; - bca.density /= bca.byteSize; + bca.fileDensity = double(bca.byteSize) / double( bca.numTypes + bca.numValues ); + double globalSize = 0.0; + globalSize += double(bca.BlockSizes[BytecodeFormat::ConstantPool]); + globalSize += double(bca.BlockSizes[BytecodeFormat::ModuleGlobalInfo]); + globalSize += double(bca.BlockSizes[BytecodeFormat::GlobalTypePlane]); + bca.globalsDensity = globalSize / double( bca.numTypes + bca.numConstants + + bca.numGlobalVars ); + bca.functionDensity = double(bca.BlockSizes[BytecodeFormat::Function]) / + double(bca.numFunctions); } void handleModuleBegin(const std::string& id) @@ -78,8 +100,9 @@ public: { } - void handleModuleGlobalsBegin() + void handleModuleGlobalsBegin(unsigned size) { + // bca.globalBytesize += size; } void handleGlobalVariable( @@ -89,6 +112,7 @@ public: ) { bca.numGlobalVars++; + bca.numValues++; } void handleInitializedGV( @@ -99,6 +123,7 @@ public: ) { bca.numGlobalVars++; + bca.numValues++; } virtual void handleType( const Type* Ty ) @@ -111,6 +136,7 @@ public: ) { bca.numFunctions++; + bca.numValues++; } void handleModuleGlobalsEnd() @@ -200,15 +226,19 @@ public: ) { bca.numBasicBlocks++; + bca.numValues++; } bool handleInstruction( unsigned Opcode, const Type* iType, - std::vector<unsigned>& Operands + std::vector<unsigned>& Operands, + unsigned Size ) { bca.numInstructions++; + bca.numValues++; + bca.numOperands += Operands.size(); return Instruction::isTerminator(Opcode); } @@ -227,43 +257,67 @@ public: ) { bca.numConstants++; + bca.numValues++; } void handleConstantValue( Constant * c ) { bca.numConstants++; + bca.numValues++; } void handleConstantArray( - const ArrayType* AT, - std::vector<unsigned>& Elements ) + const ArrayType* AT, + std::vector<unsigned>& Elements ) { bca.numConstants++; + bca.numValues++; } void handleConstantStruct( - const StructType* ST, - std::vector<unsigned>& ElementSlots) + const StructType* ST, + std::vector<unsigned>& ElementSlots) { bca.numConstants++; + bca.numValues++; } void handleConstantPointer( - const PointerType* PT, unsigned Slot) + const PointerType* PT, unsigned Slot) { bca.numConstants++; + bca.numValues++; } void handleConstantString( const ConstantArray* CA ) { bca.numConstants++; + bca.numValues++; } - void handleGlobalConstantsEnd() - { + void handleGlobalConstantsEnd() { } + + void handleAlignment(unsigned numBytes) { + bca.numAlignment += numBytes; + } + + void handleBlock( + unsigned BType, const unsigned char* StartPtr, unsigned Size) { + bca.numBlocks++; + bca.BlockSizes[llvm::BytecodeFormat::FileBlockIDs(BType)] += Size; } + virtual void handleVBR32(unsigned Size ) { + bca.vbrCount32++; + bca.vbrCompBytes += Size; + bca.vbrExpdBytes += sizeof(uint32_t); + } + virtual void handleVBR64(unsigned Size ) { + bca.vbrCount64++; + bca.vbrCompBytes += Size; + bca.vbrExpdBytes += sizeof(uint64_t); + } }; } @@ -277,10 +331,9 @@ void llvm::BytecodeAnalyzer::AnalyzeBytecode( { bca.byteSize = Length; AnalyzerHandler TheHandler(bca); - AbstractBytecodeParser TheParser(&TheHandler); + AbstractBytecodeParser TheParser(&TheHandler, true, true, true); TheParser.ParseBytecode( Buf, Length, ModuleID ); - if ( bca.detailedResults ) - TheParser.ParseAllFunctionBodies(); + TheParser.ParseAllFunctionBodies(); } // vim: sw=2 diff --git a/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp b/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp index 2caf069eb4..fd23dc8086 100644 --- a/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp +++ b/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp @@ -18,6 +18,7 @@ #include "Support/StringExtras.h" #include "Config/unistd.h" #include <cerrno> +#include <iomanip> using namespace llvm; @@ -46,7 +47,7 @@ static std::string ErrnoMessage (int savedErrNum, std::string descr) { } BytecodeFileAnalyzer::BytecodeFileAnalyzer(const std::string &Filename, - BytecodeAnalysis& bca) { + BytecodeAnalysis& bca) { Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length); if (Buffer == 0) throw "Error reading file '" + Filename + "'."; @@ -84,16 +85,16 @@ namespace { public: BytecodeBufferAnalyzer(const unsigned char *Buf, unsigned Length, - BytecodeAnalysis& bca, const std::string &ModuleID); + BytecodeAnalysis& bca, const std::string &ModuleID); ~BytecodeBufferAnalyzer(); }; } BytecodeBufferAnalyzer::BytecodeBufferAnalyzer(const unsigned char *Buf, - unsigned Length, - BytecodeAnalysis& bca, - const std::string &ModuleID) { + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID) { // If not aligned, allocate a new buffer to hold the bytecode... const unsigned char *ParseBegin = 0; if ((intptr_t)Buf & 3) { @@ -200,28 +201,118 @@ void llvm::AnalyzeBytecodeBuffer( /// This function prints the contents of rhe BytecodeAnalysis structure in /// a human legible form. /// @brief Print BytecodeAnalysis structure to an ostream +namespace { +inline static void print(std::ostream& Out, const char*title, + unsigned val, bool nl = true ) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::setw(9) << val << "\n"; +} + +inline static void print(std::ostream&Out, const char*title, + double val ) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::setw(9) << std::setprecision(6) << val << "\n" ; +} + +inline static void print(std::ostream&Out, const char*title, + double top, double bot ) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::setw(9) << std::setprecision(6) << top + << " (" << std::left << std::setw(0) << std::setprecision(4) + << (top/bot)*100.0 << "%)\n"; +} +inline static void print(std::ostream&Out, const char*title, + std::string val, bool nl = true) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::left << val << (nl ? "\n" : ""); +} + +} + void llvm::PrintBytecodeAnalysis(BytecodeAnalysis& bca, std::ostream& Out ) { - Out << " Bytecode Analysis of: " << bca.ModuleId << "\n"; - Out << " File Size: " << bca.byteSize << "\n"; - Out << " Number Of Types: " << bca.numTypes << "\n"; - Out << " Number Of Constants: " << bca.numConstants << "\n"; - Out << " Number Of Global Variables: " << bca.numGlobalVars << "\n"; - Out << " Number Of Functions: " << bca.numFunctions << "\n"; - Out << " Number Of Basic Blocks: " << bca.numBasicBlocks << "\n"; - Out << " Number Of Instructions: " << bca.numInstructions << "\n"; - Out << " Number Of Operands: " << bca.numOperands << "\n"; - Out << "Number Of Compaction Tables: " << bca.numCmpctnTables << "\n"; - Out << " Number Of Symbol Tables: " << bca.numSymTab << "\n"; - Out << " Maximum Type Slot Number: " << bca.maxTypeSlot << "\n"; - Out << " Maximum Value Slot Number: " << bca.maxValueSlot << "\n"; - Out << " Bytecode Density: " << bca.density << "\n"; - - if ( bca.detailedResults ) - Out << "Detailed Results Not Implemented Yet.\n"; + print(Out, "Bytecode Analysis Of Module", bca.ModuleId); + print(Out, "File Size", bca.byteSize); + print(Out, "Bytecode Compression Index",std::string("TBD")); + print(Out, "Number Of Bytecode Blocks", bca.numBlocks); + print(Out, "Number Of Types", bca.numTypes); + print(Out, "Number Of Values", bca.numValues); + print(Out, "Number Of Constants", bca.numConstants); + print(Out, "Number Of Global Variables", bca.numGlobalVars); + print(Out, "Number Of Functions", bca.numFunctions); + print(Out, "Number Of Basic Blocks", bca.numBasicBlocks); + print(Out, "Number Of Instructions", bca.numInstructions); + print(Out, "Number Of Operands", bca.numOperands); + print(Out, "Number Of Compaction Tables", bca.numCmpctnTables); + print(Out, "Number Of Symbol Tables", bca.numSymTab); + print(Out, "Maximum Type Slot Number", bca.maxTypeSlot); + print(Out, "Maximum Value Slot Number", bca.maxValueSlot); + print(Out, "Bytes Thrown To Alignment", double(bca.numAlignment), + double(bca.byteSize)); + print(Out, "File Density (bytes/def)", bca.fileDensity); + print(Out, "Globals Density (bytes/def)", bca.globalsDensity); + print(Out, "Function Density (bytes/func)", bca.functionDensity); + print(Out, "Number of VBR 32-bit Integers", bca.vbrCount32); + print(Out, "Number of VBR 64-bit Integers", bca.vbrCount64); + print(Out, "Number of VBR Compressed Bytes", bca.vbrCompBytes); + print(Out, "Number of VBR Expanded Bytes", bca.vbrExpdBytes); + print(Out, "VBR Savings", + double(bca.vbrExpdBytes)-double(bca.vbrCompBytes), + double(bca.byteSize)); + + if ( bca.detailedResults ) { + print(Out, "Module Bytes", + double(bca.BlockSizes[BytecodeFormat::Module]), + double(bca.byteSize)); + print(Out, "Function Bytes", + double(bca.BlockSizes[BytecodeFormat::Function]), + double(bca.byteSize)); + print(Out, "Constant Pool Bytes", + double(bca.BlockSizes[BytecodeFormat::ConstantPool]), + double(bca.byteSize)); + print(Out, "Symbol Table Bytes", + double(bca.BlockSizes[BytecodeFormat::SymbolTable]), + double(bca.byteSize)); + print(Out, "Module Global Info Bytes", + double(bca.BlockSizes[BytecodeFormat::ModuleGlobalInfo]), + double(bca.byteSize)); + print(Out, "Global Type Plane Bytes", + double(bca.BlockSizes[BytecodeFormat::GlobalTypePlane]), + double(bca.byteSize)); + print(Out, "Basic Block Bytes", + double(bca.BlockSizes[BytecodeFormat::BasicBlock]), + double(bca.byteSize)); + print(Out, "Instruction List Bytes", + double(bca.BlockSizes[BytecodeFormat::InstructionList]), + double(bca.byteSize)); + print(Out, "Compaction Table Bytes", + double(bca.BlockSizes[BytecodeFormat::CompactionTable]), + double(bca.byteSize)); + + std::map<unsigned,BytecodeAnalysis::BytecodeFunctionInfo>::iterator I = + bca.FunctionInfo.begin(); + std::map<unsigned,BytecodeAnalysis::BytecodeFunctionInfo>::iterator E = + bca.FunctionInfo.end(); + + while ( I != E ) { + Out << std::left << std::setw(0); + Out << "Function: " << I->second.name << " Slot=" << I->first << "\n"; + print(Out,"Type:", I->second.description); + print(Out,"Byte Size", I->second.byteSize); + print(Out,"Instructions", I->second.numInstructions); + print(Out,"Basic Blocks", I->second.numBasicBlocks); + print(Out,"Operand", I->second.numOperands); + print(Out,"Function Density", I->second.density); + print(Out,"VBR Effectiveness", I->second.vbrEffectiveness); + ++I; + } + } if ( bca.dumpBytecode ) Out << bca.BytecodeDump; } - // vim: sw=2 diff --git a/lib/Bytecode/Analyzer/BytecodeHandler.cpp b/lib/Bytecode/Analyzer/BytecodeHandler.cpp deleted file mode 100644 index 6ceaf38116..0000000000 --- a/lib/Bytecode/Analyzer/BytecodeHandler.cpp +++ /dev/null @@ -1,220 +0,0 @@ -//===-- BytecodeHandler.cpp - Parsing Handler -------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file was developed by Reid Spencer and is distributed under the -// University of Illinois Open Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This header file defines the BytecodeHandler class that gets called by the -// AbstractBytecodeParser when parsing events occur. -// -//===----------------------------------------------------------------------===// - -#include "Parser.h" - -using namespace llvm; - -bool BytecodeHandler::handleError(const std::string& str ) -{ - return false; -} - -void BytecodeHandler::handleStart() -{ -} - -void BytecodeHandler::handleFinish() -{ -} - -void BytecodeHandler::handleModuleBegin(const std::string& id) -{ -} - -void BytecodeHandler::handleModuleEnd(const std::string& id) -{ -} - -void BytecodeHandler::handleVersionInfo( - unsigned char RevisionNum, ///< Byte code revision number - Module::Endianness Endianness, ///< Endianness indicator - Module::PointerSize PointerSize ///< PointerSize indicator -) -{ -} - -void BytecodeHandler::handleModuleGlobalsBegin() -{ -} - -void BytecodeHandler::handleGlobalVariable( - const Type* ElemType, ///< The type of the global variable - bool isConstant, ///< Whether the GV is constant or not - GlobalValue::LinkageTypes ///< The linkage type of the GV -) -{ -} - -void BytecodeHandler::handleInitializedGV( - const Type* ElemType, ///< The type of the global variable - bool isConstant, ///< Whether the GV is constant or not - GlobalValue::LinkageTypes,///< The linkage type of the GV - unsigned initSlot ///< Slot number of GV's initializer -) -{ -} - -void BytecodeHandler::handleType( const Type* Ty ) -{ -} - -void BytecodeHandler::handleFunctionDeclaration( - const Type* FuncType ///< The type of the function -) -{ -} - -void BytecodeHandler::handleModuleGlobalsEnd() -{ -} - -void BytecodeHandler::handleCompactionTableBegin() -{ -} - -void BytecodeHandler::handleCompactionTablePlane( - unsigned Ty, - unsigned NumEntries -) -{ -} - -void BytecodeHandler::handleCompactionTableType( - unsigned i, - unsigned TypSlot, - const Type* -) -{ -} - -void BytecodeHandler::handleCompactionTableValue( - unsigned i, - unsigned ValSlot, - const Type* -) -{ -} - -void BytecodeHandler::handleCompactionTableEnd() -{ -} - -void BytecodeHandler::handleSymbolTableBegin() -{ -} - -void BytecodeHandler::handleSymbolTablePlane( - unsigned Ty, - unsigned NumEntries, - const Type* Typ -) -{ -} - -void BytecodeHandler::handleSymbolTableType( - unsigned i, - unsigned slot, - const std::string& name -) -{ -} - -void BytecodeHandler::handleSymbolTableValue( - unsigned i, - unsigned slot, - const std::string& name -) -{ -} - -void BytecodeHandler::handleSymbolTableEnd() -{ -} - -void BytecodeHandler::handleFunctionBegin( - const Type* FType, - GlobalValue::LinkageTypes linkage -) -{ -} - -void BytecodeHandler::handleFunctionEnd( - const Type* FType -) -{ -} - -void BytecodeHandler::handleBasicBlockBegin( - unsigned blocknum -) -{ -} - -bool BytecodeHandler::handleInstruction( - unsigned Opcode, - const Type* iType, - std::vector<unsigned>& Operands -) -{ - return false; -} - -void BytecodeHandler::handleBasicBlockEnd(unsigned blocknum) -{ -} - -void BytecodeHandler::handleGlobalConstantsBegin() -{ -} - -void BytecodeHandler::handleConstantExpression( - unsigned Opcode, - const Type* Typ, - std::vector<std::pair<const Type*,unsigned> > ArgVec - ) -{ -} - -void BytecodeHandler::handleConstantValue( Constant * c ) -{ -} - -void BytecodeHandler::handleConstantArray( - const ArrayType* AT, - std::vector<unsigned>& Elements ) -{ -} - -void BytecodeHandler::handleConstantStruct( - const StructType* ST, - std::vector<unsigned>& ElementSlots) -{ -} - -void BytecodeHandler::handleConstantPointer( - const PointerType* PT, unsigned Slot) -{ -} - -void BytecodeHandler::handleConstantString( const ConstantArray* CA ) -{ -} - - -void BytecodeHandler::handleGlobalConstantsEnd() -{ -} - -// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/Dumper.cpp b/lib/Bytecode/Analyzer/Dumper.cpp index 12752ff883..d61afe4255 100644 --- a/lib/Bytecode/Analyzer/Dumper.cpp +++ b/lib/Bytecode/Analyzer/Dumper.cpp @@ -212,7 +212,8 @@ public: virtual bool handleInstruction( unsigned Opcode, const Type* iType, - std::vector<unsigned>& Operands + std::vector<unsigned>& Operands, + unsigned Size ) { std::cout << " INST: OpCode=" diff --git a/lib/Bytecode/Analyzer/Parser.cpp b/lib/Bytecode/Analyzer/Parser.cpp index 743db6c315..80800e75a6 100644 --- a/lib/Bytecode/Analyzer/Parser.cpp +++ b/lib/Bytecode/Analyzer/Parser.cpp @@ -1,4 +1,4 @@ -//===- Reader.cpp - Code to read bytecode files ---------------------------===// +//===- Parser.cpp - Code to parse bytecode files --------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This library implements the functionality defined in llvm/Bytecode/Reader.h +// This library implements the functionality defined in llvm/Bytecode/Parser.h // // Note that this library should be as fast as possible, reentrant, and // threadsafe!! @@ -17,7 +17,6 @@ //===----------------------------------------------------------------------===// #include "AnalyzerInternals.h" -#include "ReaderPrimitives.h" #include "llvm/Module.h" #include "llvm/Bytecode/Format.h" #include "Support/StringExtras.h" @@ -37,40 +36,128 @@ using namespace llvm; #define BCR_TRACE(n, X) #endif -#define PARSE_ERROR(inserters) \ - { \ +#define PARSE_ERROR(inserters) { \ std::ostringstream errormsg; \ errormsg << inserters; \ if ( ! handler->handleError( errormsg.str() ) ) \ throw std::string(errormsg.str()); \ } +inline bool AbstractBytecodeParser::moreInBlock() { + return At < BlockEnd; +} -inline void AbstractBytecodeParser::readBlock(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned &Type, unsigned &Size) -{ - Type = read(Buf, EndBuf); - Size = read(Buf, EndBuf); +inline void AbstractBytecodeParser::checkPastBlockEnd(const char * block_name) { + if ( At > BlockEnd ) + PARSE_ERROR("Attempt to read past the end of " << block_name << " block."); } -const Type *AbstractBytecodeParser::getType(unsigned ID) { - //cerr << "Looking up Type ID: " << ID << "\n"; +inline void AbstractBytecodeParser::align32() { + BufPtr Save = At; + At = (const unsigned char *)((unsigned long)(At+3) & (~3UL)); + if ( reportAlignment && At > Save ) handler->handleAlignment( At - Save ); + if (At > BlockEnd) + throw std::string("Ran out of data while aligning!"); +} + +inline unsigned AbstractBytecodeParser::read_uint() { + if (At+4 > BlockEnd) + throw std::string("Ran out of data reading uint!"); + At += 4; + return At[-4] | (At[-3] << 8) | (At[-2] << 16) | (At[-1] << 24); +} + +inline unsigned AbstractBytecodeParser::read_vbr_uint() { + unsigned Shift = 0; + unsigned Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + throw std::string("Ran out of data reading vbr_uint!"); + Result |= (unsigned)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + if (reportVBR) + handler->handleVBR32(At-Save); + return Result; +} + +inline uint64_t AbstractBytecodeParser::read_vbr_uint64() { + unsigned Shift = 0; + uint64_t Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + throw std::string("Ran out of data reading vbr_uint64!"); + Result |= (uint64_t)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + if (reportVBR) + handler->handleVBR64(At-Save); + return Result; +} - if (ID < Type::FirstDerivedTyID) - if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) - return T; // Asked for a primitive type... +inline int64_t AbstractBytecodeParser::read_vbr_int64() { + uint64_t R = read_vbr_uint64(); + if (R & 1) { + if (R != 1) + return -(int64_t)(R >> 1); + else // There is no such thing as -0 with integers. "-0" really means + // 0x8000000000000000. + return 1LL << 63; + } else + return (int64_t)(R >> 1); +} - // Otherwise, derived types need offset... - ID -= Type::FirstDerivedTyID; +inline std::string AbstractBytecodeParser::read_str() { + unsigned Size = read_vbr_uint(); + const unsigned char *OldAt = At; + At += Size; + if (At > BlockEnd) // Size invalid? + throw std::string("Ran out of data reading a string!"); + return std::string((char*)OldAt, Size); +} + +inline void AbstractBytecodeParser::read_data(void *Ptr, void *End) { + unsigned char *Start = (unsigned char *)Ptr; + unsigned Amount = (unsigned char *)End - Start; + if (At+Amount > BlockEnd) + throw std::string("Ran out of data!"); + std::copy(At, At+Amount, Start); + At += Amount; +} - if (!CompactionTypeTable.empty()) { - if (ID >= CompactionTypeTable.size()) - PARSE_ERROR("Type ID out of range for compaction table!"); - return CompactionTypeTable[ID]; +inline void AbstractBytecodeParser::readBlock(unsigned &Type, unsigned &Size) { + Type = read_uint(); + Size = read_uint(); + BlockStart = At; + if ( At + Size > BlockEnd ) + throw std::string("Attempt to size a block past end of memory"); + BlockEnd = At + Size; + if ( reportBlocks ) { + handler->handleBlock( Type, BlockStart, Size ); } +} + +const Type *AbstractBytecodeParser::getType(unsigned ID) { +//cerr << "Looking up Type ID: " << ID << "\n"; + +if (ID < Type::FirstDerivedTyID) + if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) + return T; // Asked for a primitive type... + +// Otherwise, derived types need offset... +ID -= Type::FirstDerivedTyID; - // Is it a module-level type? +if (!CompactionTypeTable.empty()) { + if (ID >= CompactionTypeTable.size()) + PARSE_ERROR("Type ID out of range for compaction table!"); + return CompactionTypeTable[ID]; +} + +// Is it a module-level type? if (ID < ModuleTypes.size()) return ModuleTypes[ID].get(); @@ -83,12 +170,12 @@ const Type *AbstractBytecodeParser::getType(unsigned ID) { return Type::VoidTy; } -bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, - std::vector<unsigned> &Operands) { +bool AbstractBytecodeParser::ParseInstruction(std::vector<unsigned> &Operands) { + BufPtr SaveAt = At; Operands.clear(); unsigned iType = 0; unsigned Opcode = 0; - unsigned Op = read(Buf, EndBuf); + unsigned Op = read_uint(); // bits Instruction format: Common to all formats // -------------------------- @@ -134,61 +221,56 @@ bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, Operands[2] = (Op >> 26) & 63; break; case 0: - Buf -= 4; // Hrm, try this again... - Opcode = read_vbr_uint(Buf, EndBuf); + At -= 4; // Hrm, try this again... + Opcode = read_vbr_uint(); Opcode >>= 2; - iType = read_vbr_uint(Buf, EndBuf); + iType = read_vbr_uint(); - unsigned NumOperands = read_vbr_uint(Buf, EndBuf); + unsigned NumOperands = read_vbr_uint(); Operands.resize(NumOperands); if (NumOperands == 0) PARSE_ERROR("Zero-argument instruction found; this is invalid."); for (unsigned i = 0; i != NumOperands; ++i) - Operands[i] = read_vbr_uint(Buf, EndBuf); - align32(Buf, EndBuf); + Operands[i] = read_vbr_uint(); + align32(); break; } - return handler->handleInstruction(Opcode, getType(iType), Operands); + return handler->handleInstruction(Opcode, getType(iType), Operands, At-SaveAt); } /// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one /// basicblock at a time. This method reads in one of the basicblock packets. -void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf, - BufPtr EndBuf, - unsigned BlockNo) { +void AbstractBytecodeParser::ParseBasicBlock( unsigned BlockNo) { handler->handleBasicBlockBegin( BlockNo ); std::vector<unsigned> Args; bool is_terminating = false; - while (Buf < EndBuf) - is_terminating = ParseInstruction(Buf, EndBuf, Args); + while ( moreInBlock() ) + is_terminating = ParseInstruction(Args); if ( ! is_terminating ) - PARSE_ERROR( - "Failed to recognize instruction as terminating at end of block"); + PARSE_ERROR("Non-terminated basic block found!"); handler->handleBasicBlockEnd( BlockNo ); } - /// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the /// body of a function. In post 1.0 bytecode files, we no longer emit basic /// block individually, in order to avoid per-basic-block overhead. -unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, - BufPtr EndBuf) { +unsigned AbstractBytecodeParser::ParseInstructionList() { unsigned BlockNo = 0; std::vector<unsigned> Args; - while (Buf < EndBuf) { + while ( moreInBlock() ) { handler->handleBasicBlockBegin( BlockNo ); // Read instructions into this basic block until we get to a terminator bool is_terminating = false; - while (Buf < EndBuf && !is_terminating ) - is_terminating = ParseInstruction(Buf, EndBuf, Args ) ; + while (moreInBlock() && !is_terminating ) + is_terminating = ParseInstruction(Args ) ; if (!is_terminating) PARSE_ERROR( "Non-terminated basic block found!"); @@ -199,36 +281,34 @@ unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, return BlockNo; } -void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) { +void AbstractBytecodeParser::ParseSymbolTable() { handler->handleSymbolTableBegin(); - while (Buf < EndBuf) { + while ( moreInBlock() ) { // Symtab block header: [num entries][type id number] - unsigned NumEntries = read_vbr_uint(Buf, EndBuf); - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = read_vbr_uint(); const Type *Ty = getType(Typ); handler->handleSymbolTablePlane( Typ, NumEntries, Ty ); for (unsigned i = 0; i != NumEntries; ++i) { // Symtab entry: [def slot #][name] - unsigned slot = read_vbr_uint(Buf, EndBuf); - std::string Name = read_str(Buf, EndBuf); + unsigned slot = read_vbr_uint(); + std::string Name = read_str(); if (Typ == Type::TypeTyID) handler->handleSymbolTableType( i, slot, Name ); else - handler->handleSymbolTableValue( i, slot, Name ); + handler->handleSymbolTableValue( i, slot, Name ); } } - - if (Buf > EndBuf) - PARSE_ERROR("Tried to read past end of buffer while reading symbol table."); + checkPastBlockEnd("Symbol Table"); handler->handleSymbolTableEnd(); } -void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { +void AbstractBytecodeParser::ParseFunctionLazily() { if (FunctionSignatureList.empty()) throw std::string("FunctionSignatureList empty!"); @@ -236,9 +316,10 @@ void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { FunctionSignatureList.pop_back(); // Save the information for future reading of the function - LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf); + LazyFunctionLoadMap[FType] = LazyFunctionInfo(BlockStart, BlockEnd); + // Pretend we've `parsed' this function - Buf = EndBuf; + At = BlockEnd; } void AbstractBytecodeParser::ParseNextFunction(Type* FType) { @@ -251,21 +332,20 @@ void AbstractBytecodeParser::ParseNextFunction(Type* FType) { return; } - BufPtr Buf = Fi->second.Buf; - BufPtr EndBuf = Fi->second.EndBuf; + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.Buf; assert(Fi->first == FType); LazyFunctionLoadMap.erase(Fi); - this->ParseFunctionBody( FType, Buf, EndBuf ); + this->ParseFunctionBody( FType ); } -void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, - BufPtr &Buf, BufPtr EndBuf ) { +void AbstractBytecodeParser::ParseFunctionBody(const Type* FType ) { GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage; - unsigned LinkageType = read_vbr_uint(Buf, EndBuf); + unsigned LinkageType = read_vbr_uint(); switch (LinkageType) { case 0: Linkage = GlobalValue::ExternalLinkage; break; case 1: Linkage = GlobalValue::WeakLinkage; break; @@ -284,43 +364,45 @@ void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, unsigned BlockNum = 0; bool InsertedArguments = false; - while (Buf < EndBuf) { + BufPtr MyEnd = BlockEnd; + while ( At < MyEnd ) { unsigned Type, Size; - BufPtr OldBuf = Buf; - readBlock(Buf, EndBuf, Type, Size); + BufPtr OldAt = At; + readBlock(Type, Size); switch (Type) { case BytecodeFormat::ConstantPool: - ParseConstantPool(Buf, Buf+Size, FunctionTypes ); + ParseConstantPool(FunctionTypes ); break; case BytecodeFormat::CompactionTable: - ParseCompactionTable(Buf, Buf+Size); + ParseCompactionTable(); break; case BytecodeFormat::BasicBlock: - ParseBasicBlock(Buf, Buf+Size, BlockNum++); + ParseBasicBlock(BlockNum++); break; case BytecodeFormat::InstructionList: if (BlockNum) - PARSE_ERROR("InstructionList must come before basic blocks!"); - BlockNum = ParseInstructionList(Buf, Buf+Size); + PARSE_ERROR("InstructionList must come before basic blocks!"); + BlockNum = ParseInstructionList(); break; case BytecodeFormat::SymbolTable: - ParseSymbolTable(Buf, Buf+Size ); + ParseSymbolTable(); break; default: - Buf += Size; - if (OldBuf > Buf) - PARSE_ERROR("Wrapped around reading bytecode"); + At += Size; + if (OldAt > At) |