diff options
author | Reid Spencer <rspencer@reidspencer.com> | 2004-06-07 17:53:43 +0000 |
---|---|---|
committer | Reid Spencer <rspencer@reidspencer.com> | 2004-06-07 17:53:43 +0000 |
commit | dac69c83c22a00d3f8de3bb4d62b9dbeb0a20caf (patch) | |
tree | b5d0372bb98f6c5080d09fa6da48cb17ded7a08e /lib/Bytecode/Reader | |
parent | d66215607c214a00c79da6625b7de5e0d25a360f (diff) |
Commit For New Tool: llvm-abcd (Analysis of ByteCode Dumper). This tool
will (eventually) provide statistical analysis of bytecode files as well
as the ability to dump them in a low level format (slot numbers not
resolved). The purpose of this is to aid in the Type!=Value change of
bug 122. With this initial release, llvm-abcd merely dumps out the
bytecode. However, the infrastructure for separating bytecode parsing from
handling the parsing events is in place. The style chosen is similar to
SAX XML parsing where a handler object is called to handle the parsing
events. This probably isn't useful to anyone but me right now as there is
no analysis yet, and the dumper doesn't work on every bytecode file. It
will probably be useful by the end of this week. Note that there is some
duplication of code from the bytecode reader. This was done to avoid
introducing errors into the reader and to minimize the impact on
other LLVM developers. At some point, the Analyzer and the Reader will be
integrated to use the same infrastructure. Also, sorry for the minor change
to Instruction.h but I just couldn't bring myself to write code that
depends on Instruction internals.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@14048 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Bytecode/Reader')
-rw-r--r-- | lib/Bytecode/Reader/Analyzer.cpp | 242 | ||||
-rw-r--r-- | lib/Bytecode/Reader/AnalyzerInternals.h | 65 | ||||
-rw-r--r-- | lib/Bytecode/Reader/AnalyzerWrappers.cpp | 208 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Dumper.cpp | 311 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Parser.cpp | 877 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Parser.h | 178 |
6 files changed, 1881 insertions, 0 deletions
diff --git a/lib/Bytecode/Reader/Analyzer.cpp b/lib/Bytecode/Reader/Analyzer.cpp new file mode 100644 index 0000000000..99c3e41f9f --- /dev/null +++ b/lib/Bytecode/Reader/Analyzer.cpp @@ -0,0 +1,242 @@ +//===-- BytecodeHandler.cpp - Parsing Handler -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines the BytecodeHandler class that gets called by the +// AbstractBytecodeParser when parsing events occur. +// +//===----------------------------------------------------------------------===// + +#include "AnalyzerInternals.h" + +using namespace llvm; + + +namespace { + +class AnalyzerHandler : public BytecodeHandler { +public: + bool handleError(const std::string& str ) + { + return false; + } + + void handleStart() + { + } + + void handleFinish() + { + } + + void handleModuleBegin(const std::string& id) + { + } + + void handleModuleEnd(const std::string& id) + { + } + + void handleVersionInfo( + unsigned char RevisionNum, ///< Byte code revision number + Module::Endianness Endianness, ///< Endianness indicator + Module::PointerSize PointerSize ///< PointerSize indicator + ) + { + } + + void handleModuleGlobalsBegin() + { + } + + void handleGlobalVariable( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes ///< The linkage type of the GV + ) + { + } + + void handleInitializedGV( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes,///< The linkage type of the GV + unsigned initSlot ///< Slot number of GV's initializer + ) + { + } + + virtual void handleType( const Type* Ty ) + { + } + + void 
handleFunctionDeclaration( + const Type* FuncType ///< The type of the function + ) + { + } + + void handleModuleGlobalsEnd() + { + } + + void handleCompactionTableBegin() + { + } + + void handleCompactionTablePlane( + unsigned Ty, + unsigned NumEntries + ) + { + } + + void handleCompactionTableType( + unsigned i, + unsigned TypSlot, + const Type* + ) + { + } + + void handleCompactionTableValue( + unsigned i, + unsigned ValSlot, + const Type* + ) + { + } + + void handleCompactionTableEnd() + { + } + + void handleSymbolTableBegin() + { + } + + void handleSymbolTablePlane( + unsigned Ty, + unsigned NumEntries, + const Type* Typ + ) + { + } + + void handleSymbolTableType( + unsigned i, + unsigned slot, + const std::string& name + ) + { + } + + void handleSymbolTableValue( + unsigned i, + unsigned slot, + const std::string& name + ) + { + } + + void handleSymbolTableEnd() + { + } + + void handleFunctionBegin( + const Type* FType, + GlobalValue::LinkageTypes linkage + ) + { + } + + void handleFunctionEnd( + const Type* FType + ) + { + } + + void handleBasicBlockBegin( + unsigned blocknum + ) + { + } + + bool handleInstruction( + unsigned Opcode, + const Type* iType, + std::vector<unsigned>& Operands + ) + { + return false; + } + + void handleBasicBlockEnd(unsigned blocknum) + { + } + + void handleGlobalConstantsBegin() + { + } + + void handleConstantExpression( + unsigned Opcode, + const Type* Typ, + std::vector<std::pair<const Type*,unsigned> > ArgVec + ) + { + } + + void handleConstantValue( Constant * c ) + { + } + + void handleConstantArray( + const ArrayType* AT, + std::vector<unsigned>& Elements ) + { + } + + void handleConstantStruct( + const StructType* ST, + std::vector<unsigned>& ElementSlots) + { + } + + void handleConstantPointer( + const PointerType* PT, unsigned Slot) + { + } + + void handleConstantString( const ConstantArray* CA ) + { + } + + + void handleGlobalConstantsEnd() + { + } + +}; + +} + +void llvm::BytecodeAnalyzer::AnalyzeBytecode( + const 
unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID +) +{ + AnalyzerHandler TheHandler; + AbstractBytecodeParser TheParser(&TheHandler); + TheParser.ParseBytecode( Buf, Length, ModuleID ); + TheParser.ParseAllFunctionBodies(); +} + +// vim: sw=2 diff --git a/lib/Bytecode/Reader/AnalyzerInternals.h b/lib/Bytecode/Reader/AnalyzerInternals.h new file mode 100644 index 0000000000..d9a2e843d8 --- /dev/null +++ b/lib/Bytecode/Reader/AnalyzerInternals.h @@ -0,0 +1,65 @@ +//===-- ReaderInternals.h - Definitions internal to the reader --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines various stuff that is used by the bytecode reader. +// +//===----------------------------------------------------------------------===// + +#ifndef ANALYZER_INTERNALS_H +#define ANALYZER_INTERNALS_H + +#include "Parser.h" +#include "llvm/Bytecode/Analyzer.h" + +// Enable to trace to figure out what the heck is going on when parsing fails +//#define TRACE_LEVEL 10 +//#define DEBUG_OUTPUT + +#if TRACE_LEVEL // ByteCodeReading_TRACEr +#define BCR_TRACE(n, X) \ + if (n < TRACE_LEVEL) std::cerr << std::string(n*2, ' ') << X +#else +#define BCR_TRACE(n, X) +#endif + +namespace llvm { + +class BytecodeAnalyzer { + BytecodeAnalyzer(const BytecodeAnalyzer &); // DO NOT IMPLEMENT + void operator=(const BytecodeAnalyzer &); // DO NOT IMPLEMENT +public: + BytecodeAnalyzer() { } + ~BytecodeAnalyzer() { } + + void AnalyzeBytecode( + const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID + ); + + void DumpBytecode( + const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID + ); + + void dump() 
const { + std::cerr << "BytecodeParser instance!\n"; + } +private: + BytecodeAnalysis TheAnalysis; +}; + +} // End llvm namespace + +#endif + +// vim: sw=2 diff --git a/lib/Bytecode/Reader/AnalyzerWrappers.cpp b/lib/Bytecode/Reader/AnalyzerWrappers.cpp new file mode 100644 index 0000000000..a0e4845a1b --- /dev/null +++ b/lib/Bytecode/Reader/AnalyzerWrappers.cpp @@ -0,0 +1,208 @@ +//===- AnalyzerWrappers.cpp - Analyze bytecode from file or buffer -------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements loading and analysis of a bytecode file and analyzing a +// bytecode buffer. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bytecode/Analyzer.h" +#include "AnalyzerInternals.h" +#include "Support/FileUtilities.h" +#include "Support/StringExtras.h" +#include "Config/unistd.h" +#include <cerrno> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// BytecodeFileAnalyzer - Analyze from an mmap'able file descriptor. 
+// + +namespace { + /// BytecodeFileAnalyzer - parses a bytecode file from a file + class BytecodeFileAnalyzer : public BytecodeAnalyzer { + private: + unsigned char *Buffer; + unsigned Length; + + BytecodeFileAnalyzer(const BytecodeFileAnalyzer&); // Do not implement + void operator=(const BytecodeFileAnalyzer &BFR); // Do not implement + + public: + BytecodeFileAnalyzer(const std::string &Filename, BytecodeAnalysis& bca); + ~BytecodeFileAnalyzer(); + }; +} + +static std::string ErrnoMessage (int savedErrNum, std::string descr) { + return ::strerror(savedErrNum) + std::string(", while trying to ") + descr; +} + +BytecodeFileAnalyzer::BytecodeFileAnalyzer(const std::string &Filename, + BytecodeAnalysis& bca) { + Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length); + if (Buffer == 0) + throw "Error reading file '" + Filename + "'."; + + try { + // Parse the bytecode we mmapped in + if ( bca.dumpBytecode ) + DumpBytecode(Buffer, Length, bca, Filename); + AnalyzeBytecode(Buffer, Length, bca, Filename); + } catch (...) { + UnmapFileFromAddressSpace(Buffer, Length); + throw; + } +} + +BytecodeFileAnalyzer::~BytecodeFileAnalyzer() { + // Unmmap the bytecode... 
+ UnmapFileFromAddressSpace(Buffer, Length); +} + +//===----------------------------------------------------------------------===// +// BytecodeBufferAnalyzer - Read from a memory buffer +// + +namespace { + /// BytecodeBufferAnalyzer - parses a bytecode file from a buffer + /// + class BytecodeBufferAnalyzer : public BytecodeAnalyzer { + private: + const unsigned char *Buffer; + bool MustDelete; + + BytecodeBufferAnalyzer(const BytecodeBufferAnalyzer&); // Do not implement + void operator=(const BytecodeBufferAnalyzer &BFR); // Do not implement + + public: + BytecodeBufferAnalyzer(const unsigned char *Buf, unsigned Length, + BytecodeAnalysis& bca, const std::string &ModuleID); + ~BytecodeBufferAnalyzer(); + + }; +} + +BytecodeBufferAnalyzer::BytecodeBufferAnalyzer(const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID) { + // If not aligned, allocate a new buffer to hold the bytecode... + const unsigned char *ParseBegin = 0; + if ((intptr_t)Buf & 3) { + Buffer = new unsigned char[Length+4]; + unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned + ParseBegin = Buffer + Offset; + memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over + MustDelete = true; + } else { + // If we don't need to copy it over, just use the caller's copy + ParseBegin = Buffer = Buf; + MustDelete = false; + } + try { + if ( bca.dumpBytecode ) + DumpBytecode(ParseBegin, Length, bca, ModuleID); + AnalyzeBytecode(ParseBegin, Length, bca, ModuleID); + } catch (...) 
{ + if (MustDelete) delete [] Buffer; + throw; + } +} + +BytecodeBufferAnalyzer::~BytecodeBufferAnalyzer() { + if (MustDelete) delete [] Buffer; +} + +//===----------------------------------------------------------------------===// +// BytecodeStdinAnalyzer - Read bytecode from Standard Input +// + +namespace { + /// BytecodeStdinAnalyzer - parses a bytecode file from stdin + /// + class BytecodeStdinAnalyzer : public BytecodeAnalyzer { + private: + std::vector<unsigned char> FileData; + unsigned char *FileBuf; + + BytecodeStdinAnalyzer(const BytecodeStdinAnalyzer&); // Do not implement + void operator=(const BytecodeStdinAnalyzer &BFR); // Do not implement + + public: + BytecodeStdinAnalyzer(BytecodeAnalysis& bca); + }; +} + +BytecodeStdinAnalyzer::BytecodeStdinAnalyzer(BytecodeAnalysis& bca ) { + int BlockSize; + unsigned char Buffer[4096*4]; + + // Read in all of the data from stdin, we cannot mmap stdin... + while ((BlockSize = ::read(0 /*stdin*/, Buffer, 4096*4))) { + if (BlockSize == -1) + throw ErrnoMessage(errno, "read from standard input"); + + FileData.insert(FileData.end(), Buffer, Buffer+BlockSize); + } + + if (FileData.empty()) + throw std::string("Standard Input empty!"); + + FileBuf = &FileData[0]; + if (bca.dumpBytecode) + DumpBytecode(&FileData[0], FileData.size(), bca, "<stdin>"); + AnalyzeBytecode(FileBuf, FileData.size(), bca, "<stdin>"); +} + +//===----------------------------------------------------------------------===// +// Wrapper functions +//===----------------------------------------------------------------------===// + +// AnalyzeBytecodeFile - analyze one file +void llvm::AnalyzeBytecodeFile(const std::string &Filename, + BytecodeAnalysis& bca, + std::string *ErrorStr) +{ + try { + if ( Filename != "-" ) + BytecodeFileAnalyzer bfa(Filename,bca); + else + BytecodeStdinAnalyzer bsa(bca); + } catch (std::string &err) { + if (ErrorStr) *ErrorStr = err; + } +} + +// AnalyzeBytecodeBuffer - analyze a buffer +void llvm::AnalyzeBytecodeBuffer( 
+ const unsigned char* Buffer, ///< Pointer to start of bytecode buffer + unsigned BufferSize, ///< Size of the bytecode buffer + BytecodeAnalysis& Results, ///< The results of the analysis + std::string* ErrorStr ///< Errors, if any. + ) +{ + try { + BytecodeBufferAnalyzer(Buffer, BufferSize, Results, "<buffer>" ); + } catch (std::string& err ) { + if ( ErrorStr) *ErrorStr = err; + } +} + + +/// This function prints the contents of rhe BytecodeAnalysis structure in +/// a human legible form. +/// @brief Print BytecodeAnalysis structure to an ostream +void llvm::PrintBytecodeAnalysis(BytecodeAnalysis& bca, std::ostream& Out ) +{ + Out << "Not Implemented Yet.\n"; +} + +// vim: sw=2 diff --git a/lib/Bytecode/Reader/Dumper.cpp b/lib/Bytecode/Reader/Dumper.cpp new file mode 100644 index 0000000000..6ff4ea0c79 --- /dev/null +++ b/lib/Bytecode/Reader/Dumper.cpp @@ -0,0 +1,311 @@ +//===-- BytecodeDumper.cpp - Parsing Handler --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines the BytecodeDumper class that gets called by the +// AbstractBytecodeParser when parsing events occur. It merely dumps the +// information presented to it from the parser. 
+// +//===----------------------------------------------------------------------===// + +#include "AnalyzerInternals.h" +#include "llvm/Constant.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instruction.h" +#include "llvm/Type.h" + +using namespace llvm; + +namespace { + +class BytecodeDumper : public llvm::BytecodeHandler { +public: + + virtual bool handleError(const std::string& str ) + { + std::cout << "ERROR: " << str << "\n"; + return true; + } + + virtual void handleStart() + { + std::cout << "Bytecode {\n"; + } + + virtual void handleFinish() + { + std::cout << "} End Bytecode\n"; + } + + virtual void handleModuleBegin(const std::string& id) + { + std::cout << " Module " << id << " {\n"; + } + + virtual void handleModuleEnd(const std::string& id) + { + std::cout << " } End Module " << id << "\n"; + } + + virtual void handleVersionInfo( + unsigned char RevisionNum, ///< Byte code revision number + Module::Endianness Endianness, ///< Endianness indicator + Module::PointerSize PointerSize ///< PointerSize indicator + ) + { + std::cout << " RevisionNum: " << int(RevisionNum) + << " Endianness: " << Endianness + << " PointerSize: " << PointerSize << "\n"; + } + + virtual void handleModuleGlobalsBegin() + { + std::cout << " BLOCK: ModuleGlobalInfo {\n"; + } + + virtual void handleGlobalVariable( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes Linkage ///< The linkage type of the GV + ) + { + std::cout << " GV: Uninitialized, " + << ( isConstant? 
"Constant, " : "Variable, ") + << " Linkage=" << Linkage << " Type=" + << ElemType->getDescription() << "\n"; + } + + virtual void handleInitializedGV( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes Linkage,///< The linkage type of the GV + unsigned initSlot ///< Slot number of GV's initializer + ) + { + std::cout << " GV: Initialized, " + << ( isConstant? "Constant, " : "Variable, ") + << " Linkage=" << Linkage << " Type=" + << ElemType->getDescription() + << " InitializerSlot=" << initSlot << "\n"; + } + + virtual void handleType( const Type* Ty ) + { + std::cout << " Type: " << Ty->getDescription() << "\n"; + } + + virtual void handleFunctionDeclaration( const Type* FuncType ) + { + std::cout << " Function: " << FuncType->getDescription() << "\n"; + } + + virtual void handleModuleGlobalsEnd() + { + std::cout << " } END BLOCK: ModuleGlobalInfo\n"; + } + + void handleCompactionTableBegin() + { + std::cout << " BLOCK: CompactionTable {\n"; + } + + virtual void handleCompactionTablePlane( unsigned Ty, unsigned NumEntries ) + { + std::cout << " Plane: Ty=" << Ty << " Size=" << NumEntries << "\n"; + } + + virtual void handleCompactionTableType( + unsigned i, + unsigned TypSlot, + const Type* Ty + ) + { + std::cout << " Type: " << i << " Slot:" << TypSlot + << " is " << Ty->getDescription() << "\n"; + } + + virtual void handleCompactionTableValue( + unsigned i, + unsigned ValSlot, + const Type* Ty + ) + { + std::cout << " Value: " << i << " Slot:" << ValSlot + << " is " << Ty->getDescription() << "\n"; + } + + virtual void handleCompactionTableEnd() + { + std::cout << " } END BLOCK: CompactionTable\n"; + } + + virtual void handleSymbolTableBegin() + { + std::cout << " BLOCK: SymbolTable {\n"; + } + + virtual void handleSymbolTablePlane( + unsigned Ty, + unsigned NumEntries, + const Type* Typ + ) + { + std::cout << " Plane: Ty=" << Ty << " Size=" << NumEntries + << " 
Type: " << Typ->getDescription() << "\n"; + } + + virtual void handleSymbolTableType( + unsigned i, + unsigned slot, + const std::string& name + ) + { + std::cout << " Type " << i << " Slot=" << slot + << " Name: " << name << "\n"; + } + + virtual void handleSymbolTableValue( + unsigned i, + unsigned slot, + const std::string& name + ) + { + std::cout << " Value " << i << " Slot=" << slot + << " Name: " << name << "\n"; + } + + virtual void handleSymbolTableEnd() + { + std::cout << " } END BLOCK: SymbolTable\n"; + } + + virtual void handleFunctionBegin( + const Type* FType, + GlobalValue::LinkageTypes linkage + ) + { + std::cout << " BLOCK: Function {\n"; + std::cout << " Linkage: " << linkage << "\n"; + std::cout << " Type: " << FType->getDescription() << "\n"; + } + + virtual void handleFunctionEnd( + const Type* FType + ) + { + std::cout << " } END BLOCK: Function\n"; + } + + virtual void handleBasicBlockBegin( + unsigned blocknum + ) + { + std::cout << " BLOCK: BasicBlock #" << blocknum << "{\n"; + } + + virtual bool handleInstruction( + unsigned Opcode, + const Type* iType, + std::vector<unsigned>& Operands + ) + { + std::cout << " INST: OpCode=" + << Instruction::getOpcodeName(Opcode) << " Type=" + << iType->getDescription() << "\n"; + for ( unsigned i = 0; i < Operands.size(); ++i ) + std::cout << " Op#" << i << " Slot=" << Operands[i] << "\n"; + + return Instruction::isTerminator(Opcode); + } + + virtual void handleBasicBlockEnd(unsigned blocknum) + { + std::cout << " } END BLOCK: BasicBlock #" << blocknum << "{\n"; + } + + virtual void handleGlobalConstantsBegin() + { + std::cout << " BLOCK: GlobalConstants {\n"; + } + + virtual void handleConstantExpression( + unsigned Opcode, + const Type* Typ, + std::vector<std::pair<const Type*,unsigned> > ArgVec + ) + { + std::cout << " EXPR: " << Instruction::getOpcodeName(Opcode) + << " Type=" << Typ->getDescription() << "\n"; + for ( unsigned i = 0; i < ArgVec.size(); ++i ) + std::cout << " Arg#" << i << " Type=" + 
<< ArgVec[i].first->getDescription() << " Slot=" + << ArgVec[i].second << "\n"; + } + + virtual void handleConstantValue( Constant * c ) + { + std::cout << " VALUE: "; + c->print(std::cout); + std::cout << "\n"; + } + + virtual void handleConstantArray( + const ArrayType* AT, + std::vector<unsigned>& Elements ) + { + std::cout << " ARRAY: " << AT->getDescription() << "\n"; + for ( unsigned i = 0; i < Elements.size(); ++i ) + std::cout << " #" << i << " Slot=" << Elements[i] << "\n"; + } + + virtual void handleConstantStruct( + const StructType* ST, + std::vector<unsigned>& Elements) + { + std::cout << " STRUC: " << ST->getDescription() << "\n"; + for ( unsigned i = 0; i < Elements.size(); ++i ) + std::cout << " #" << i << " Slot=" << Elements[i] << "\n"; + } + + virtual void handleConstantPointer( + const PointerType* PT, unsigned Slot) + { + std::cout << " POINT: " << PT->getDescription() + << " Slot=" << Slot << "\n"; + } + + virtual void handleConstantString( const ConstantArray* CA ) + { + std::cout << " STRNG: "; + CA->print(std::cout); + std::cout << "\n"; + } + + virtual void handleGlobalConstantsEnd() + { + std::cout << " } END BLOCK: GlobalConstants\n"; + } +}; + +} + +void BytecodeAnalyzer::DumpBytecode( + const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID + ) +{ + BytecodeDumper TheHandler; + AbstractBytecodeParser TheParser(&TheHandler); + TheParser.ParseBytecode( Buf, Length, ModuleID ); + TheParser.ParseAllFunctionBodies(); +} + +// vim: sw=2 diff --git a/lib/Bytecode/Reader/Parser.cpp b/lib/Bytecode/Reader/Parser.cpp new file mode 100644 index 0000000000..d236b64aae --- /dev/null +++ b/lib/Bytecode/Reader/Parser.cpp @@ -0,0 +1,877 @@ +//===- Reader.cpp - Code to read bytecode files ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library implements the functionality defined in llvm/Bytecode/Reader.h +// +// Note that this library should be as fast as possible, reentrant, and +// threadsafe!! +// +// TODO: Allow passing in an option to ignore the symbol table +// +//===----------------------------------------------------------------------===// + +#include "AnalyzerInternals.h" +#include "llvm/Module.h" +#include "llvm/Bytecode/Format.h" +#include "Support/StringExtras.h" +#include <iostream> +#include <sstream> + +using namespace llvm; + +#define PARSE_ERROR(inserters) \ + { \ + std::ostringstream errormsg; \ + errormsg << inserters; \ + if ( ! handler->handleError( errormsg.str() ) ) \ + throw std::string(errormsg.str()); \ + } + +const Type *AbstractBytecodeParser::getType(unsigned ID) { + //cerr << "Looking up Type ID: " << ID << "\n"; + + if (ID < Type::FirstDerivedTyID) + if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) + return T; // Asked for a primitive type... + + // Otherwise, derived types need offset... + ID -= Type::FirstDerivedTyID; + + if (!CompactionTypeTable.empty()) { + if (ID >= CompactionTypeTable.size()) + PARSE_ERROR("Type ID out of range for compaction table!"); + return CompactionTypeTable[ID]; + } + + // Is it a module-level type? + if (ID < ModuleTypes.size()) + return ModuleTypes[ID].get(); + + // Nope, is it a function-level type? 
+ ID -= ModuleTypes.size(); + if (ID < FunctionTypes.size()) + return FunctionTypes[ID].get(); + + PARSE_ERROR("Illegal type reference!"); + return Type::VoidTy; +} + +bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, + std::vector<unsigned> &Operands) { + Operands.clear(); + unsigned iType = 0; + unsigned Opcode = 0; + unsigned Op = read(Buf, EndBuf); + + // bits Instruction format: Common to all formats + // -------------------------- + // 01-00: Opcode type, fixed to 1. + // 07-02: Opcode + Opcode = (Op >> 2) & 63; + Operands.resize((Op >> 0) & 03); + + switch (Operands.size()) { + case 1: + // bits Instruction format: + // -------------------------- + // 19-08: Resulting type plane + // 31-20: Operand #1 (if set to (2^12-1), then zero operands) + // + iType = (Op >> 8) & 4095; + Operands[0] = (Op >> 20) & 4095; + if (Operands[0] == 4095) // Handle special encoding for 0 operands... + Operands.resize(0); + break; + case 2: + // bits Instruction format: + // -------------------------- + // 15-08: Resulting type plane + // 23-16: Operand #1 + // 31-24: Operand #2 + // + iType = (Op >> 8) & 255; + Operands[0] = (Op >> 16) & 255; + Operands[1] = (Op >> 24) & 255; + break; + case 3: + // bits Instruction format: + // -------------------------- + // 13-08: Resulting type plane + // 19-14: Operand #1 + // 25-20: Operand #2 + // 31-26: Operand #3 + // + iType = (Op >> 8) & 63; + Operands[0] = (Op >> 14) & 63; + Operands[1] = (Op >> 20) & 63; + Operands[2] = (Op >> 26) & 63; + break; + case 0: + Buf -= 4; // Hrm, try this again... 
+ Opcode = read_vbr_uint(Buf, EndBuf); + Opcode >>= 2; + iType = read_vbr_uint(Buf, EndBuf); + + unsigned NumOperands = read_vbr_uint(Buf, EndBuf); + Operands.resize(NumOperands); + + if (NumOperands == 0) + PARSE_ERROR("Zero-argument instruction found; this is invalid."); + + for (unsigned i = 0; i != NumOperands; ++i) + Operands[i] = read_vbr_uint(Buf, EndBuf); + align32(Buf, EndBuf); + break; + } + + return handler->handleInstruction(Opcode, getType(iType), Operands); +} + +/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one +/// basicblock at a time. This method reads in one of the basicblock packets. +void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf, + BufPtr EndBuf, + unsigned BlockNo) { + handler->handleBasicBlockBegin( BlockNo ); + + std::vector<unsigned> Args; + bool is_terminating = false; + while (Buf < EndBuf) + is_terminating = ParseInstruction(Buf, EndBuf, Args); + + if ( ! is_terminating ) + PARSE_ERROR( + "Failed to recognize instruction as terminating at end of block"); + + handler->handleBasicBlockEnd( BlockNo ); +} + + +/// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the +/// body of a function. In post 1.0 bytecode files, we no longer emit basic +/// block individually, in order to avoid per-basic-block overhead. 
+unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, BufPtr EndBuf) { + unsigned BlockNo = 0; + std::vector<unsigned> Args; + + while (Buf < EndBuf) { + handler->handleBasicBlockBegin( BlockNo ); + + // Read instructions into this basic block until we get to a terminator + bool is_terminating = false; + while (Buf < EndBuf && !is_terminating ) + is_terminating = ParseInstruction(Buf, EndBuf, Args ) ; + + if (!is_terminating) + PARSE_ERROR( "Non-terminated basic block found!"); + + handler->handleBasicBlockEnd( BlockNo ); + ++BlockNo; + } + return BlockNo; +} + +void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) { + handler->handleSymbolTableBegin(); + + while (Buf < EndBuf) { + // Symtab block header: [num entries][type id number] + unsigned NumEntries = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(Buf, EndBuf); + const Type *Ty = getType(Typ); + + handler->handleSymbolTablePlane( Typ, NumEntries, Ty ); + + for (unsigned i = 0; i != NumEntries; ++i) { + // Symtab entry: [def slot #][name] + unsigned slot = read_vbr_uint(Buf, EndBuf); + std::string Name = read_str(Buf, EndBuf); + + if (Typ == Type::TypeTyID) + handler->handleSymbolTableType( i, slot, Name ); + else + handler->handleSymbolTableValue( i, slot, Name ); + } + } + + if (Buf > EndBuf) + PARSE_ERROR("Tried to read past end of buffer while reading symbol table."); + + handler->handleSymbolTableEnd(); +} + +void AbstractBytecodeParser |