Commit For New Tool: llvm-abcd (Analysis of ByteCode Dumper). This tool

will (eventually) provide statistical analysis of bytecode files as well as the ability to dump them in a low level format (slot numbers not resolved). The purpose of this is to aid in the Type!=Value change of bug 122. With this initial release, llvm-abcd merely dumps out the bytecode. However, the infrastructure for separating bytecode parsing from handling the parsing events is in place. The style chosen is similar to SAX XML parsing where a handler object is called to handle the parsing events. This probably isn't useful to anyone but me right now as there is no analysis yet, and the dumper doesn't work on every bytecode file. It will probably be useful by the end of this week. Note that there is some duplication of code from the bytecode reader. This was done to prevent errors from being introduced in the reader and to minimize the impact to other LLVM developers. At some point, the Analyzer and the Reader will be integrated to use the same infrastructure. Also, sorry for the minor change to Instruction.h but I just couldn't bring myself to write code that depends on Instruction internals. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@14048 91177308-0d34-0410-b5e6-96231b3b80d8
author: Reid Spencer <rspencer@reidspencer.com> 2004-06-07 17:53:43 +0000
committer: Reid Spencer <rspencer@reidspencer.com> 2004-06-07 17:53:43 +0000
commit: dac69c83c22a00d3f8de3bb4d62b9dbeb0a20caf (patch)
tree: b5d0372bb98f6c5080d09fa6da48cb17ded7a08e /lib/Bytecode/Reader
parent: d66215607c214a00c79da6625b7de5e0d25a360f (diff)
6 files changed, 1881 insertions, 0 deletions
diff --git a/lib/Bytecode/Reader/Analyzer.cpp b/lib/Bytecode/Reader/Analyzer.cpp
new file mode 100644
index 0000000000..99c3e41f9f
--- /dev/null
+++ b/lib/Bytecode/Reader/Analyzer.cpp
@@ -0,0 +1,242 @@
+//===-- Analyzer.cpp - Parsing Handler --------------------------*- C++ -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the 
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the AnalyzerHandler class, a BytecodeHandler that gets
+//  called by the AbstractBytecodeParser when parsing events occur.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalyzerInternals.h"
+#include "llvm/Instruction.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Handler given to the parser by BytecodeAnalyzer::AnalyzeBytecode. It records
+/// nothing yet: every event is ignored except errors and terminator detection.
+class AnalyzerHandler : public BytecodeHandler {
+public:
+  /// Returning false makes the parser's PARSE_ERROR macro throw the message.
+  bool handleError(const std::string& str ) {
+    return false;
+  }
+
+  /// Start event; ignored by this handler.
+  void handleStart() {
+  }
+
+  /// Finish event; ignored by this handler.
+  void handleFinish() {
+  }
+
+  /// Module begin event; ignored by this handler.
+  void handleModuleBegin(const std::string& id) {
+  }
+
+  /// Module end event; ignored by this handler.
+  void handleModuleEnd(const std::string& id) {
+  }
+
+  /// Version information event; ignored by this handler.
+  void handleVersionInfo(
+    unsigned char RevisionNum,        ///< Byte code revision number
+    Module::Endianness Endianness,    ///< Endianness indicator
+    Module::PointerSize PointerSize   ///< PointerSize indicator
+  ) {
+  }
+
+  /// Module globals begin event; ignored by this handler.
+  void handleModuleGlobalsBegin() {
+  }
+
+  /// Uninitialized global variable event; ignored by this handler.
+  void handleGlobalVariable( 
+    const Type* ElemType,     ///< The type of the global variable
+    bool isConstant,          ///< Whether the GV is constant or not
+    GlobalValue::LinkageTypes ///< The linkage type of the GV
+  ) {
+  }
+
+  /// Initialized global variable event; ignored by this handler.
+  void handleInitializedGV( 
+    const Type* ElemType,     ///< The type of the global variable
+    bool isConstant,          ///< Whether the GV is constant or not
+    GlobalValue::LinkageTypes,///< The linkage type of the GV
+    unsigned initSlot         ///< Slot number of GV's initializer
+  ) {
+  }
+
+  /// Type event; ignored by this handler.
+  virtual void handleType( const Type* Ty ) {
+  }
+
+  /// Function declaration event; ignored by this handler.
+  void handleFunctionDeclaration( 
+    const Type* FuncType      ///< The type of the function
+  ) {
+  }
+
+  /// Module globals end event; ignored by this handler.
+  void handleModuleGlobalsEnd() {
+  }
+
+  /// Compaction table begin event; ignored by this handler.
+  void handleCompactionTableBegin() {
+  }
+
+  /// Compaction table plane event; ignored by this handler.
+  void handleCompactionTablePlane( 
+    unsigned Ty, 
+    unsigned NumEntries
+  ) {
+  }
+
+  /// Compaction table type entry event; ignored by this handler.
+  void handleCompactionTableType( 
+    unsigned i, 
+    unsigned TypSlot, 
+    const Type* 
+  ) {
+  }
+
+  /// Compaction table value entry event; ignored by this handler.
+  void handleCompactionTableValue( 
+    unsigned i, 
+    unsigned ValSlot, 
+    const Type* 
+  ) {
+  }
+
+  /// Compaction table end event; ignored by this handler.
+  void handleCompactionTableEnd() {
+  }
+
+  /// Symbol table begin event; ignored by this handler.
+  void handleSymbolTableBegin() {
+  }
+
+  /// Symbol table plane event; ignored by this handler.
+  void handleSymbolTablePlane( 
+    unsigned Ty, 
+    unsigned NumEntries, 
+    const Type* Typ
+  ) {
+  }
+
+  /// Symbol table type entry event; ignored by this handler.
+  void handleSymbolTableType( 
+    unsigned i, 
+    unsigned slot, 
+    const std::string& name 
+  ) {
+  }
+
+  /// Symbol table value entry event; ignored by this handler.
+  void handleSymbolTableValue( 
+    unsigned i, 
+    unsigned slot, 
+    const std::string& name 
+  ) {
+  }
+
+  /// Symbol table end event; ignored by this handler.
+  void handleSymbolTableEnd() {
+  }
+
+  /// Function begin event; ignored by this handler.
+  void handleFunctionBegin(
+    const Type* FType, 
+    GlobalValue::LinkageTypes linkage 
+  ) {
+  }
+
+  /// Function end event; ignored by this handler.
+  void handleFunctionEnd(
+    const Type* FType
+  ) {
+  }
+
+  /// Basic block begin event; ignored by this handler.
+  void handleBasicBlockBegin(
+    unsigned blocknum
+  ) {
+  }
+
+  /// Tell the parser whether Opcode ends a basic block; it stops on true.
+  bool handleInstruction(
+    unsigned Opcode, 
+    const Type* iType, 
+    std::vector<unsigned>& Operands
+  ) {
+    return Instruction::isTerminator(Opcode);
+  }
+
+  /// Basic block end event; ignored by this handler.
+  void handleBasicBlockEnd(unsigned blocknum) {
+  }
+
+  /// Global constants begin event; ignored by this handler.
+  void handleGlobalConstantsBegin() {
+  }
+
+  /// Constant expression event; ignored by this handler.
+  void handleConstantExpression( 
+      unsigned Opcode, 
+      const Type* Typ, 
+      std::vector<std::pair<const Type*,unsigned> > ArgVec 
+    ) {
+  }
+
+  /// Constant value event; ignored by this handler.
+  void handleConstantValue( Constant * c ) {
+  }
+
+  /// Constant array event; ignored by this handler.
+  void handleConstantArray( 
+      const ArrayType* AT, 
+      std::vector<unsigned>& Elements ) {
+  }
+
+  /// Constant struct event; ignored by this handler.
+  void handleConstantStruct(
+      const StructType* ST,
+      std::vector<unsigned>& ElementSlots) {
+  }
+
+  /// Constant pointer event; ignored by this handler.
+  void handleConstantPointer(
+      const PointerType* PT, unsigned Slot) {
+  }
+
+  /// Constant string event; ignored by this handler.
+  void handleConstantString( const ConstantArray* CA ) {
+  }
+
+  /// Global constants end event; ignored by this handler.
+  void handleGlobalConstantsEnd() {
+  }
+};
+
+}
+
+/// Parse the Length bytes at Buf with an AnalyzerHandler receiving the parse
+/// events, then call ParseAllFunctionBodies. Note that bca is not yet written
+/// to by this function.
+void llvm::BytecodeAnalyzer::AnalyzeBytecode(const unsigned char *Buf,
+                                             unsigned Length,
+                                             BytecodeAnalysis& bca,
+                                             const std::string &ModuleID) {
+  AnalyzerHandler EventSink;
+  AbstractBytecodeParser Parser(&EventSink);
+  Parser.ParseBytecode(Buf, Length, ModuleID);
+  Parser.ParseAllFunctionBodies();
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Reader/AnalyzerInternals.h b/lib/Bytecode/Reader/AnalyzerInternals.h
new file mode 100644
index 0000000000..d9a2e843d8
--- /dev/null
+++ b/lib/Bytecode/Reader/AnalyzerInternals.h
@@ -0,0 +1,65 @@
+//===-- AnalyzerInternals.h - Analyzer internal definitions -----*- C++ -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+//  This header file defines various stuff that is used by the bytecode analyzer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ANALYZER_INTERNALS_H
+#define ANALYZER_INTERNALS_H
+
+#include "Parser.h"
+#include "llvm/Bytecode/Analyzer.h"
+
+// Enable to trace to figure out what the heck is going on when parsing fails
+//#define TRACE_LEVEL 10
+//#define DEBUG_OUTPUT
+
+#if TRACE_LEVEL    // ByteCodeReading_TRACEr
+#define BCR_TRACE(n, X) \
+    if (n < TRACE_LEVEL) std::cerr << std::string(n*2, ' ') << X
+#else
+#define BCR_TRACE(n, X)
+#endif
+
+namespace llvm {
+
+/// Entry points for running the bytecode parser over a buffer, declared here
+/// and defined in the .cpp files of this directory: AnalyzeBytecode parses
+/// with the analysis handler, DumpBytecode with the dumping handler.
+class BytecodeAnalyzer {
+public:
+  BytecodeAnalyzer() { }
+  ~BytecodeAnalyzer() { }
+
+  /// Parse Length bytes starting at Buf using the analysis handler.
+  void AnalyzeBytecode(const unsigned char *Buf, unsigned Length,
+                       BytecodeAnalysis& bca, const std::string &ModuleID);
+
+  /// Parse Length bytes starting at Buf using the dumping handler.
+  void DumpBytecode(const unsigned char *Buf, unsigned Length,
+                    BytecodeAnalysis& bca, const std::string &ModuleID);
+
+  /// Debugging aid: writes a fixed identification string to std::cerr.
+  void dump() const { std::cerr << "BytecodeParser instance!\n"; }
+
+private:
+  // Copying is disabled: declared private and intentionally left undefined.
+  BytecodeAnalyzer(const BytecodeAnalyzer &);  // DO NOT IMPLEMENT
+  void operator=(const BytecodeAnalyzer &);    // DO NOT IMPLEMENT
+
+  // NOTE(review): not referenced by any code visible in this commit.
+  BytecodeAnalysis TheAnalysis;
+};
+
+} // End llvm namespace
+
+#endif
+
+// vim: sw=2
diff --git a/lib/Bytecode/Reader/AnalyzerWrappers.cpp b/lib/Bytecode/Reader/AnalyzerWrappers.cpp
new file mode 100644
index 0000000000..a0e4845a1b
--- /dev/null
+++ b/lib/Bytecode/Reader/AnalyzerWrappers.cpp
@@ -0,0 +1,208 @@
+//===- AnalyzerWrappers.cpp - Analyze bytecode from file or buffer  -------===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file implements loading and analysis of a bytecode file and analyzing a
+// bytecode buffer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bytecode/Analyzer.h"
+#include "AnalyzerInternals.h"
+#include "Support/FileUtilities.h"
+#include "Support/StringExtras.h"
+#include "Config/unistd.h"
+#include <cerrno>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// BytecodeFileAnalyzer - Analyze from an mmap'able file descriptor.
+//
+
+namespace {
+  /// BytecodeFileAnalyzer - parses a bytecode file from a file
+  class BytecodeFileAnalyzer : public BytecodeAnalyzer {
+  public:
+    /// Reads Filename into memory and runs the analysis on it.
+    BytecodeFileAnalyzer(const std::string &Filename, BytecodeAnalysis& bca);
+    ~BytecodeFileAnalyzer();   ///< Unmaps Buffer.
+
+  private:
+    unsigned char *Buffer;     ///< Start of the file contents in memory
+    unsigned Length;           ///< Length used when mapping and unmapping
+
+    BytecodeFileAnalyzer(const BytecodeFileAnalyzer&); // Do not implement
+    void operator=(const BytecodeFileAnalyzer &Other); // Do not implement
+  };
+}
+
+static std::string ErrnoMessage (int savedErrNum, std::string descr) {
+  return std::string(::strerror(savedErrNum)) + ", while trying to " + descr;
+} // Result: "<strerror text>, while trying to <descr>"
+
+/// Map Filename into memory, dump it if bca.dumpBytecode is set, then analyze
+/// it. If parsing throws, the mapping is released here before rethrowing.
+BytecodeFileAnalyzer::BytecodeFileAnalyzer(const std::string &Filename,
+                                           BytecodeAnalysis& bca) {
+  Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length);
+  if (!Buffer)
+    throw std::string("Error reading file '") + Filename + "'.";
+  try {
+    if (bca.dumpBytecode)
+      DumpBytecode(Buffer, Length, bca, Filename);
+    AnalyzeBytecode(Buffer, Length, bca, Filename);
+  } catch (...) {
+    UnmapFileFromAddressSpace(Buffer, Length);
+    throw;
+  }
+}
+
+BytecodeFileAnalyzer::~BytecodeFileAnalyzer() {
+  // Unmap the bytecode that the constructor read into the address space
+  UnmapFileFromAddressSpace(Buffer, Length);
+}
+
+//===----------------------------------------------------------------------===//
+// BytecodeBufferAnalyzer - Read from a memory buffer
+//
+
+namespace {
+  /// BytecodeBufferAnalyzer - parses a bytecode file from a buffer
+  ///
+  class BytecodeBufferAnalyzer : public BytecodeAnalyzer {
+  public:
+    /// Analyzes Length bytes at Buf, copying them first if Buf is misaligned.
+    BytecodeBufferAnalyzer(const unsigned char *Buf, unsigned Length,
+                           BytecodeAnalysis& bca, const std::string &ModuleID);
+    ~BytecodeBufferAnalyzer();
+
+  private:
+    const unsigned char *Buffer;  ///< Caller's buffer, or our own allocation
+    bool MustDelete;              ///< True when Buffer was allocated with new[]
+
+    BytecodeBufferAnalyzer(const BytecodeBufferAnalyzer&); // Do not implement
+    void operator=(const BytecodeBufferAnalyzer &Other);   // Do not implement
+  };
+}
+
+/// Analyze the Length bytes at Buf. The parser is handed a 4-byte aligned
+/// start address, so a misaligned Buf is first copied into a buffer we own.
+BytecodeBufferAnalyzer::BytecodeBufferAnalyzer(const unsigned char *Buf,
+                                               unsigned Length,
+                                               BytecodeAnalysis& bca,
+                                               const std::string &ModuleID) {
+  const unsigned char *AlignedStart = 0;
+  if (((intptr_t)Buf & 3) == 0) {
+    // Already aligned: parse straight out of the caller's memory
+    AlignedStart = Buffer = Buf;
+    MustDelete = false;
+  } else {
+    Buffer = new unsigned char[Length+4];   // Slack for the alignment shift
+    AlignedStart = Buffer + (4 - ((intptr_t)Buffer & 3));
+    memcpy((unsigned char*)AlignedStart, Buf, Length);
+    MustDelete = true;
+  }
+  try {
+    if (bca.dumpBytecode)
+      DumpBytecode(AlignedStart, Length, bca, ModuleID);
+    AnalyzeBytecode(AlignedStart, Length, bca, ModuleID);
+  } catch (...) {
+    if (MustDelete) delete [] Buffer;
+    throw;
+  }
+}
+
+BytecodeBufferAnalyzer::~BytecodeBufferAnalyzer() {
+  if (MustDelete) delete [] Buffer;  // Free only the copy the constructor made
+}
+
+//===----------------------------------------------------------------------===//
+//  BytecodeStdinAnalyzer - Read bytecode from Standard Input
+//
+
+namespace {
+  /// BytecodeStdinAnalyzer - parses a bytecode file from stdin
+  /// 
+  class BytecodeStdinAnalyzer : public BytecodeAnalyzer {
+  public:
+    /// Reads all of standard input and runs the analysis on it.
+    BytecodeStdinAnalyzer(BytecodeAnalysis& bca);
+
+  private:
+    std::vector<unsigned char> FileData;  ///< Everything read from stdin
+    unsigned char *FileBuf;               ///< Points at FileData's first byte
+    BytecodeStdinAnalyzer(const BytecodeStdinAnalyzer&); // Do not implement
+    void operator=(const BytecodeStdinAnalyzer &Other);  // Do not implement
+  };
+}
+
+/// Slurp standard input into FileData (stdin cannot be mmapped), then dump
+/// and/or analyze it. Throws std::string on read failure or empty input.
+BytecodeStdinAnalyzer::BytecodeStdinAnalyzer(BytecodeAnalysis& bca ) {
+  unsigned char Chunk[4096*4];
+  for (;;) {
+    int NumRead = ::read(0 /*stdin*/, Chunk, sizeof(Chunk));
+    if (NumRead == 0)     // End of input
+      break;
+    if (NumRead == -1)
+      throw ErrnoMessage(errno, "read from standard input");
+    FileData.insert(FileData.end(), Chunk, Chunk+NumRead);
+  }
+  if (FileData.empty())
+    throw std::string("Standard Input empty!");
+
+  FileBuf = &FileData[0];
+  if (bca.dumpBytecode)
+    DumpBytecode(FileBuf, FileData.size(), bca, "<stdin>");
+  AnalyzeBytecode(FileBuf, FileData.size(), bca, "<stdin>");
+}
+
+//===----------------------------------------------------------------------===//
+// Wrapper functions
+//===----------------------------------------------------------------------===//
+
+// AnalyzeBytecodeFile - analyze one file
+void llvm::AnalyzeBytecodeFile(const std::string &Filename,
+                               BytecodeAnalysis& bca,
+                               std::string *ErrorStr) {
+  // Constructing an analyzer does all the work; "-" selects standard input.
+  try {
+    if (Filename == "-")
+      BytecodeStdinAnalyzer FromStdin(bca);
+    else
+      BytecodeFileAnalyzer FromFile(Filename, bca);
+  } catch (std::string &Message) {
+    if (ErrorStr) *ErrorStr = Message;
+  }
+}
+
+// AnalyzeBytecodeBuffer - analyze a buffer
+void llvm::AnalyzeBytecodeBuffer(
+       const unsigned char* Buffer, ///< Pointer to start of bytecode buffer
+       unsigned BufferSize,         ///< Size of the bytecode buffer
+       BytecodeAnalysis& Results,   ///< The results of the analysis
+       std::string* ErrorStr        ///< Errors, if any.
+     ) {
+  // Constructing the analyzer performs the whole analysis of the buffer.
+  try {
+    BytecodeBufferAnalyzer FromBuffer(Buffer, BufferSize, Results, "<buffer>");
+  } catch (std::string& Message) {
+    if (ErrorStr) *ErrorStr = Message;
+  }
+}
+
+
+/// This function prints the contents of the BytecodeAnalysis structure in
+/// a human legible form. (Currently it only prints a placeholder message.)
+/// @brief Print BytecodeAnalysis structure to an ostream
+void llvm::PrintBytecodeAnalysis(BytecodeAnalysis& bca, std::ostream& Out )
+{
+  Out << "Not Implemented Yet.\n";
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Reader/Dumper.cpp b/lib/Bytecode/Reader/Dumper.cpp
new file mode 100644
index 0000000000..6ff4ea0c79
--- /dev/null
+++ b/lib/Bytecode/Reader/Dumper.cpp
@@ -0,0 +1,311 @@
+//===-- Dumper.cpp - Parsing Handler ----------------------------*- C++ -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the 
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the BytecodeDumper class that gets called by the
+//  AbstractBytecodeParser when parsing events occur. It merely dumps the
+//  information presented to it from the parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalyzerInternals.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instruction.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Handler that prints every parse event to std::cout, indented by nesting.
+class BytecodeDumper : public llvm::BytecodeHandler {
+public:
+  /// Print the error; returning true keeps PARSE_ERROR from throwing.
+  virtual bool handleError(const std::string& str ) {
+    std::cout << "ERROR: " << str << "\n";
+    return true;
+  }
+
+  /// Print the opening line of the dump.
+  virtual void handleStart() {
+    std::cout << "Bytecode {\n";
+  }
+
+  /// Print the closing line of the dump.
+  virtual void handleFinish() {
+    std::cout << "} End Bytecode\n";
+  }
+
+  /// Print the module opening line with its identifier.
+  virtual void handleModuleBegin(const std::string& id) {
+    std::cout << "  Module " << id << " {\n";
+  }
+
+  /// Print the module closing line with its identifier.
+  virtual void handleModuleEnd(const std::string& id) {
+    std::cout << "  } End Module " << id << "\n";
+  }
+
+  /// Print the revision number, endianness and pointer size values.
+  virtual void handleVersionInfo(
+    unsigned char RevisionNum,        ///< Byte code revision number
+    Module::Endianness Endianness,    ///< Endianness indicator
+    Module::PointerSize PointerSize   ///< PointerSize indicator
+  ) {
+    std::cout << "    RevisionNum: " << int(RevisionNum) 
+              << " Endianness: " << Endianness
+              << " PointerSize: " << PointerSize << "\n";
+  }
+
+  /// Print the opening line of the ModuleGlobalInfo block.
+  virtual void handleModuleGlobalsBegin() {
+    std::cout << "    BLOCK: ModuleGlobalInfo {\n";
+  }
+
+  /// Print an uninitialized global variable: constness, linkage and type.
+  virtual void handleGlobalVariable( 
+    const Type* ElemType,     ///< The type of the global variable
+    bool isConstant,          ///< Whether the GV is constant or not
+    GlobalValue::LinkageTypes Linkage ///< The linkage type of the GV
+  ) {
+    std::cout << "      GV: Uninitialized, " 
+             << ( isConstant? "Constant, " : "Variable, ")
+             << " Linkage=" << Linkage << " Type=" 
+             << ElemType->getDescription() << "\n"; 
+  }
+
+  /// Print an initialized global variable, including its initializer slot.
+  virtual void handleInitializedGV( 
+    const Type* ElemType,     ///< The type of the global variable
+    bool isConstant,          ///< Whether the GV is constant or not
+    GlobalValue::LinkageTypes Linkage,///< The linkage type of the GV
+    unsigned initSlot         ///< Slot number of GV's initializer
+  ) {
+    std::cout << "      GV: Initialized, " 
+             << ( isConstant? "Constant, " : "Variable, ")
+             << " Linkage=" << Linkage << " Type=" 
+             << ElemType->getDescription()
+             << " InitializerSlot=" << initSlot << "\n"; 
+  }
+
+  /// Print the description of a type.
+  virtual void handleType( const Type* Ty ) {
+    std::cout << "      Type: " << Ty->getDescription() << "\n";
+  }
+
+  /// Print the type description of a declared function.
+  virtual void handleFunctionDeclaration( const Type* FuncType ) {
+    std::cout << "      Function: " << FuncType->getDescription() << "\n";
+  }
+
+  /// Print the closing line of the ModuleGlobalInfo block.
+  virtual void handleModuleGlobalsEnd() {
+    std::cout << "    } END BLOCK: ModuleGlobalInfo\n";
+  }
+
+  /// Print the opening line of the CompactionTable block.
+  void handleCompactionTableBegin() {
+    std::cout << "    BLOCK: CompactionTable {\n";
+  }
+
+  /// Print a compaction table plane header: type slot and entry count.
+  virtual void handleCompactionTablePlane( unsigned Ty, unsigned NumEntries ) {
+    std::cout << "      Plane: Ty=" << Ty << " Size=" << NumEntries << "\n";
+  }
+
+  /// Print one type entry of the compaction table.
+  virtual void handleCompactionTableType( 
+    unsigned i, 
+    unsigned TypSlot, 
+    const Type* Ty
+  ) {
+    std::cout << "        Type: " << i << " Slot:" << TypSlot 
+              << " is " << Ty->getDescription() << "\n"; 
+  }
+
+  /// Print one value entry of the compaction table.
+  virtual void handleCompactionTableValue( 
+    unsigned i, 
+    unsigned ValSlot, 
+    const Type* Ty 
+  ) {
+    std::cout << "        Value: " << i << " Slot:" << ValSlot 
+              << " is " << Ty->getDescription() << "\n"; 
+  }
+
+  /// Print the closing line of the CompactionTable block.
+  virtual void handleCompactionTableEnd() {
+    std::cout << "    } END BLOCK: CompactionTable\n";
+  }
+
+  /// Print the opening line of the SymbolTable block.
+  virtual void handleSymbolTableBegin() {
+    std::cout << "    BLOCK: SymbolTable {\n";
+  }
+
+  /// Print a symbol table plane header: type slot, entry count and type.
+  virtual void handleSymbolTablePlane( 
+    unsigned Ty, 
+    unsigned NumEntries, 
+    const Type* Typ
+  ) {
+    std::cout << "      Plane: Ty=" << Ty << " Size=" << NumEntries
+              << " Type: " << Typ->getDescription() << "\n"; 
+  }
+
+  /// Print one named type entry of the symbol table.
+  virtual void handleSymbolTableType( 
+    unsigned i, 
+    unsigned slot, 
+    const std::string& name 
+  ) {
+    std::cout << "        Type " << i << " Slot=" << slot
+              << " Name: " << name << "\n"; 
+  }
+
+  /// Print one named value entry of the symbol table.
+  virtual void handleSymbolTableValue( 
+    unsigned i, 
+    unsigned slot, 
+    const std::string& name 
+  ) {
+    std::cout << "        Value " << i << " Slot=" << slot
+              << " Name: " << name << "\n";
+  }
+
+  /// Print the closing line of the SymbolTable block.
+  virtual void handleSymbolTableEnd() {
+    std::cout << "    } END BLOCK: SymbolTable\n";
+  }
+
+  /// Print the opening of a Function block with its linkage and type.
+  virtual void handleFunctionBegin(
+    const Type* FType, 
+    GlobalValue::LinkageTypes linkage 
+  ) {
+    std::cout << "    BLOCK: Function {\n";
+    std::cout << "      Linkage: " << linkage << "\n";
+    std::cout << "      Type: " << FType->getDescription() << "\n";
+  }
+
+  /// Print the closing line of a Function block.
+  virtual void handleFunctionEnd(
+    const Type* FType
+  ) {
+    std::cout << "    } END BLOCK: Function\n";
+  }
+
+  /// Print the opening line of a BasicBlock block with its number.
+  virtual void handleBasicBlockBegin(
+    unsigned blocknum
+  ) {
+    std::cout << "      BLOCK: BasicBlock #" << blocknum << "{\n";
+  }
+
+  /// Print an instruction and its operand slots; true if it is a terminator.
+  virtual bool handleInstruction(
+    unsigned Opcode, 
+    const Type* iType, 
+    std::vector<unsigned>& Operands
+  ) {
+    std::cout << "        INST: OpCode=" 
+              << Instruction::getOpcodeName(Opcode) << " Type=" 
+              << iType->getDescription() << "\n";
+    for ( unsigned i = 0; i < Operands.size(); ++i ) 
+      std::cout << "          Op#" << i << " Slot=" << Operands[i] << "\n";
+    
+    return Instruction::isTerminator(Opcode); 
+  }
+
+  /// Print the closing line of a BasicBlock block (no trailing brace).
+  virtual void handleBasicBlockEnd(unsigned blocknum) {
+    std::cout << "      } END BLOCK: BasicBlock #" << blocknum << "\n";
+  }
+
+  /// Print the opening line of the GlobalConstants block.
+  virtual void handleGlobalConstantsBegin() {
+    std::cout << "    BLOCK: GlobalConstants {\n";
+  }
+
+  /// Print a constant expression: opcode, type, then each argument.
+  virtual void handleConstantExpression( 
+      unsigned Opcode, 
+      const Type* Typ, 
+      std::vector<std::pair<const Type*,unsigned> > ArgVec 
+    ) {
+    std::cout << "      EXPR: " << Instruction::getOpcodeName(Opcode)
+              << " Type=" << Typ->getDescription() << "\n";
+    for ( unsigned i = 0; i < ArgVec.size(); ++i ) 
+      std::cout << "        Arg#" << i << " Type=" 
+        << ArgVec[i].first->getDescription() << " Slot=" 
+        << ArgVec[i].second << "\n";
+  }
+
+  /// Print a constant using its own print method.
+  virtual void handleConstantValue( Constant * c ) {
+    std::cout << "      VALUE: ";
+    c->print(std::cout);
+    std::cout << "\n";
+  }
+
+  /// Print a constant array: its type, then the slot of each element.
+  virtual void handleConstantArray( 
+      const ArrayType* AT, 
+      std::vector<unsigned>& Elements ) {
+    std::cout << "      ARRAY: " << AT->getDescription() << "\n";
+    for ( unsigned i = 0; i < Elements.size(); ++i ) 
+      std::cout << "        #" << i << " Slot=" << Elements[i] << "\n";
+  }
+
+  /// Print a constant struct: its type, then the slot of each element.
+  virtual void handleConstantStruct(
+      const StructType* ST,
+      std::vector<unsigned>& Elements) {
+    std::cout << "      STRUC: " << ST->getDescription() << "\n";
+    for ( unsigned i = 0; i < Elements.size(); ++i ) 
+      std::cout << "        #" << i << " Slot=" << Elements[i] << "\n";
+  }
+
+  /// Print a constant pointer: its type and the slot it refers to.
+  virtual void handleConstantPointer(
+      const PointerType* PT, unsigned Slot) {
+    std::cout << "      POINT: " << PT->getDescription() 
+              << " Slot=" << Slot << "\n";
+  }
+
+  /// Print a constant string using the array's own print method.
+  virtual void handleConstantString( const ConstantArray* CA ) {
+    std::cout << "      STRNG: ";
+    CA->print(std::cout); 
+    std::cout << "\n";
+  }
+
+  /// Print the closing line of the GlobalConstants block.
+  virtual void handleGlobalConstantsEnd() {
+    std::cout << "    } END BLOCK: GlobalConstants\n";
+  }
+};
+
+}
+
+/// Parse the bytecode in Buf with a BytecodeDumper as the handler, so every
+/// parse event is printed to std::cout. bca is not used by this function.
+void BytecodeAnalyzer::DumpBytecode(const unsigned char *Buf,
+                                    unsigned Length,
+                                    BytecodeAnalysis& bca,
+                                    const std::string &ModuleID) {
+  BytecodeDumper Printer;
+  AbstractBytecodeParser Parser(&Printer);
+
+  Parser.ParseBytecode(Buf, Length, ModuleID);
+  Parser.ParseAllFunctionBodies();
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Reader/Parser.cpp b/lib/Bytecode/Reader/Parser.cpp
new file mode 100644
index 0000000000..d236b64aae
--- /dev/null
+++ b/lib/Bytecode/Reader/Parser.cpp
@@ -0,0 +1,877 @@
+//===- Parser.cpp - Code to read bytecode files ---------------------------===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Bytecode/Reader.h
+//
+// Note that this library should be as fast as possible, reentrant, and 
+// threadsafe!!
+//
+// TODO: Allow passing in an option to ignore the symbol table
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalyzerInternals.h"
+#include "llvm/Module.h"
+#include "llvm/Bytecode/Format.h"
+#include "Support/StringExtras.h"
+#include <iostream>
+#include <sstream>
+
+using namespace llvm;
+
+#define PARSE_ERROR(inserters) /* throws unless handler returns true */ \
+  { \
+    std::ostringstream errormsg; /* builds the message text */ \
+    errormsg << inserters; /* inserters may itself be a chain of << operands */ \
+    if ( ! handler->handleError( errormsg.str() ) ) /* handler said: fatal */ \
+      throw std::string(errormsg.str()); \
+  }
+
+/// Resolve a type slot number: primitive IDs first, then the compaction table
+/// when one is active, otherwise the module-level then function-level types.
+const Type *AbstractBytecodeParser::getType(unsigned ID) {
+  if (ID < Type::FirstDerivedTyID)
+    if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID))
+      return T;   // Asked for a primitive type...
+
+  // Otherwise, derived types need offset...
+  ID -= Type::FirstDerivedTyID;
+
+  if (!CompactionTypeTable.empty()) {
+    if (ID < CompactionTypeTable.size())
+      return CompactionTypeTable[ID];
+    PARSE_ERROR("Type ID out of range for compaction table!");
+    return Type::VoidTy;   // Reached only if the handler suppressed the error
+  }
+
+  // Is it a module-level type?
+  if (ID < ModuleTypes.size())
+    return ModuleTypes[ID].get();
+
+  // Nope, is it a function-level type?
+  ID -= ModuleTypes.size();
+  if (ID < FunctionTypes.size())
+    return FunctionTypes[ID].get();
+  PARSE_ERROR("Illegal type reference!");
+  return Type::VoidTy;
+}
+
+bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf,
+                                      std::vector<unsigned> &Operands) {
+  Operands.clear();     // Refilled below with this instruction's operand slots
+  unsigned iType = 0;   // Type slot, resolved with getType() before returning
+  unsigned Opcode = 0;
+  unsigned Op = read(Buf, EndBuf);  // Presumably 4 bytes: case 0 rewinds by 4
+
+  // bits   Instruction format:        Common to all formats
+  // --------------------------
+  // 01-00: Format selector (0-3); also used as the initial operand count
+  // 07-02: Opcode
+  Opcode    = (Op >> 2) & 63;
+  Operands.resize((Op >> 0) & 03);
+
+  switch (Operands.size()) {
+  case 1:
+    // bits   Instruction format:
+    // --------------------------
+    // 19-08: Resulting type plane
+    // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
+    //
+    iType   = (Op >>  8) & 4095;
+    Operands[0] = (Op >> 20) & 4095;
+    if (Operands[0] == 4095)    // Handle special encoding for 0 operands...
+      Operands.resize(0);
+    break;
+  case 2:
+    // bits   Instruction format:
+    // --------------------------
+    // 15-08: Resulting type plane
+    // 23-16: Operand #1
+    // 31-24: Operand #2  
+    //
+    iType   = (Op >>  8) & 255;
+    Operands[0] = (Op >> 16) & 255;
+    Operands[1] = (Op >> 24) & 255;
+    break;
+  case 3:
+    // bits   Instruction format:
+    // --------------------------
+    // 13-08: Resulting type plane
+    // 19-14: Operand #1
+    // 25-20: Operand #2
+    // 31-26: Operand #3
+    //
+    iType   = (Op >>  8) & 63;
+    Operands[0] = (Op >> 14) & 63;
+    Operands[1] = (Op >> 20) & 63;
+    Operands[2] = (Op >> 26) & 63;
+    break;
+  case 0:
+    Buf -= 4;  // Hrm, try this again... (re-read the instruction as VBR fields)
+    Opcode = read_vbr_uint(Buf, EndBuf);
+    Opcode >>= 2;   // Drop the two low format bits
+    iType = read_vbr_uint(Buf, EndBuf);
+
+    unsigned NumOperands = read_vbr_uint(Buf, EndBuf);
+    Operands.resize(NumOperands);
+
+    if (NumOperands == 0)
+      PARSE_ERROR("Zero-argument instruction found; this is invalid.");
+
+    for (unsigned i = 0; i != NumOperands; ++i)
+      Operands[i] = read_vbr_uint(Buf, EndBuf);
+    align32(Buf, EndBuf);
+    break;
+  }
+  // Pass the decoded instruction to the handler; its result is our result.
+  return handler->handleInstruction(Opcode, getType(iType), Operands);
+}
+
+/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one
+/// basicblock at a time.  This method reads in one of the basicblock packets.
+void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf, BufPtr EndBuf,
+                                             unsigned BlockNo) {
+  handler->handleBasicBlockBegin(BlockNo);
+
+  // Consume the whole packet; only the last instruction's verdict survives.
+  std::vector<unsigned> Operands;
+  bool LastWasTerminator = false;
+  while (Buf < EndBuf)
+    LastWasTerminator = ParseInstruction(Buf, EndBuf, Operands);
+
+  if (!LastWasTerminator)
+    PARSE_ERROR(
+      "Failed to recognize instruction as terminating at end of block");
+
+  handler->handleBasicBlockEnd(BlockNo);
+}
+
+
+/// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the
+/// body of a function.  In post 1.0 bytecode files, we no longer emit basic
+/// block individually, in order to avoid per-basic-block overhead.
+unsigned AbstractBytecodeParser::ParseInstructionList(BufPtr &Buf,
+                                                      BufPtr EndBuf) {
+  std::vector<unsigned> Operands;
+  unsigned NumBlocks = 0;
+
+  for ( ; Buf < EndBuf; ++NumBlocks) {
+    handler->handleBasicBlockBegin(NumBlocks);
+
+    // A block runs up to the first instruction reported as a terminator.
+    // Buf < EndBuf holds on entry, so at least one instruction is parsed.
+    bool SawTerminator = false;
+    do {
+      SawTerminator = ParseInstruction(Buf, EndBuf, Operands);
+    } while (Buf < EndBuf && !SawTerminator);
+    if (!SawTerminator)
+      PARSE_ERROR( "Non-terminated basic block found!");
+    handler->handleBasicBlockEnd(NumBlocks);
+  }
+  return NumBlocks;
+}
+
+/// Parse a symbol table block: a sequence of planes, each a [num entries]
+/// [type id number] header followed by that many [def slot #][name] entries.
+void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) {
+  handler->handleSymbolTableBegin();
+
+  while (Buf < EndBuf) {
+    // Plane header
+    const unsigned PlaneSize = read_vbr_uint(Buf, EndBuf);
+    const unsigned TypeSlot = read_vbr_uint(Buf, EndBuf);
+    handler->handleSymbolTablePlane(TypeSlot, PlaneSize, getType(TypeSlot));
+
+    // The Type::TypeTyID plane names types; every other plane names values.
+    const bool IsTypePlane = (TypeSlot == Type::TypeTyID);
+    for (unsigned Idx = 0; Idx != PlaneSize; ++Idx) {
+      unsigned SlotNo = read_vbr_uint(Buf, EndBuf);
+      std::string Name = read_str(Buf, EndBuf);
+      if (IsTypePlane)
+        handler->handleSymbolTableType(Idx, SlotNo, Name);
+      else
+        handler->handleSymbolTableValue(Idx, SlotNo, Name);
+    }
+  }
+
+  if (Buf > EndBuf)
+    PARSE_ERROR("Tried to read past end of buffer while reading symbol table.");
+
+  handler->handleSymbolTableEnd();
+}
+
+/// Record where the next function's body lives instead of parsing it now.
+void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) {
+  if (FunctionSignatureList.empty())
+    throw std::string("FunctionSignatureList empty!");
+
+  // Signatures are consumed from the back of the list, one per body.
+  const Type *Signature = FunctionSignatureList.back();
+  FunctionSignatureList.pop_back();
+  LazyFunctionLoadMap[Signature] = LazyFunctionInfo(Buf, EndBuf);
+
+  Buf = EndBuf;   // Skip the body; it is parsed later from the saved range
+}
+
+/// Parse the deferred body recorded for FType and drop it from the lazy map.
+void AbstractBytecodeParser::ParseNextFunction(Type* FType) {
+  LazyFunctionMap::iterator Entry = LazyFunctionLoadMap.find(FType);
+  if (Entry == LazyFunctionLoadMap.end()) {
+    // Nothing was recorded for this type: report it and give up.
+    PARSE_ERROR("Unrecognized function of type " << FType->getDescription());
+    return;
+  }
+
+  assert(Entry->first == FType);
+
+  // Copy the saved range out before the map entry is erased.
+  BufPtr BodyBegin = Entry->second.Buf;
+  BufPtr BodyEnd = Entry->second.EndBuf;
+  LazyFunctionLoadMap.erase(Entry);
+
+  ParseFunctionBody(FType, BodyBegin, BodyEnd);
+}
+
+/// Parse one function body: its linkage, then each nested block until EndBuf.
+/// Function-level type state is cleared once the body has been handled.
+void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, 
+                                               BufPtr &Buf, BufPtr EndBuf ) {
+  GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage;
+
+  unsigned LinkageType = read_vbr_uint(Buf, EndBuf);
+  switch (LinkageType) {
+  case 0: Linkage = GlobalValue::ExternalLinkage; break;
+  case 1: Linkage = GlobalValue::WeakLinkage; break;
+  case 2: Linkage = GlobalValue::AppendingLinkage; break;
+  case 3: Linkage = GlobalValue::InternalLinkage; break;
+  case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
+  default:
+    PARSE_ERROR("Invalid linkage type for Function.");
+    Linkage = GlobalValue::InternalLinkage;  // Used if the error is suppressed
+    break;
+  }
+
+  handler->handleFunctionBegin(FType,Linkage);
+
+  // Keep track of how many basic blocks we have read in...
+  unsigned BlockNum = 0;
+
+  while (Buf < EndBuf) {
+    unsigned BlockType, Size;
+    BufPtr OldBuf = Buf;
+    readBlock(Buf, EndBuf, BlockType, Size);
+
+    switch (BlockType) {
+    case BytecodeFormat::ConstantPool:
+      ParseConstantPool(Buf, Buf+Size, FunctionTypes );
+      break;
+
+    case BytecodeFormat::CompactionTable:
+      ParseCompactionTable(Buf, Buf+Size);
+      break;
+
+    case BytecodeFormat::BasicBlock:
+      ParseBasicBlock(Buf, Buf+Size, BlockNum++);
+      break;
+
+    case BytecodeFormat::InstructionList:
+      if (BlockNum) 
+        PARSE_ERROR("InstructionList must come before basic blocks!");
+      BlockNum = ParseInstructionList(Buf, Buf+Size);
+      break;
+
+    case BytecodeFormat::SymbolTable:
+      ParseSymbolTable(Buf, Buf+Size );
+      break;
+
+    default:
+      Buf += Size;   // Unrecognized block type: skip over its contents
+      if (OldBuf > Buf)
+        PARSE_ERROR("Wrapped around reading bytecode");
+      break;
+    }
+
+    // Malformed bc file if read past end of block.
+    align32(Buf, EndBuf);
+  }
+
+  handler->handleFunctionEnd(FType);
+
+  // Clear out function-level types...
+  FunctionTypes.clear();
+  CompactionTypeTable.clear();
+}
+
+/// ParseAllFunctionBodies - Parse every function body that is still recorded
+/// in LazyFunctionLoadMap, visiting the entries in map iteration order.
+/// Entries are not removed from the map.
+void AbstractBytecodeParser::ParseAllFunctionBodies() {
+  for (LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin(),
+         Fe = LazyFunctionLoadMap.end(); Fi != Fe; ++Fi) {
+    const Type* FType = Fi->first;
+    // The read position is taken by reference, so the start pointer stored in
+    // the map entry moves forward as the body is consumed.
+    this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf);
+  }
+}
+
+/// ParseCompactionTable - Read a compaction table occupying [Buf, End) and
+/// report its contents to the handler.  The table is a sequence of planes;
+/// each plane has a header giving its type slot and entry count, followed by
+/// one vbr-encoded slot number per entry.
+void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) {
+
+  handler->handleCompactionTableBegin();
+
+  while (Buf != End) {
+    // The low two bits of the header word select the encoding: when both are
+    // set the count is in the upper bits and the type follows as its own
+    // word; otherwise the low two bits are the count and the upper bits are
+    // the type.
+    const unsigned Header = read_vbr_uint(Buf, End);
+    unsigned PlaneTy, PlaneSize;
+
+    if ((Header & 3) != 3) {
+      PlaneTy = Header >> 2;
+      PlaneSize = Header & 3;
+    } else {
+      PlaneSize = Header >> 2;
+      PlaneTy = read_vbr_uint(Buf, End);
+    }
+
+    handler->handleCompactionTablePlane( PlaneTy, PlaneSize );
+
+    if (PlaneTy != Type::TypeTyID) {
+      // A plane of values: every entry is a value slot of the plane's type.
+      const Type *PlaneType = getType(PlaneTy);
+      for (unsigned Idx = 0; Idx < PlaneSize; ++Idx) {
+        const unsigned ValueSlot = read_vbr_uint(Buf, End);
+        handler->handleCompactionTableValue( Idx, ValueSlot, PlaneType );
+      }
+      continue;
+    }
+
+    // The type plane: every entry is a slot in the module-level type table.
+    for (unsigned Idx = 0; Idx < PlaneSize; ++Idx) {
+      const unsigned GlobalSlot = read_vbr_uint(Buf, End);
+      const Type *Resolved = getGlobalTableType(GlobalSlot);
+      handler->handleCompactionTableType( Idx, GlobalSlot, Resolved );
+    }
+  }
+
+  handler->handleCompactionTableEnd();
+}
+
+/// ParseTypeConstant - Read one type definition from the buffer.  The first
+/// word is a primitive type ID.  If it names a primitive type that type is
+/// returned directly; otherwise the remaining fields for the derived type are
+/// read, the type is built, handed to handler->handleType and returned.  An
+/// unrecognized ID is reported through PARSE_ERROR and yields a null pointer.
+const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf,
+                                              const unsigned char *EndBuf) {
+  const unsigned Kind = read_vbr_uint(Buf, EndBuf);
+
+  // Primitive types carry no further data.
+  if (const Type *Primitive = Type::getPrimitiveType((Type::PrimitiveID)Kind))
+    return Primitive;
+
+  Type *Built = 0;
+
+  switch (Kind) {
+  case Type::FunctionTyID: {
+    // [return type][param count][param types...]
+    const Type *ReturnTy = getType(read_vbr_uint(Buf, EndBuf));
+    const unsigned ParamCount = read_vbr_uint(Buf, EndBuf);
+
+    std::vector<const Type*> ParamTys;
+    for (unsigned i = 0; i != ParamCount; ++i)
+      ParamTys.push_back(getType(read_vbr_uint(Buf, EndBuf)));
+
+    // A trailing void parameter marks the function as vararg.
+    const bool VarArg = !ParamTys.empty() && ParamTys.back() == Type::VoidTy;
+    if (VarArg)
+      ParamTys.pop_back();
+
+    Built = FunctionType::get(ReturnTy, ParamTys, VarArg);
+    break;
+  }
+
+  case Type::ArrayTyID: {
+    // [element type][element count]
+    const unsigned ElemSlot = read_vbr_uint(Buf, EndBuf);
+    const Type *ElemTy = getType(ElemSlot);
+    const unsigned Count = read_vbr_uint(Buf, EndBuf);
+
+    BCR_TRACE(5, "Array Type Constant #" << ElemSlot << " size=" 
+              << Count << "\n");
+    Built = ArrayType::get(ElemTy, Count);
+    break;
+  }
+
+  case Type::StructTyID: {
+    // Member type IDs, terminated by void/0.
+    std::vector<const Type*> Members;
+    for (unsigned Slot = read_vbr_uint(Buf, EndBuf); Slot != 0;
+         Slot = read_vbr_uint(Buf, EndBuf))
+      Members.push_back(getType(Slot));
+
+    Built = StructType::get(Members);
+    break;
+  }
+
+  case Type::PointerTyID: {
+    // [pointee type]
+    const unsigned PointeeSlot = read_vbr_uint(Buf, EndBuf);
+    BCR_TRACE(5, "Pointer Type Constant #" << PointeeSlot << "\n");
+    Built = PointerType::get(getType(PointeeSlot));
+    break;
+  }
+
+  case Type::OpaqueTyID:
+    Built = OpaqueType::get();
+    break;
+
+  default:
+    PARSE_ERROR("Don't know how to deserialize primitive type" << Kind << "\n");
+    return 0;
+  }
+
+  // Every derived type is announced to the handler before it is returned.
+  handler->handleType( Built );
+  return Built;
+}
+
+// ParseTypeConstants - Read NumEntries type definitions into Tab.
+//
+// Types in one slab may refer to each other before they have been read: Type
+// #0 might be '{ Ty#1 }' while Type #1 is 'Ty#0*'.  To give such forward
+// references something to resolve to, the table is first filled with one
+// fresh opaque type per entry.  Each definition is then parsed in turn and
+// the matching opaque placeholder is refined to the concrete type that was
+// read.
+//
+void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf,
+                                        const unsigned char *EndBuf,
+                                        TypeListTy &Tab,
+                                        unsigned NumEntries) {
+  assert(Tab.size() == 0 && "should not have read type constants in before!");
+
+  // Step 1: one opaque placeholder per entry that is about to be read.
+  Tab.reserve(NumEntries);
+  for (unsigned Slot = 0; Slot < NumEntries; ++Slot) {
+    Tab.push_back(OpaqueType::get());
+  }
+
+  // Step 2: read each definition and swap it in for its placeholder.
+  for (unsigned Slot = 0; Slot < NumEntries; ++Slot) {
+    const Type *Parsed = ParseTypeConstant(Buf, EndBuf);
+    const Type *Placeholder = Tab[Slot].get();
+    if (!Parsed)
+      throw std::string("Couldn't parse type!");
+
+    BCR_TRACE(4, "#" << Slot << ": Read Type Constant: '" << Parsed <<
+              "' Replacing: " << Placeholder << "\n");
+
+    // The new type is not inserted as a new entry.  Refining the placeholder
+    // redirects every use of it to the type just read.
+    DerivedType *Abstract =
+      cast<DerivedType>(const_cast<Type*>(Placeholder));
+    Abstract->refineAbstractTypeTo(Parsed);
+
+    // After refinement the table slot must no longer refer to the
+    // placeholder.
+    assert(Tab[Slot] != Placeholder && "refineAbstractType didn't work!");
+  }
+
+  BCR_TRACE(5, "Resulting types:\n");
+  for (unsigned Slot = 0; Slot != NumEntries; ++Slot) {
+    BCR_TRACE(5, (void*)Tab[Slot].get() << " - " << Tab[Slot].get() << "\n");
+  }
+}
+
+
+/// ParseConstantValue - Read a single constant whose type lives in slot
+/// TypeID and report it to the handler.
+///
+/// The first word tells whether the constant is a constant expression: zero
+/// means a plain value, non-zero is the operand count of the expression.  For
+/// an expression the opcode and (value slot, type slot) operand pairs are read
+/// and passed to handleConstantExpression.  For a plain value the encoding
+/// depends on the type, and the matching handleConstant* callback is invoked.
+///
+/// @param Buf     current read position; advanced as data is consumed
+/// @param EndBuf  one past the last readable byte
+/// @param TypeID  type slot of the constant being read
+void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf,
+                                             const unsigned char *EndBuf,
+                                             unsigned TypeID) {
+
+  // We must check for a ConstantExpr before switching by type because
+  // a ConstantExpr can be of any type, and has no explicit value.
+  // 
+  // 0 if not expr; numArgs if is expr
+  unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf);
+  
+  if (isExprNumArgs) {
+    unsigned Opcode = read_vbr_uint(Buf, EndBuf);
+    const Type* Typ = getType(TypeID);
+    
+    // FIXME: Encoding of constant exprs could be much more compact!
+    std::vector<std::pair<const Type*,unsigned> > ArgVec;
+    ArgVec.reserve(isExprNumArgs);
+
+    // Read the slot number and types of each of the arguments
+    for (unsigned i = 0; i != isExprNumArgs; ++i) {
+      unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf);
+      unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf);
+      BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot)
+                << "'  slot: " << ArgValSlot << "\n");
+      
+      // Record the operand as a (type, value slot) pair; the slot is not
+      // resolved to a value here.
+      ArgVec.push_back(std::make_pair(getType(ArgTypeSlot), ArgValSlot));
+    }
+
+    handler->handleConstantExpression( Opcode, Typ, ArgVec );
+    return;
+  }
+  
+  // Ok, not an ConstantExpr.  We now know how to read the given type...
+  const Type *Ty = getType(TypeID);
+  switch (Ty->getPrimitiveID()) {
+  case Type::BoolTyID: {
+    unsigned Val = read_vbr_uint(Buf, EndBuf);
+    if (Val != 0 && Val != 1) 
+      PARSE_ERROR("Invalid boolean value read.");
+
+    handler->handleConstantValue( ConstantBool::get(Val == 1));
+    break;
+  }
+
+  case Type::UByteTyID:   // Unsigned integer types...
+  case Type::UShortTyID:
+  case Type::UIntTyID: {
+    unsigned Val = read_vbr_uint(Buf, EndBuf);
+    if (!ConstantUInt::isValueValidForType(Ty, Val)) 
+      throw std::string("Invalid unsigned byte/short/int read.");
+    handler->handleConstantValue( ConstantUInt::get(Ty, Val) );
+    break;
+  }
+
+  case Type::ULongTyID: {
+    handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) );
+    break;
+  }
+
+  case Type::SByteTyID:   // Signed integer types...
+  case Type::ShortTyID:
+  case Type::IntTyID:
+  case Type::LongTyID: {
+    int64_t Val = read_vbr_int64(Buf, EndBuf);
+    if (!ConstantSInt::isValueValidForType(Ty, Val)) 
+      throw std::string("Invalid signed byte/short/int/long read.");
+    handler->handleConstantValue(  ConstantSInt::get(Ty, Val) );
+    break;
+  }
+
+  case Type::FloatTyID: {
+    float F;
+    input_data(Buf, EndBuf, &F, &F+1);
+    handler->handleConstantValue( ConstantFP::get(Ty, F) );
+    break;
+  }
+
+  case Type::DoubleTyID: {
+    double Val;
+    input_data(Buf, EndBuf, &Val, &Val+1);
+    handler->handleConstantValue( ConstantFP::get(Ty, Val) );
+    break;
+  }
+
+  case Type::TypeTyID:
+    PARSE_ERROR("Type constants shouldn't live in constant table!");
+    break;
+
+  case Type::ArrayTyID: {
+    // One value slot per array element.
+    const ArrayType *AT = cast<ArrayType>(Ty);
+    unsigned NumElements = AT->getNumElements();
+    std::vector<unsigned> Elements;
+    Elements.reserve(NumElements);
+    while (NumElements--)     // Read all of the elements of the constant.
+      Elements.push_back(read_vbr_uint(Buf, EndBuf));
+
+    handler->handleConstantArray( AT, Elements );
+    break;
+  }
+
+  case Type::StructTyID: {
+    // One value slot per struct member.
+    const StructType *ST = cast<StructType>(Ty);
+    std::vector<unsigned> Elements;
+    Elements.reserve(ST->getNumElements());
+    for (unsigned i = 0; i != ST->getNumElements(); ++i)
+      Elements.push_back(read_vbr_uint(Buf, EndBuf));
+
+    handler->handleConstantStruct( ST, Elements );
+    break;  // Must not fall into the pointer case below.
+  }    
+
+  case Type::PointerTyID: {  // ConstantPointerRef value...
+    const PointerType *PT = cast<PointerType>(Ty);
+    unsigned Slot = read_vbr_uint(Buf, EndBuf);
+    handler->handleConstantPointer( PT, Slot );
+    break;  // Must not fall into the error case below.
+  }
+
+  default:
+    PARSE_ERROR("Don't know how to deserialize constant value of type '"
+                << Ty->getDescription() << "'");
+    break;
+  }
+}
+
+/// ParseGlobalTypes - Parse the block spanning [Buf, EndBuf) as a constant
+/// pool whose types are recorded in the module-level table ModuleTypes.
+void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf,
+                                      const unsigned char *EndBuf) {
+  ParseConstantPool(Buf, EndBuf, ModuleTypes);
+}
+
+/// ParseStringConstants - Read NumEntries string constants and report each
+/// one through handler->handleConstantString.
+///
+/// Every entry is a type slot followed by raw character data.  The type must
+/// be an array of sbyte or ubyte; its element count is the length of the
+/// data.  Throws std::string when the type is anything else.
+///
+/// @param Buf         current read position; advanced as data is consumed
+/// @param EndBuf      one past the last readable byte
+/// @param NumEntries  number of string constants to read
+void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf,
+                                          const unsigned char *EndBuf,
+                                          unsigned NumEntries ){
+  for (; NumEntries; --NumEntries) {
+    unsigned Typ = read_vbr_uint(Buf, EndBuf);
+    const Type *Ty = getType(Typ);
+    if (!isa<ArrayType>(Ty))
+      throw std::string("String constant data invalid!");
+    
+    const ArrayType *ATy = cast<ArrayType>(Ty);
+    if (ATy->getElementType() != Type::SByteTy &&
+        ATy->getElementType() != Type::UByteTy)
+      throw std::string("String constant data invalid!");
+    
+    // Read character data.  The type tells us how long the string is.  The
+    // length comes from the input file, so the scratch buffer is a vector
+    // rather than a variable-length stack array.
+    const unsigned NumElts = ATy->getNumElements();
+    std::vector<char> Data(NumElts);
+    char *DataBegin = Data.empty() ? 0 : &Data[0];
+    input_data(Buf, EndBuf, DataBegin, DataBegin + NumElts);
+
+    // Wrap every character in a constant of the array's element type.
+    std::vector<Constant*> Elements(NumElts);
+    if (ATy->getElementType() == Type::SByteTy)
+      for (unsigned i = 0; i != NumElts; ++i)
+        Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]);
+    else
+      for (unsigned i = 0; i != NumElts; ++i)
+        Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]);
+
+    // Create the constant, inserting it as needed.
+    ConstantArray *C = cast<ConstantArray>( ConstantArray::get(ATy, Elements) );
+    handler->handleConstantString( C );
+  }
+}
+
+
+/// ParseConstantPool - Read a constant pool occupying [Buf, EndBuf).  The pool
+/// is a sequence of planes, each introduced by an entry count and a type slot.
+/// A plane whose type is TypeTyID holds type definitions (stored into
+/// TypeTab), a plane whose type is VoidTyID holds string constants, and any
+/// other plane holds ordinary constants of that type.
+void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf,
+                                       const unsigned char *EndBuf,
+                                       TypeListTy &TypeTab) {
+  while (Buf < EndBuf) {
+    // Plane header: [entry count][type slot]
+    const unsigned Count = read_vbr_uint(Buf, EndBuf);
+    const unsigned PlaneTy = read_vbr_uint(Buf, EndBuf);
+
+    switch (PlaneTy) {
+    case Type::TypeTyID:
+      ParseTypeConstants(Buf, EndBuf, TypeTab, Count);
+      break;
+
+    case Type::VoidTyID:
+      ParseStringConstants(Buf, EndBuf, Count);
+      break;
+
+    default:
+      BCR_TRACE(3, "Type: '" << *getType(PlaneTy) << "'  NumEntries: "
+                << Count << "\n");
+
+      for (unsigned Remaining = Count; Remaining != 0; --Remaining)
+        ParseConstantValue(Buf, EndBuf, PlaneTy);
+      break;
+    }
+  }
+  
+  if (Buf > EndBuf) {
+    PARSE_ERROR("Read past end of buffer.");
+  }
+}
+
+/// ParseModuleGlobalInfo - Read the module global info block spanning
+/// [Buf, End).  The block holds two lists, each terminated by a Void type ID:
+/// first the global variables, then the signatures of the functions whose
+/// bodies follow later in the module.  Each global variable and each function
+/// declaration is reported to the handler, and every function signature is
+/// appended to FunctionSignatureList for use when the bodies are read.
+///
+/// @param Buf  current read position; set to End before returning
+/// @param End  one past the last byte of the block
+void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) {
+
+  handler->handleModuleGlobalsBegin();
+
+  // Read global variables...
+  unsigned VarType = read_vbr_uint(Buf, End);
+  while (VarType != Type::VoidTyID) { // List is terminated by Void
+    // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
+    // Linkage, bit5+ = slot#
+    unsigned SlotNo = VarType >> 5;
+    unsigned LinkageID = (VarType >> 2) & 7;
+    bool isConstant = VarType & 1;
+    bool hasInitializer = VarType & 2;
+    GlobalValue::LinkageTypes Linkage;
+
+    switch (LinkageID) {
+    case 0: Linkage = GlobalValue::ExternalLinkage;  break;
+    case 1: Linkage = GlobalValue::WeakLinkage;      break;
+    case 2: Linkage = GlobalValue::AppendingLinkage; break;
+    case 3: Linkage = GlobalValue::InternalLinkage;  break;
+    case 4: Linkage = GlobalValue::LinkOnceLinkage;  break;
+    default: 
+      PARSE_ERROR("Unknown linkage type: " << LinkageID);
+      // Used only if execution continues past the error report.
+      Linkage = GlobalValue::InternalLinkage;
+      break;
+    }
+
+    const Type *Ty = getType(SlotNo);
+    if ( !Ty ) {
+      PARSE_ERROR("Global has no type! SlotNo=" << SlotNo);
+    }
+
+    if ( !isa<PointerType>(Ty)) {
+      PARSE_ERROR("Global not a pointer type! Ty= " << Ty->getDescription());
+    }
+
+    // The slot holds a pointer type; the variable itself has the pointee type.
+    const Type *ElTy = cast<PointerType>(Ty)->getElementType();
+
+    // Report the global variable.  The initializer slot is present in the
+    // stream only when the hasInitializer bit is set.
+    if (hasInitializer) {
+      unsigned initSlot = read_vbr_uint(Buf,End);
+      handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot );
+    } else {
+      handler->handleGlobalVariable( ElTy, isConstant, Linkage );
+    }
+
+    // Get next item
+    VarType = read_vbr_uint(Buf, End);
+  }
+
+  // Read the function objects for all of the functions that are coming
+  unsigned FnSignature = read_vbr_uint(Buf, End);
+  while (FnSignature != Type::VoidTyID) { // List is terminated by Void
+    const Type *Ty = getType(FnSignature);
+    if (!isa<PointerType>(Ty) ||
+        !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) {
+      PARSE_ERROR( "Function not a pointer to function type! Ty = " +
+                        Ty->getDescription());
+      // FIXME: what should Ty be if handler continues?
+    }
+
+    // We create functions by passing the underlying FunctionType to create...
+    Ty = cast<PointerType>(Ty)->getElementType();
+
+    // Save this for later so we know type of lazily instantiated functions
+    FunctionSignatureList.push_back(Ty);
+
+    handler->handleFunctionDeclaration(Ty);
+
+    // Get Next function signature
+    FnSignature = read_vbr_uint(Buf, End);
+  }
+
+  if (hasInconsistentModuleGlobalInfo)
+    align32(Buf, End);
+
+  // This is for future proofing... in the future extra fields may be added that
+  // we don't understand, so we transparently ignore them.
+  //
+  Buf = End;
+
+  handler->handleModuleGlobalsEnd();
+}
+
+/// ParseVersionInfo - Read the packed version word, derive the revision number
+/// and the per-revision format flags from it, and report the revision,
+/// endianness and pointer size to the handler.
+void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) {
+  const unsigned Packed = read_vbr_uint(Buf, EndBuf);
+
+  // The low four bits are flags; everything above them is the revision.
+  const bool BigEndianBit     = (Packed & 1) != 0;
+  const bool Pointer64Bit     = (Packed & 2) != 0;
+  const bool NoEndiannessBit  = (Packed & 4) != 0;
+  const bool NoPointerSizeBit = (Packed & 8) != 0;
+  RevisionNum = Packed >> 4;
+
+  // Revision 0 (LLVM 1.0, 1.1): the ModuleGlobalInfo block size included its
+  // trailing alignment, and primitive zeros were encoded explicitly.
+  hasInconsistentModuleGlobalInfo = (RevisionNum == 0);
+  hasExplicitPrimitiveZeros = (RevisionNum == 0);
+
+  // Revisions 0 and 1 (LLVM 1.2 and before): GEP indices had to be ubyte
+  // constants for structures and longs for sequential types.
+  hasRestrictedGEPTypes = (RevisionNum <= 1);
+
+  // Revision 2 (LLVM 1.3) is the newest one known here.
+  if (RevisionNum > 2) {
+    PARSE_ERROR("Unknown bytecode version number: " << RevisionNum);
+  }
+
+  // The "no endianness" / "no pointer size" bits override the basic ones.
+  Module::Endianness Endianness;
+  if (NoEndiannessBit)
+    Endianness = Module::AnyEndianness;
+  else
+    Endianness = BigEndianBit ? Module::BigEndian : Module::LittleEndian;
+
+  Module::PointerSize PointerSize;
+  if (NoPointerSizeBit)
+    PointerSize = Module::AnyPointerSize;
+  else
+    PointerSize = Pointer64Bit ? Module::Pointer64 : Module::Pointer32;
+
+  handler->handleVersionInfo(RevisionNum, Endianness, PointerSize );
+}
+
+/// ParseModule - Parse the module block that is expected to fill
+/// [Buf, EndBuf).  After the block header and the version word, every nested
+/// block is dispatched to its Parse* routine; function blocks are only
+/// recorded for later parsing.  Blocks of any other kind are skipped.
+void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) {
+  unsigned BlockType, BlockSize;
+  readBlock(Buf, EndBuf, BlockType, BlockSize);
+
+  // The outermost block has to be a Module block that ends exactly at EndBuf.
+  const bool IsWholeModule =
+    BlockType == BytecodeFormat::Module && Buf+BlockSize == EndBuf;
+  if (!IsWholeModule) {
+    PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) <<
+        ", S: " << BlockSize << " E: " << unsigned(intptr_t(EndBuf))); 
+  }
+
+  // Read into instance variables...
+  ParseVersionInfo(Buf, EndBuf);
+  align32(Buf, EndBuf);
+
+  bool HaveGlobalInfo = false;
+  bool HaveTypePlane = false;
+
+  while (Buf < EndBuf) {
+    const BufPtr BlockStart = Buf;
+    readBlock(Buf, EndBuf, BlockType, BlockSize);
+    const BufPtr BlockEnd = Buf + BlockSize;
+
+    if (BlockType == BytecodeFormat::GlobalTypePlane) {
+      if (HaveTypePlane) {
+        PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!");
+      }
+      ParseGlobalTypes(Buf, BlockEnd);
+      HaveTypePlane = true;
+    } else if (BlockType == BytecodeFormat::ModuleGlobalInfo) {
+      if (HaveGlobalInfo) {
+        PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!");
+      }
+      ParseModuleGlobalInfo(Buf, BlockEnd);
+      HaveGlobalInfo = true;
+    } else if (BlockType == BytecodeFormat::ConstantPool) {
+      ParseConstantPool(Buf, BlockEnd, ModuleTypes);
+    } else if (BlockType == BytecodeFormat::Function) {
+      ParseFunctionLazily(Buf, BlockEnd);
+    } else if (BlockType == BytecodeFormat::SymbolTable) {
+      ParseSymbolTable(Buf, BlockEnd );
+    } else {
+      // Some other kind of block: step over it, complaining only if the
+      // pointer ended up before where this block started.
+      Buf = BlockEnd;
+      if (BlockStart > Buf) {
+        PARSE_ERROR("Unexpected Block of Type" << BlockType << "encountered!" );
+      }
+    }
+
+    align32(Buf, EndBuf);
+  }
+}
+
+/// ParseBytecode - Entry point: parse Length bytes of bytecode starting at
+/// Buf.  The signature word is checked first, then the module is parsed.  The
+/// handler sees handleStart, handleModuleBegin(ModuleID),
+/// handleModuleEnd(ModuleID) and handleFinish around the work, in that order.
+void AbstractBytecodeParser::ParseBytecode(
+       BufPtr Buf, unsigned Length,
+       const std::string &ModuleID) {
+
+  handler->handleStart();
+
+  const BufPtr EndBuf = Buf + Length;
+
+  // The file must open with the characters 'l', 'l', 'v', 'm', lowest byte
+  // first.
+  const unsigned Expected = 'l' | ('l' << 8) | ('v' << 16) | ('m' << 24);
+  const unsigned Sig = read(Buf, EndBuf);
+  if (Sig != Expected) {
+    PARSE_ERROR("Invalid bytecode signature: " << Sig);
+  }
+
+  handler->handleModuleBegin(ModuleID);
+  ParseModule(Buf, EndBuf);
+  handler->handleModuleEnd(ModuleID);
+
+  handler->handleFinish();
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Reader/Parser.h b/lib/Bytecode/Reader/Parser.h
new file mode 100644
index 0000000000..027047b3f8
--- /dev/null
+++ b/lib/Bytecode/Reader/Parser.h
@@ -0,0 +1,178 @@
+//===-- Parser.h - Definitions internal to the reader -----------*- C++ -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the 
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+//  This header file defines the interface to the Bytecode Parser
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BYTECODE_PARSER_H
+#define BYTECODE_PARSER_H
+
+#include "ReaderPrimitives.h"
+#include "BytecodeHandler.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include <utility>
+#include <vector>
+#include <map>
+
+namespace llvm {
+
+/// LazyFunctionInfo - The extent of a function body inside the bytecode
+/// buffer: Buf is where it starts and EndBuf is where it ends.  Both default
+/// to null.
+struct LazyFunctionInfo {
+  const unsigned char *Buf;
+  const unsigned char *EndBuf;
+
+  LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) {
+    Buf = B;
+    EndBuf = EB;
+  }
+};
+
+typedef std::map<const Type*, LazyFunctionInfo> LazyFunctionMap;
+
+/// AbstractBytecodeParser - Walks a bytecode buffer and reports what it finds
+/// to a BytecodeHandler supplied at construction time.  ParseBytecode is the
+/// entry point; function bodies are only recorded while the module is parsed
+/// and can be parsed afterwards with ParseNextFunction or
+/// ParseAllFunctionBodies.  The class is not copyable.
+class AbstractBytecodeParser {
+  AbstractBytecodeParser(const AbstractBytecodeParser &);  // DO NOT IMPLEMENT
+  void operator=(const AbstractBytecodeParser &);  // DO NOT IMPLEMENT
+public:
+  /// Create a parser that reports to handler h.  The revision number and the
+  /// format flags start out cleared; ParseVersionInfo assigns their real
+  /// values.
+  AbstractBytecodeParser( BytecodeHandler* h )
+    : RevisionNum(0),
+      hasInconsistentModuleGlobalInfo(false),
+      hasExplicitPrimitiveZeros(false),
+      hasRestrictedGEPTypes(false),
+      handler(h) { }
+  ~AbstractBytecodeParser() { }
+
+  /// Parse Length bytes of bytecode starting at Buf.  ModuleID is passed
+  /// through to the handler's module begin/end callbacks.
+  void ParseBytecode(const unsigned char *Buf, unsigned Length,
+                     const std::string &ModuleID);
+
+  /// Print a fixed identifying line to std::cerr.
+  void dump() const {
+    std::cerr << "AbstractBytecodeParser instance!\n";
+  }
+
+private:
+  // Information about the module, extracted from the bytecode revision number.
+  unsigned char RevisionNum;        // The rev # itself
+
+  // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
+
+  // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo
+  // block.  This was fixed to be like all other blocks in 1.2
+  bool hasInconsistentModuleGlobalInfo;
+
+  // Revision #0 also explicitly encoded zero values for primitive types like
+  // int/sbyte/etc.
+  bool hasExplicitPrimitiveZeros;
+
+  // Flags to control features specific to LLVM 1.2 and before (revision #1)
+
+  // LLVM 1.2 and earlier required that getelementptr structure indices were
+  // ubyte constants and that sequential type indices were longs.
+  bool hasRestrictedGEPTypes;
+
+
+  /// CompactionTable - If a compaction table is active in the current function,
+  /// this is the mapping that it contains.
+  std::vector<Type*> CompactionTypeTable;
+
+  // ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and
+  // forward references to constants.  Such values may be referenced before they
+  // are defined, and if so, the temporary object that they represent is held
+  // here.
+  //
+  typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType;
+  ConstantRefsType ConstantFwdRefs;
+
+  // TypesLoaded - This vector mirrors the Values[TypeTyID] plane.  It is used
+  // to deal with forward references to types.
+  //
+  typedef std::vector<PATypeHolder> TypeListTy;
+  TypeListTy ModuleTypes;
+  TypeListTy FunctionTypes;
+
+  // When the ModuleGlobalInfo section is read, the type of each function in
+  // the module is appended here.  The entries are consumed again, from the
+  // back, as the function blocks are encountered.
+  std::vector<const Type*> FunctionSignatureList;
+
+  // Constant values are read in after global variables.  Because of this, we
+  // must defer setting the initializers on global variables until after module
+  // level constants have been read.  In the mean time, this list keeps track of
+  // what we must do.
+  //
+  std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+
+  // For lazy reading-in of functions, we need to save away the begin and end
+  // pointer of each function in the buffer.  The map is keyed by the type
+  // under which the function was recorded.
+  // 
+  LazyFunctionMap LazyFunctionLoadMap;
+
+  /// The handler for parsing
+  BytecodeHandler* handler;
+  
+private:
+  /// getType - Map a type slot number to a type.  Defined out of line.
+  const Type *getType(unsigned ID);
+
+  /// getGlobalTableType - This is just like getType, but when a compaction
+  /// table is in use, it is ignored.  Also, no forward references or other
+  /// fancy features are supported.
+  const Type *getGlobalTableType(unsigned Slot) {
+    if (Slot < Type::FirstDerivedTyID) {
+      const Type *Ty = Type::getPrimitiveType((Type::PrimitiveID)Slot);
+      assert(Ty && "Not a primitive type ID?");
+      return Ty;
+    }
+    // Derived types are numbered after the primitive ones.
+    Slot -= Type::FirstDerivedTyID;
+    if (Slot >= ModuleTypes.size())
+      throw std::string("Illegal compaction table type reference!");
+    return ModuleTypes[Slot];
+  }
+
+  /// getGlobalTableTypeSlot - The inverse of getGlobalTableType: return the
+  /// slot number of Ty.  Throws std::string if Ty is neither primitive nor
+  /// present in ModuleTypes.
+  unsigned getGlobalTableTypeSlot(const Type *Ty) {
+    if (Ty->isPrimitiveType())
+      return Ty->getPrimitiveID();
+    TypeListTy::iterator I = find(ModuleTypes.begin(),
+                                        ModuleTypes.end(), Ty);
+    if (I == ModuleTypes.end())
+      throw std::string("Didn't find type in ModuleTypes.");
+    return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]);
+  }
+
+public:
+  typedef const unsigned char* BufPtr;
+  void ParseModule             (BufPtr &Buf, BufPtr End);
+  void ParseNextFunction       (Type* FType) ;
+  void ParseAllFunctionBodies  ();
+
+private:
+  void ParseVersionInfo        (BufPtr &Buf, BufPtr End);
+  void ParseModuleGlobalInfo   (BufPtr &Buf, BufPtr End);
+  void ParseSymbolTable        (BufPtr &Buf, BufPtr End);
+  void ParseFunctionLazily     (BufPtr &Buf, BufPtr End);
+  void ParseFunctionBody       (const Type* FType, BufPtr &Buf, BufPtr EndBuf);
+  void ParseCompactionTable    (BufPtr &Buf, BufPtr End);
+  void ParseGlobalTypes        (BufPtr &Buf, BufPtr End);
+
+  void ParseBasicBlock         (BufPtr &Buf, BufPtr End, unsigned BlockNo);
+  unsigned ParseInstructionList(BufPtr &Buf, BufPtr End);
+  
+  bool ParseInstruction        (BufPtr &Buf, BufPtr End, 
+                                std::vector<unsigned>& Args);
+
+  void ParseConstantPool       (BufPtr &Buf, BufPtr End, TypeListTy& List);
+  void ParseConstantValue      (BufPtr &Buf, BufPtr End, unsigned TypeID);
+  void ParseTypeConstants      (BufPtr &Buf, BufPtr End, TypeListTy &Tab,
+                                        unsigned NumEntries);
+  const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End);
+  void ParseStringConstants    (BufPtr &Buf, BufPtr End, unsigned NumEntries);
+
+};
+
+
+/// readBlock - Read a block header from the buffer: first the block type, then
+/// the block size, each obtained with one call to read().  Buf is advanced
+/// past whatever the two reads consume.
+static inline void readBlock(const unsigned char *&Buf,
+                             const unsigned char *EndBuf, 
+                             unsigned &Type, unsigned &Size) {
+  Type = read(Buf, EndBuf);
+  Size = read(Buf, EndBuf);
+}
+
+} // End llvm namespace
+
+#endif
+// vim: sw=2
author	Reid Spencer <rspencer@reidspencer.com>	2004-06-07 17:53:43 +0000
committer	Reid Spencer <rspencer@reidspencer.com>	2004-06-07 17:53:43 +0000
commit	dac69c83c22a00d3f8de3bb4d62b9dbeb0a20caf (patch)
tree	b5d0372bb98f6c5080d09fa6da48cb17ded7a08e /lib/Bytecode/Reader
parent	d66215607c214a00c79da6625b7de5e0d25a360f (diff)