Copy LLVM bitcode reader to generate a PNaCl wire format reader.

Copy classes for LLVM BitcodeReader into a NaCl subdirectory, to create a wire format version. Renames classes/functions to include NaCl prefix, so that they don't conflict with the LLVM Bitcode reader. Also implements pnacl-thaw, showing that we can read the PNaCl wire format files. BUG= https://code.google.com/p/nativeclient/issues/detail?id=3405 R=jvoung@chromium.org Review URL: https://codereview.chromium.org/14314016
author: Karl Schimpf <kschimpf@google.com> 2013-05-01 10:42:30 -0700
committer: Karl Schimpf <kschimpf@google.com> 2013-05-01 10:42:30 -0700
commit: f42b26d0d46034cb2b91df810477fc0df2e67b27 (patch)
tree: bc8749b8fa6e36670df3171a27d9db2bfab608a7 /include/llvm/Bitcode
parent: 595239b2b7297b62d9f804770f5f43d8bf637a0f (diff)
3 files changed, 1020 insertions, 1 deletions
diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
new file mode 100644
index 0000000000..238ce5275a
--- /dev/null
+++ b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
@@ -0,0 +1,560 @@
+//===- NaClBitstreamReader.h -----------------------------------*- C++ -*-===//
+//     Low-level bitstream reader interface
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitstreamReader class.  This class can be used to
+// read an arbitrary bitstream, regardless of its contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H
+#define LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/StreamableMemoryObject.h"
+#include <climits>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+  class Deserializer;
+
+/// NaClBitstreamReader - This class is used to read from a NaCl
+/// bitcode wire format stream, maintaining information that is global
+/// to decoding the entire file.  While a file is being read, multiple
+/// cursors can be independently advanced or skipped around within the
+/// file.  These are represented by the NaClBitstreamCursor class.
+class NaClBitstreamReader {
+public:
+  /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
+  /// These describe abbreviations that all blocks of the specified ID inherit.
+  struct BlockInfo {
+    unsigned BlockID;
+    std::vector<BitCodeAbbrev*> Abbrevs;
+    std::string Name;
+
+    std::vector<std::pair<unsigned, std::string> > RecordNames;
+  };
+private:
+  OwningPtr<StreamableMemoryObject> BitcodeBytes;
+
+  std::vector<BlockInfo> BlockInfoRecords;
+
+  /// IgnoreBlockInfoNames - This is set to true if we don't care about the
+  /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
+  /// uses this.
+  bool IgnoreBlockInfoNames;
+
+  NaClBitstreamReader(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION;
+  void operator=(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION;
+public:
+  NaClBitstreamReader() : IgnoreBlockInfoNames(true) {
+  }
+
+  NaClBitstreamReader(const unsigned char *Start, const unsigned char *End) {
+    IgnoreBlockInfoNames = true;
+    init(Start, End);
+  }
+
+  NaClBitstreamReader(StreamableMemoryObject *bytes) {
+    BitcodeBytes.reset(bytes);
+  }
+
+  void init(const unsigned char *Start, const unsigned char *End) {
+    assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+    BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
+  }
+
+  StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
+
+  ~NaClBitstreamReader() {
+    // Free the BlockInfoRecords.
+    while (!BlockInfoRecords.empty()) {
+      BlockInfo &Info = BlockInfoRecords.back();
+      // Free blockinfo abbrev info.
+      for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
+           i != e; ++i)
+        Info.Abbrevs[i]->dropRef();
+      BlockInfoRecords.pop_back();
+    }
+  }
+
+  /// CollectBlockInfoNames - This is called by clients that want block/record
+  /// name information.
+  void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
+  bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
+
+  //===--------------------------------------------------------------------===//
+  // Block Manipulation
+  //===--------------------------------------------------------------------===//
+
+  /// hasBlockInfoRecords - Return true if we've already read and processed the
+  /// block info block for this Bitstream.  We only process it for the first
+  /// cursor that walks over it.
+  bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
+
+  /// getBlockInfo - If there is block info for the specified ID, return it,
+  /// otherwise return null.
+  const BlockInfo *getBlockInfo(unsigned BlockID) const {
+    // Common case, the most recent entry matches BlockID.
+    if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
+      return &BlockInfoRecords.back();
+
+    for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
+         i != e; ++i)
+      if (BlockInfoRecords[i].BlockID == BlockID)
+        return &BlockInfoRecords[i];
+    return 0;
+  }
+
+  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
+    if (const BlockInfo *BI = getBlockInfo(BlockID))
+      return *const_cast<BlockInfo*>(BI);
+
+    // Otherwise, add a new record.
+    BlockInfoRecords.push_back(BlockInfo());
+    BlockInfoRecords.back().BlockID = BlockID;
+    return BlockInfoRecords.back();
+  }
+};
+
+  
+/// NaClBitstreamEntry - When advancing through a bitstream cursor,
+/// each advance can discover a few different kinds of entries:
+///   Error    - Malformed bitcode was found.
+///   EndBlock - We've reached the end of the current block, (or the end of the
+///              file, which is treated like a series of EndBlock records.
+///   SubBlock - This is the start of a new subblock of a specific ID.
+///   Record   - This is a record with a specific AbbrevID.
+///
+struct NaClBitstreamEntry {
+  enum {
+    Error,
+    EndBlock,
+    SubBlock,
+    Record
+  } Kind;
+  
+  unsigned ID;
+
+  static NaClBitstreamEntry getError() {
+    NaClBitstreamEntry E; E.Kind = Error; return E;
+  }
+  static NaClBitstreamEntry getEndBlock() {
+    NaClBitstreamEntry E; E.Kind = EndBlock; return E;
+  }
+  static NaClBitstreamEntry getSubBlock(unsigned ID) {
+    NaClBitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
+  }
+  static NaClBitstreamEntry getRecord(unsigned AbbrevID) {
+    NaClBitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
+  }
+};
+
+/// NaClBitstreamCursor - This represents a position within a bitcode
+/// file.  There may be multiple independent cursors reading within
+/// one bitstream, each maintaining their own local state.
+///
+/// Unlike iterators, NaClBitstreamCursors are heavy-weight objects
+/// that should not be passed by value.
+class NaClBitstreamCursor {
+  friend class Deserializer;
+  NaClBitstreamReader *BitStream;
+  size_t NextChar;
+
+  
+  /// CurWord/word_t - This is the current data we have pulled from the stream
+  /// but have not returned to the client.  This is specifically and
+  /// intentionally defined to follow the word size of the host machine for
+  /// efficiency.  We use word_t in places that are aware of this to make it
+  /// perfectly explicit what is going on.
+  typedef uint32_t word_t;
+  word_t CurWord;
+
+  /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
+  /// is always from [0...31/63] inclusive (depending on word size).
+  unsigned BitsInCurWord;
+
+  // CurCodeSize - This is the declared size of code values used for the current
+  // block, in bits.
+  unsigned CurCodeSize;
+
+  /// CurAbbrevs - Abbrevs installed at in this block.
+  std::vector<BitCodeAbbrev*> CurAbbrevs;
+
+  struct Block {
+    unsigned PrevCodeSize;
+    std::vector<BitCodeAbbrev*> PrevAbbrevs;
+    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
+  };
+
+  /// BlockScope - This tracks the codesize of parent blocks.
+  SmallVector<Block, 8> BlockScope;
+
+  
+public:
+  NaClBitstreamCursor() : BitStream(0), NextChar(0) {
+  }
+  NaClBitstreamCursor(const NaClBitstreamCursor &RHS)
+      : BitStream(0), NextChar(0) {
+    operator=(RHS);
+  }
+
+  explicit NaClBitstreamCursor(NaClBitstreamReader &R) : BitStream(&R) {
+    NextChar = 0;
+    CurWord = 0;
+    BitsInCurWord = 0;
+    CurCodeSize = 2;
+  }
+
+  void init(NaClBitstreamReader &R) {
+    freeState();
+
+    BitStream = &R;
+    NextChar = 0;
+    CurWord = 0;
+    BitsInCurWord = 0;
+    CurCodeSize = 2;
+  }
+
+  ~NaClBitstreamCursor() {
+    freeState();
+  }
+
+  void operator=(const NaClBitstreamCursor &RHS);
+
+  void freeState();
+  
+  bool isEndPos(size_t pos) {
+    return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
+  }
+
+  bool canSkipToPos(size_t pos) const {
+    // pos can be skipped to if it is a valid address or one byte past the end.
+    return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
+        static_cast<uint64_t>(pos - 1));
+  }
+
+  uint32_t getWord(size_t pos) {
+    uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
+    BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf, NULL);
+    return *reinterpret_cast<support::ulittle32_t *>(buf);
+  }
+
+  bool AtEndOfStream() {
+    return BitsInCurWord == 0 && isEndPos(NextChar);
+  }
+
+  /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
+  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
+
+  /// GetCurrentBitNo - Return the bit # of the bit we are reading.
+  uint64_t GetCurrentBitNo() const {
+    return NextChar*CHAR_BIT - BitsInCurWord;
+  }
+
+  NaClBitstreamReader *getBitStreamReader() {
+    return BitStream;
+  }
+  const NaClBitstreamReader *getBitStreamReader() const {
+    return BitStream;
+  }
+
+  /// Flags that modify the behavior of advance().
+  enum {
+    /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does
+    /// not automatically pop the block scope when the end of a block is
+    /// reached.
+    AF_DontPopBlockAtEnd = 1,
+
+    /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are
+    /// returned just like normal records.
+    AF_DontAutoprocessAbbrevs = 2
+  };
+  
+  /// advance - Advance the current bitstream, returning the next entry in the
+  /// stream.
+  NaClBitstreamEntry advance(unsigned Flags = 0) {
+    while (1) {
+      unsigned Code = ReadCode();
+      if (Code == bitc::END_BLOCK) {
+        // Pop the end of the block unless Flags tells us not to.
+        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
+          return NaClBitstreamEntry::getError();
+        return NaClBitstreamEntry::getEndBlock();
+      }
+      
+      if (Code == bitc::ENTER_SUBBLOCK)
+        return NaClBitstreamEntry::getSubBlock(ReadSubBlockID());
+      
+      if (Code == bitc::DEFINE_ABBREV &&
+          !(Flags & AF_DontAutoprocessAbbrevs)) {
+        // We read and accumulate abbrev's, the client can't do anything with
+        // them anyway.
+        ReadAbbrevRecord();
+        continue;
+      }
+
+      return NaClBitstreamEntry::getRecord(Code);
+    }
+  }
+
+  /// advanceSkippingSubblocks - This is a convenience function for clients that
+  /// don't expect any subblocks.  This just skips over them automatically.
+  NaClBitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
+    while (1) {
+      // If we found a normal entry, return it.
+      NaClBitstreamEntry Entry = advance(Flags);
+      if (Entry.Kind != NaClBitstreamEntry::SubBlock)
+        return Entry;
+      
+      // If we found a sub-block, just skip over it and check the next entry.
+      if (SkipBlock())
+        return NaClBitstreamEntry::getError();
+    }
+  }
+
+  /// JumpToBit - Reset the stream to the specified bit number.
+  void JumpToBit(uint64_t BitNo) {
+    uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1);
+    unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
+    assert(canSkipToPos(ByteNo) && "Invalid location");
+
+    // Move the cursor to the right word.
+    NextChar = ByteNo;
+    BitsInCurWord = 0;
+    CurWord = 0;
+
+    // Skip over any bits that are already consumed.
+    if (WordBitNo) {
+      if (sizeof(word_t) > 4)
+        Read64(WordBitNo);
+      else
+        Read(WordBitNo);
+    }
+  }
+
+
+  uint32_t Read(unsigned NumBits) {
+    assert(NumBits && NumBits <= 32 &&
+           "Cannot return zero or more than 32 bits!");
+    
+    // If the field is fully contained by CurWord, return it quickly.
+    if (BitsInCurWord >= NumBits) {
+      uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
+      CurWord >>= NumBits;
+      BitsInCurWord -= NumBits;
+      return R;
+    }
+
+    // If we run out of data, stop at the end of the stream.
+    if (isEndPos(NextChar)) {
+      CurWord = 0;
+      BitsInCurWord = 0;
+      return 0;
+    }
+
+    uint32_t R = uint32_t(CurWord);
+
+    // Read the next word from the stream.
+    uint8_t Array[sizeof(word_t)] = {0};
+    
+    BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array),
+                                           Array, NULL);
+    
+    // Handle big-endian byte-swapping if necessary.
+    support::detail::packed_endian_specific_integral
+      <word_t, support::little, support::unaligned> EndianValue;
+    memcpy(&EndianValue, Array, sizeof(Array));
+    
+    CurWord = EndianValue;
+
+    NextChar += sizeof(word_t);
+
+    // Extract NumBits-BitsInCurWord from what we just read.
+    unsigned BitsLeft = NumBits-BitsInCurWord;
+
+    // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive.
+    R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft)))
+                    << BitsInCurWord);
+
+    // BitsLeft bits have just been used up from CurWord.  BitsLeft is in the
+    // range [1..32]/[1..64] so be careful how we shift.
+    if (BitsLeft != sizeof(word_t)*8)
+      CurWord >>= BitsLeft;
+    else
+      CurWord = 0;
+    BitsInCurWord = sizeof(word_t)*8-BitsLeft;
+    return R;
+  }
+
+  uint64_t Read64(unsigned NumBits) {
+    if (NumBits <= 32) return Read(NumBits);
+
+    uint64_t V = Read(32);
+    return V | (uint64_t)Read(NumBits-32) << 32;
+  }
+
+  uint32_t ReadVBR(unsigned NumBits) {
+    uint32_t Piece = Read(NumBits);
+    if ((Piece & (1U << (NumBits-1))) == 0)
+      return Piece;
+
+    uint32_t Result = 0;
+    unsigned NextBit = 0;
+    while (1) {
+      Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
+
+      if ((Piece & (1U << (NumBits-1))) == 0)
+        return Result;
+
+      NextBit += NumBits-1;
+      Piece = Read(NumBits);
+    }
+  }
+
+  // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size.  The
+  // chunk size of the VBR must still be <= 32 bits though.
+  uint64_t ReadVBR64(unsigned NumBits) {
+    uint32_t Piece = Read(NumBits);
+    if ((Piece & (1U << (NumBits-1))) == 0)
+      return uint64_t(Piece);
+
+    uint64_t Result = 0;
+    unsigned NextBit = 0;
+    while (1) {
+      Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
+
+      if ((Piece & (1U << (NumBits-1))) == 0)
+        return Result;
+
+      NextBit += NumBits-1;
+      Piece = Read(NumBits);
+    }
+  }
+
+private:
+  void SkipToFourByteBoundary() {
+    // If word_t is 64-bits and if we've read less than 32 bits, just dump
+    // the bits we have up to the next 32-bit boundary.
+    if (sizeof(word_t) > 4 &&
+        BitsInCurWord >= 32) {
+      CurWord >>= BitsInCurWord-32;
+      BitsInCurWord = 32;
+      return;
+    }
+    
+    BitsInCurWord = 0;
+    CurWord = 0;
+  }
+public:
+
+  unsigned ReadCode() {
+    return Read(CurCodeSize);
+  }
+
+
+  // Block header:
+  //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
+
+  /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
+  /// the block.
+  unsigned ReadSubBlockID() {
+    return ReadVBR(bitc::BlockIDWidth);
+  }
+
+  /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
+  /// over the body of this block.  If the block record is malformed, return
+  /// true.
+  bool SkipBlock() {
+    // Read and ignore the codelen value.  Since we are skipping this block, we
+    // don't care what code widths are used inside of it.
+    ReadVBR(bitc::CodeLenWidth);
+    SkipToFourByteBoundary();
+    unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
+
+    // Check that the block wasn't partially defined, and that the offset isn't
+    // bogus.
+    size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
+    if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
+      return true;
+
+    JumpToBit(SkipTo);
+    return false;
+  }
+
+  /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
+  /// the block, and return true if the block has an error.
+  bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0);
+  
+  bool ReadBlockEnd() {
+    if (BlockScope.empty()) return true;
+
+    // Block tail:
+    //    [END_BLOCK, <align4bytes>]
+    SkipToFourByteBoundary();
+
+    popBlockScope();
+    return false;
+  }
+
+private:
+
+  void popBlockScope() {
+    CurCodeSize = BlockScope.back().PrevCodeSize;
+
+    // Delete abbrevs from popped scope.
+    for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
+         i != e; ++i)
+      CurAbbrevs[i]->dropRef();
+
+    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
+    BlockScope.pop_back();
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Record Processing
+  //===--------------------------------------------------------------------===//
+
+private:
+  void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
+                              SmallVectorImpl<uint64_t> &Vals);
+  void readAbbreviatedField(const BitCodeAbbrevOp &Op,
+                            SmallVectorImpl<uint64_t> &Vals);
+  void skipAbbreviatedField(const BitCodeAbbrevOp &Op);
+  
+public:
+
+  /// getAbbrev - Return the abbreviation for the specified AbbrevId.
+  const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
+    unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
+    assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
+    return CurAbbrevs[AbbrevNo];
+  }
+
+  /// skipRecord - Read the current record and discard it.
+  void skipRecord(unsigned AbbrevID);
+  
+  unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
+                      StringRef *Blob = 0);
+
+  //===--------------------------------------------------------------------===//
+  // Abbrev Processing
+  //===--------------------------------------------------------------------===//
+  void ReadAbbrevRecord();
+  
+  bool ReadBlockInfoBlock();
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Bitcode/NaCl/NaClLLVMBitCodes.h b/include/llvm/Bitcode/NaCl/NaClLLVMBitCodes.h
new file mode 100644
index 0000000000..a211a61edc
--- /dev/null
+++ b/include/llvm/Bitcode/NaCl/NaClLLVMBitCodes.h
@@ -0,0 +1,339 @@
+//===- NaClLLVMBitCodes.h ---------------------------------------*- C++ -*-===//
+//     Enum values for the NaCl bitcode wire format
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines Bitcode enum values for NaCl bitcode wire format.
+//
+// The enum values defined in this file should be considered permanent.  If
+// new features are added, they should have values added at the end of the
+// respective lists.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_NACL_NACLBITCODES_H
+#define LLVM_BITCODE_NACL_NACLBITCODES_H
+
+// TODO(kschimpf) Make a NaCl version of BitCodes.h, so that block id's
+// and abbreviations can be modified.
+#include "llvm/Bitcode/BitCodes.h"
+
+namespace llvm {
+namespace naclbitc {
+  // The only top-level block type defined is for a module.
+  enum NaClBlockIDs {
+    // Blocks
+    MODULE_BLOCK_ID          = bitc::FIRST_APPLICATION_BLOCKID,
+
+    // Module sub-block id's.
+    PARAMATTR_BLOCK_ID,
+    PARAMATTR_GROUP_BLOCK_ID,
+
+    CONSTANTS_BLOCK_ID,
+    FUNCTION_BLOCK_ID,
+
+    UNUSED_ID1,
+
+    VALUE_SYMTAB_BLOCK_ID,
+    METADATA_BLOCK_ID,
+    METADATA_ATTACHMENT_ID,
+
+    TYPE_BLOCK_ID_NEW,
+
+    USELIST_BLOCK_ID
+  };
+
+
+  /// MODULE blocks have a number of optional fields and subblocks.
+  enum NaClModuleCodes {
+    MODULE_CODE_VERSION     = 1,    // VERSION:     [version#]
+    MODULE_CODE_TRIPLE      = 2,    // TRIPLE:      [strchr x N]
+    MODULE_CODE_DATALAYOUT  = 3,    // DATALAYOUT:  [strchr x N]
+    MODULE_CODE_ASM         = 4,    // ASM:         [strchr x N]
+    MODULE_CODE_SECTIONNAME = 5,    // SECTIONNAME: [strchr x N]
+
+    // FIXME: Remove DEPLIB in 4.0.
+    MODULE_CODE_DEPLIB      = 6,    // DEPLIB:      [strchr x N]
+
+    // GLOBALVAR: [pointer type, isconst, initid,
+    //             linkage, alignment, section, visibility, threadlocal]
+    MODULE_CODE_GLOBALVAR   = 7,
+
+    // FUNCTION:  [type, callingconv, isproto, linkage, paramattrs, alignment,
+    //             section, visibility, gc, unnamed_addr]
+    MODULE_CODE_FUNCTION    = 8,
+
+    // ALIAS: [alias type, aliasee val#, linkage, visibility]
+    MODULE_CODE_ALIAS       = 9,
+
+    // MODULE_CODE_PURGEVALS: [numvals]
+    MODULE_CODE_PURGEVALS   = 10,
+
+    MODULE_CODE_GCNAME      = 11   // GCNAME: [strchr x N]
+  };
+
+  /// PARAMATTR blocks have code for defining a parameter attribute set.
+  enum NaClAttributeCodes {
+    // FIXME: Remove `PARAMATTR_CODE_ENTRY_OLD' in 4.0
+    PARAMATTR_CODE_ENTRY_OLD  = 1, // ENTRY: [paramidx0, attr0,
+                                   //         paramidx1, attr1...]
+    PARAMATTR_CODE_ENTRY      = 2, // ENTRY: [paramidx0, attrgrp0,
+                                   //         paramidx1, attrgrp1, ...]
+    PARAMATTR_GRP_CODE_ENTRY  = 3  // ENTRY: [id, attr0, att1, ...]
+  };
+
+  /// TYPE blocks have codes for each type primitive they use.
+  enum NaClTypeCodes {
+    TYPE_CODE_NUMENTRY =  1,    // NUMENTRY: [numentries]
+
+    // Type Codes
+    TYPE_CODE_VOID     =  2,    // VOID
+    TYPE_CODE_FLOAT    =  3,    // FLOAT
+    TYPE_CODE_DOUBLE   =  4,    // DOUBLE
+    TYPE_CODE_LABEL    =  5,    // LABEL
+    TYPE_CODE_OPAQUE   =  6,    // OPAQUE
+    TYPE_CODE_INTEGER  =  7,    // INTEGER: [width]
+    TYPE_CODE_POINTER  =  8,    // POINTER: [pointee type]
+
+    TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty,
+                                //            paramty x N]
+
+    TYPE_CODE_HALF     =  10,   // HALF
+
+    TYPE_CODE_ARRAY    = 11,    // ARRAY: [numelts, eltty]
+    TYPE_CODE_VECTOR   = 12,    // VECTOR: [numelts, eltty]
+
+    // These are not with the other floating point types because they're
+    // a late addition, and putting them in the right place breaks
+    // binary compatibility.
+    TYPE_CODE_X86_FP80 = 13,    // X86 LONG DOUBLE
+    TYPE_CODE_FP128    = 14,    // LONG DOUBLE (112 bit mantissa)
+    TYPE_CODE_PPC_FP128= 15,    // PPC LONG DOUBLE (2 doubles)
+
+    TYPE_CODE_METADATA = 16,    // METADATA
+
+    TYPE_CODE_X86_MMX = 17,     // X86 MMX
+
+    TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N]
+    TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N]
+    TYPE_CODE_STRUCT_NAMED = 20,// STRUCT_NAMED: [ispacked, eltty x N]
+
+    TYPE_CODE_FUNCTION = 21     // FUNCTION: [vararg, retty, paramty x N]
+  };
+
+  // The type symbol table only has one code (TST_ENTRY_CODE).
+  enum NaClTypeSymtabCodes {
+    TST_CODE_ENTRY = 1     // TST_ENTRY: [typeid, namechar x N]
+  };
+
+  // The value symbol table only has one code (VST_ENTRY_CODE).
+  enum NaClValueSymtabCodes {
+    VST_CODE_ENTRY   = 1,  // VST_ENTRY: [valid, namechar x N]
+    VST_CODE_BBENTRY = 2   // VST_BBENTRY: [bbid, namechar x N]
+  };
+
+  enum NaClMetadataCodes {
+    METADATA_STRING        = 1,   // MDSTRING:      [values]
+    // 2 is unused.
+    // 3 is unused.
+    METADATA_NAME          = 4,   // STRING:        [values]
+    // 5 is unused.
+    METADATA_KIND          = 6,   // [n x [id, name]]
+    // 7 is unused.
+    METADATA_NODE          = 8,   // NODE:          [n x (type num, value num)]
+    METADATA_FN_NODE       = 9,   // FN_NODE:       [n x (type num, value num)]
+    METADATA_NAMED_NODE    = 10,  // NAMED_NODE:    [n x mdnodes]
+    METADATA_ATTACHMENT    = 11   // [m x [value, [n x [id, mdnode]]]
+  };
+
+  // The constants block (CONSTANTS_BLOCK_ID) describes emission for each
+  // constant and maintains an implicit current type value.
+  enum NaClConstantsCodes {
+    CST_CODE_SETTYPE       =  1,  // SETTYPE:       [typeid]
+    CST_CODE_NULL          =  2,  // NULL
+    CST_CODE_UNDEF         =  3,  // UNDEF
+    CST_CODE_INTEGER       =  4,  // INTEGER:       [intval]
+    CST_CODE_WIDE_INTEGER  =  5,  // WIDE_INTEGER:  [n x intval]
+    CST_CODE_FLOAT         =  6,  // FLOAT:         [fpval]
+    CST_CODE_AGGREGATE     =  7,  // AGGREGATE:     [n x value number]
+    CST_CODE_STRING        =  8,  // STRING:        [values]
+    CST_CODE_CSTRING       =  9,  // CSTRING:       [values]
+    CST_CODE_CE_BINOP      = 10,  // CE_BINOP:      [opcode, opval, opval]
+    CST_CODE_CE_CAST       = 11,  // CE_CAST:       [opcode, opty, opval]
+    CST_CODE_CE_GEP        = 12,  // CE_GEP:        [n x operands]
+    CST_CODE_CE_SELECT     = 13,  // CE_SELECT:     [opval, opval, opval]
+    CST_CODE_CE_EXTRACTELT = 14,  // CE_EXTRACTELT: [opty, opval, opval]
+    CST_CODE_CE_INSERTELT  = 15,  // CE_INSERTELT:  [opval, opval, opval]
+    CST_CODE_CE_SHUFFLEVEC = 16,  // CE_SHUFFLEVEC: [opval, opval, opval]
+    CST_CODE_CE_CMP        = 17,  // CE_CMP:        [opty, opval, opval, pred]
+    CST_CODE_INLINEASM_OLD = 18,  // INLINEASM:     [sideeffect|alignstack,
+                                  //                 asmstr,conststr]
+    CST_CODE_CE_SHUFVEC_EX = 19,  // SHUFVEC_EX:    [opty, opval, opval, opval]
+    CST_CODE_CE_INBOUNDS_GEP = 20,// INBOUNDS_GEP:  [n x operands]
+    CST_CODE_BLOCKADDRESS  = 21,  // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#]
+    CST_CODE_DATA          = 22,  // DATA:          [n x elements]
+    CST_CODE_INLINEASM     = 23   // INLINEASM:     [sideeffect|alignstack|
+                                  //                 asmdialect,asmstr,conststr]
+  };
+
+  /// CastOpcodes - These are values used in the bitcode files to encode which
+  /// cast a CST_CODE_CE_CAST or a XXX refers to.  The values of these enums
+  /// have no fixed relation to the LLVM IR enum values.  Changing these will
+  /// break compatibility with old files.
+  enum NaClCastOpcodes {
+    CAST_TRUNC    =  0,
+    CAST_ZEXT     =  1,
+    CAST_SEXT     =  2,
+    CAST_FPTOUI   =  3,
+    CAST_FPTOSI   =  4,
+    CAST_UITOFP   =  5,
+    CAST_SITOFP   =  6,
+    CAST_FPTRUNC  =  7,
+    CAST_FPEXT    =  8,
+    CAST_PTRTOINT =  9,
+    CAST_INTTOPTR = 10,
+    CAST_BITCAST  = 11
+  };
+
+  /// BinaryOpcodes - These are values used in the bitcode files to encode which
+  /// binop a CST_CODE_CE_BINOP or a XXX refers to.  The values of these enums
+  /// have no fixed relation to the LLVM IR enum values.  Changing these will
+  /// break compatibility with old files.
+  enum NaClBinaryOpcodes {
+    BINOP_ADD  =  0,
+    BINOP_SUB  =  1,
+    BINOP_MUL  =  2,
+    BINOP_UDIV =  3,
+    BINOP_SDIV =  4,    // overloaded for FP
+    BINOP_UREM =  5,
+    BINOP_SREM =  6,    // overloaded for FP
+    BINOP_SHL  =  7,
+    BINOP_LSHR =  8,
+    BINOP_ASHR =  9,
+    BINOP_AND  = 10,
+    BINOP_OR   = 11,
+    BINOP_XOR  = 12
+  };
+
+  /// These are values used in the bitcode files to encode AtomicRMW operations.
+  /// The values of these enums have no fixed relation to the LLVM IR enum
+  /// values.  Changing these will break compatibility with old files.
+  enum NaClRMWOperations {
+    RMW_XCHG = 0,
+    RMW_ADD = 1,
+    RMW_SUB = 2,
+    RMW_AND = 3,
+    RMW_NAND = 4,
+    RMW_OR = 5,
+    RMW_XOR = 6,
+    RMW_MAX = 7,
+    RMW_MIN = 8,
+    RMW_UMAX = 9,
+    RMW_UMIN = 10
+  };
+
+  /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing
+  /// OverflowingBinaryOperator's SubclassOptionalData contents.
+  enum NaClOverflowingBinaryOperatorOptionalFlags {
+    OBO_NO_UNSIGNED_WRAP = 0,
+    OBO_NO_SIGNED_WRAP = 1
+  };
+
+  /// PossiblyExactOperatorOptionalFlags - Flags for serializing
+  /// PossiblyExactOperator's SubclassOptionalData contents.
+  enum NaClPossiblyExactOperatorOptionalFlags {
+    PEO_EXACT = 0
+  };
+
+  /// Encoded AtomicOrdering values.
+  enum NaClAtomicOrderingCodes {
+    ORDERING_NOTATOMIC = 0,
+    ORDERING_UNORDERED = 1,
+    ORDERING_MONOTONIC = 2,
+    ORDERING_ACQUIRE = 3,
+    ORDERING_RELEASE = 4,
+    ORDERING_ACQREL = 5,
+    ORDERING_SEQCST = 6
+  };
+
+  /// Encoded SynchronizationScope values.
+  enum NaClAtomicSynchScopeCodes {
+    SYNCHSCOPE_SINGLETHREAD = 0,
+    SYNCHSCOPE_CROSSTHREAD = 1
+  };
+
+  // The function body block (FUNCTION_BLOCK_ID) describes function bodies.  It
+  // can contain a constant block (CONSTANTS_BLOCK_ID).
+  enum NaClFunctionCodes {
+    FUNC_CODE_DECLAREBLOCKS    =  1, // DECLAREBLOCKS: [n]
+
+    FUNC_CODE_INST_BINOP       =  2, // BINOP:      [opcode, ty, opval, opval]
+    FUNC_CODE_INST_CAST        =  3, // CAST:       [opcode, ty, opty, opval]
+    FUNC_CODE_INST_GEP         =  4, // GEP:        [n x operands]
+    FUNC_CODE_INST_SELECT      =  5, // SELECT:     [ty, opval, opval, opval]
+    FUNC_CODE_INST_EXTRACTELT  =  6, // EXTRACTELT: [opty, opval, opval]
+    FUNC_CODE_INST_INSERTELT   =  7, // INSERTELT:  [ty, opval, opval, opval]
+    FUNC_CODE_INST_SHUFFLEVEC  =  8, // SHUFFLEVEC: [ty, opval, opval, opval]
+    FUNC_CODE_INST_CMP         =  9, // CMP:        [opty, opval, opval, pred]
+
+    FUNC_CODE_INST_RET         = 10, // RET:        [opty,opval<both optional>]
+    FUNC_CODE_INST_BR          = 11, // BR:         [bb#, bb#, cond] or [bb#]
+    FUNC_CODE_INST_SWITCH      = 12, // SWITCH:     [opty, op0, op1, ...]
+    FUNC_CODE_INST_INVOKE      = 13, // INVOKE:     [attr, fnty, op0,op1, ...]
+    // 14 is unused.
+    FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE
+
+    FUNC_CODE_INST_PHI         = 16, // PHI:        [ty, val0,bb0, ...]
+    // 17 is unused.
+    // 18 is unused.
+    FUNC_CODE_INST_ALLOCA      = 19, // ALLOCA:     [instty, op, align]
+    FUNC_CODE_INST_LOAD        = 20, // LOAD:       [opty, op, align, vol]
+    // 21 is unused.
+    // 22 is unused.
+    FUNC_CODE_INST_VAARG       = 23, // VAARG:      [valistty, valist, instty]
+    // This store code encodes the pointer type, rather than the value type
+    // this is so information only available in the pointer type (e.g. address
+    // spaces) is retained.
+    FUNC_CODE_INST_STORE       = 24, // STORE:      [ptrty,ptr,val, align, vol]
+    // 25 is unused.
+    FUNC_CODE_INST_EXTRACTVAL  = 26, // EXTRACTVAL: [n x operands]
+    FUNC_CODE_INST_INSERTVAL   = 27, // INSERTVAL:  [n x operands]
+    // fcmp/icmp returning Int1TY or vector of Int1Ty. Same as CMP, exists to
+    // support legacy vicmp/vfcmp instructions.
+    FUNC_CODE_INST_CMP2        = 28, // CMP2:       [opty, opval, opval, pred]
+    // new select on i1 or [N x i1]
+    FUNC_CODE_INST_VSELECT     = 29, // VSELECT:    [ty,opval,opval,predty,pred]
+    FUNC_CODE_INST_INBOUNDS_GEP= 30, // INBOUNDS_GEP: [n x operands]
+    FUNC_CODE_INST_INDIRECTBR  = 31, // INDIRECTBR: [opty, op0, op1, ...]
+    // 32 is unused.
+    FUNC_CODE_DEBUG_LOC_AGAIN  = 33, // DEBUG_LOC_AGAIN
+
+    FUNC_CODE_INST_CALL        = 34, // CALL:       [attr, fnty, fnid, args...]
+
+    FUNC_CODE_DEBUG_LOC        = 35, // DEBUG_LOC:  [Line,Col,ScopeVal, IAVal]
+    FUNC_CODE_INST_FENCE       = 36, // FENCE: [ordering, synchscope]
+    FUNC_CODE_INST_CMPXCHG     = 37, // CMPXCHG: [ptrty,ptr,cmp,new, align, vol,
+                                     //           ordering, synchscope]
+    FUNC_CODE_INST_ATOMICRMW   = 38, // ATOMICRMW: [ptrty,ptr,val, operation,
+                                     //             align, vol,
+                                     //             ordering, synchscope]
+    FUNC_CODE_INST_RESUME      = 39, // RESUME:     [opval]
+    FUNC_CODE_INST_LANDINGPAD  = 40, // LANDINGPAD: [ty,val,val,num,id0,val0...]
+    FUNC_CODE_INST_LOADATOMIC  = 41, // LOAD: [opty, op, align, vol,
+                                     //        ordering, synchscope]
+    FUNC_CODE_INST_STOREATOMIC = 42  // STORE: [ptrty,ptr,val, align, vol
+                                     //         ordering, synchscope]
+  };
+
+  enum NaClUseListCodes {
+    USELIST_CODE_ENTRY = 1   // USELIST_CODE_ENTRY: TBD.
+  };
+} // End naclbitc namespace
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
index 2b905b97e8..d505af0a1f 100644
--- a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
+++ b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
@@ -8,22 +8,142 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This header defines interfaces to read and write LLVM bitcode files/streams.
+// This header defines interfaces to read and write NaCl bitcode wire format
+// files.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_BITCODE_NACL_NACLREADERWRITER_H
 #define LLVM_BITCODE_NACL_NACLREADERWRITER_H
 
+#include <string>
+
 namespace llvm {
+  class MemoryBuffer;
+  class DataStreamer;
+  class LLVMContext;
   class Module;
   class raw_ostream;
 
+  /// getNaClLazyBitcodeModule - Read the header of the specified bitcode buffer
+  /// and prepare for lazy deserialization of function bodies.  If successful,
+  /// this takes ownership of 'buffer' and returns a non-null pointer.  On
+  /// error, this returns null, *does not* take ownership of Buffer, and fills
+  /// in *ErrMsg with an error description if ErrMsg is non-null.
+  Module *getNaClLazyBitcodeModule(MemoryBuffer *Buffer,
+				   LLVMContext &Context,
+				   std::string *ErrMsg = 0);
+
+  /// getNaClStreamedBitcodeModule - Read the header of the specified stream
+  /// and prepare for lazy deserialization and streaming of function bodies.
+  /// On error, this returns null, and fills in *ErrMsg with an error
+  /// description if ErrMsg is non-null.
+  Module *getNaClStreamedBitcodeModule(const std::string &name,
+				       DataStreamer *streamer,
+				       LLVMContext &Context,
+				       std::string *ErrMsg = 0);
+
+  /// NaClParseBitcodeFile - Read the specified bitcode file,
+  /// returning the module.  If an error occurs, this returns null and
+  /// fills in *ErrMsg if it is non-null.  This method *never* takes
+  /// ownership of Buffer.
+  Module *NaClParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context,
+			       std::string *ErrMsg = 0);
+
   /// NaClWriteBitcodeToFile - Write the specified module to the
   /// specified raw output stream, using PNaCl wire format.  For
   /// streams where it matters, the given stream should be in "binary"
   /// mode.
   void NaClWriteBitcodeToFile(const Module *M, raw_ostream &Out);
 
+  /// isNaClBitcodeWrapper - Return true if the given bytes are the
+  /// magic bytes for an LLVM IR bitcode wrapper.
+  ///
+  inline bool isNaClBitcodeWrapper(const unsigned char *BufPtr,
+                                   const unsigned char *BufEnd) {
author	Karl Schimpf <kschimpf@google.com>	2013-05-01 10:42:30 -0700
committer	Karl Schimpf <kschimpf@google.com>	2013-05-01 10:42:30 -0700
commit	f42b26d0d46034cb2b91df810477fc0df2e67b27 (patch)
tree	bc8749b8fa6e36670df3171a27d9db2bfab608a7 /include/llvm/Bitcode
parent	595239b2b7297b62d9f804770f5f43d8bf637a0f (diff)