Make PNaCl bitcode files have a different format from LLVM bitcode files.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=3405 R=dschuff@chromium.org Review URL: https://codereview.chromium.org/15907008
author: Karl Schimpf <kschimpf@google.com> 2013-06-06 10:03:24 -0700
committer: Karl Schimpf <kschimpf@google.com> 2013-06-06 10:03:24 -0700
commit: 37bdd9174a1cba17b369c8c1f561e70c458e0c13 (patch)
tree: 1f6a984ebb94ccd819c6e38646d91bb5c7eb6977 /include
parent: c0d9b337419b72e69cbd9c64f84ae39560ab344f (diff)
4 files changed, 256 insertions, 104 deletions
diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h b/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h
new file mode 100644
index 0000000000..6e35f62067
--- /dev/null
+++ b/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h
@@ -0,0 +1,219 @@
+//===-- llvm/Bitcode/NaCl/NaClBitcodeHeader.h - ----------------*- C++ -*-===//
+//      NaCl Bitcode header reader.
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines interfaces to read and write NaCl bitcode wire format
+// file headers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_NACL_NACLBITCODEHEADER_H
+#define LLVM_BITCODE_NACL_NACLBITCODEHEADER_H
+
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+class StreamingMemoryObject;
+
+// Class representing a variable-size metadata field in the bitcode header.
+// Also contains the list of known (typed) Tag IDs.
+//
+// The serialized format has 2 fixed subfields (ID:type and data length) and the
+// variable-length data subfield; GetTotalSize() rounds the total up to a word.
+class NaClBitcodeHeaderField {
+  NaClBitcodeHeaderField(const NaClBitcodeHeaderField &) LLVM_DELETED_FUNCTION;
+  void operator=(const NaClBitcodeHeaderField &)LLVM_DELETED_FUNCTION;
+
+public:
+  // Defines the ID associated with the value. Valid values are in
+  // {0x0, ..., 0xFFF} (the ID occupies the upper 12 bits of a FixedSubfield).
+  typedef enum {
+    kInvalid = 0,     // KUnknownType.
+    kPNaClVersion = 1 // kUInt32Type.
+  } Tag;
+  // Defines the type of value (the low 4 bits of the ID:type subfield).
+  typedef enum {
+    kBufferType, // Buffer of form uint8_t[len].
+    kUInt32Type // A uint32_t value (see GetUInt32Value()).
+  } FieldType;
+  // Defines the number of bytes in a (32-bit) word.
+  static const int WordSize = 4;
+
+  // Defines the encoding of the fixed fields (i.e. ID:type and data length).
+  typedef uint16_t FixedSubfield;
+
+  // Create an invalid header field.
+  NaClBitcodeHeaderField();
+
+  // Create a header field with a uint32_t value.
+  NaClBitcodeHeaderField(Tag MyID, uint32_t value);
+
+  // Create a header field for the given data.
+  NaClBitcodeHeaderField(Tag MyID, size_t MyLen, uint8_t *MyData);
+
+  virtual ~NaClBitcodeHeaderField() {
+    if (Data) // The destructor releases the data array, if any.
+      delete[] Data;
+  }
+
+  /// \brief Number of bytes used to represent header field.
+  size_t GetTotalSize() const {
+    // Round up to a multiple of WordSize (4 byte alignment).
+    return (kTagLenSize + Len + (WordSize - 1)) & ~(WordSize - 1);
+  }
+
+  /// \brief Write field into Buf[BufLen].
+  bool Write(uint8_t *Buf, size_t BufLen) const;
+
+  /// \brief Read field from Buf[BufLen].
+  bool Read(const uint8_t *Buf, size_t BufLen);
+
+  /// \brief Returns string describing field.
+  std::string Contents() const;
+
+  /// \brief Get the data size from a serialized field to allow allocation.
+  static size_t GetDataSizeFromSerialized(const uint8_t *Buf) {
+    FixedSubfield Length; // Second fixed subfield, after the ID:type one.
+    ReadFixedSubfield(&Length, Buf + sizeof(FixedSubfield));
+    return Length;
+  }
+
+  /// \brief Return the ID of the field.
+  Tag GetID() const { return ID; }
+
+  FieldType GetType() const { return FType; } // Type of the stored value.
+
+  /// \brief Return the length of the data (in bytes).
+  size_t GetLen() const { return Len; }
+
+  /// \brief Return the data. Data is array GetData()[GetLen()].
+  const uint8_t *GetData() const { return Data; }
+
+  /// \brief Returns the uint32_t value stored. Requires that
+  /// GetType() == kUInt32Type.
+  uint32_t GetUInt32Value() const;
+
+private:
+  // Convert ID:Type into a fixed subfield (ID above the 4 type bits).
+  FixedSubfield EncodeTypedID() const { return (ID << 4) | FType; }
+  // Extract out ID and Type from a fixed subfield (inverse of EncodeTypedID).
+  void DecodeTypedID(FixedSubfield Subfield, Tag &ID, FieldType &FType) {
+    ID = static_cast<Tag>(Subfield >> 4);
+    FType = static_cast<FieldType>(Subfield & 0xF);
+  }
+  // Combined size of the fixed subfields (ID:type and data length).
+  const static size_t kTagLenSize = 2 * sizeof(FixedSubfield);
+  static void WriteFixedSubfield(FixedSubfield Value, uint8_t *Buf) {
+    Buf[0] = Value & 0xFF; // Low byte first (little-endian).
+    Buf[1] = (Value >> 8) & 0xFF;
+  }
+  static void ReadFixedSubfield(FixedSubfield *Value, const uint8_t *Buf) {
+    *Value = Buf[0] | Buf[1] << 8; // Little-endian: Buf[1] is the high byte.
+  }
+  Tag ID;
+  FieldType FType;
+  size_t Len;
+  uint8_t *Data;
+};
+
+/// \brief Class holding parsed header fields in PNaCl bitcode file.
+class NaClBitcodeHeader {
+  // The set of parsed header fields. The header takes ownership of
+  // all fields in this vector.
+  std::vector<NaClBitcodeHeaderField *> Fields;
+  // The number of bytes in the PNaCl header.
+  size_t HeaderSize;
+  // String defining why it is unsupported (if unsupported).
+  std::string UnsupportedMessage;
+  // Flag defining if header is supported.
+  bool IsSupportedFlag;
+  // Flag defining if the corresponding bitcode file is readable.
+  bool IsReadableFlag;
+  // Defines the PNaCl version defined by the header file.
+  uint32_t PNaClVersion;
+
+public:
+  static const int WordSize = NaClBitcodeHeaderField::WordSize;
+
+  NaClBitcodeHeader();
+  ~NaClBitcodeHeader();
+
+  /// \brief Installs the fields of the header, defining if the header
+  /// is readable and supported.
+  void InstallFields();
+
+  /// \brief Read the PNaCl bitcode header. The format of the header is:
+  ///
+  ///    1) 'PEXE' - The four character sequence defining the magic number.
+  ///    2) uint16_t num_fields - The number of NaClBitcodeHeaderField's.
+  ///    3) uint16_t num_bytes - The number of bytes to hold fields in
+  ///                            the header.
+  ///    4) NaClBitcodeHeaderField f1 - The first bitcode header field.
+  ///    ...
+  ///    3 + num_fields) NaClBitcodeHeaderField fn - The last bitcode header
+  ///                    field.
+  ///
+  /// Returns false if able to read (all of) the bitcode header.
+  bool Read(const unsigned char *&BufPtr, const unsigned char *&BufEnd);
+
+  /// \brief Read the PNaCl bitcode header, recording the fields found
+  /// in the header. Returns false if able to read (all of) the bitcode header.
+  bool Read(StreamingMemoryObject *Bytes);
+
+  /// \brief Returns the number of bytes read to consume the header.
+  size_t getHeaderSize() { return HeaderSize; }
+
+  /// \brief Returns a copy of the message describing why the header is
+  /// unsupported. Presumably empty when supported (TODO: confirm).
+  const std::string Unsupported() const { return UnsupportedMessage; }
+
+  /// \brief Returns true if supported. That is, it can be run in the
+  /// browser.
+  bool IsSupported() const { return IsSupportedFlag; }
+
+  /// \brief Returns true if the bitcode file should be readable. Note
+  /// that just because it is readable, it doesn't necessarily mean that
+  /// it is supported.
+  bool IsReadable() const { return IsReadableFlag; }
+
+  /// \brief Returns number of fields defined.
+  size_t NumberFields() const { return Fields.size(); }
+
+  /// \brief Returns a pointer to the field with the given ID
+  /// (0 if no such field).
+  NaClBitcodeHeaderField *GetTaggedField(NaClBitcodeHeaderField::Tag ID) const;
+
+  /// \brief Returns a pointer to the Nth field in the header
+  /// (0 if no such field).
+  NaClBitcodeHeaderField *GetField(size_t index) const;
+
+  /// \brief Returns the PNaClVersion, as defined by the header.
+  uint32_t GetPNaClVersion() const { return PNaClVersion; }
+
+private:
+  // Reads and verifies the first 8 bytes of the header, consisting
+  // of the magic number 'PEXE', and the value defining the number
+  // of fields and number of bytes used to hold fields.
+  // Returns false if successful.
+  bool ReadPrefix(const unsigned char *BufPtr, const unsigned char *BufEnd,
+                  unsigned &NumFields, unsigned &NumBytes);
+
+  // Reads and verifies the fields in the header.
+  // Returns false if successful.
+  bool ReadFields(const unsigned char *BufPtr, const unsigned char *BufEnd,
+                  unsigned NumFields, unsigned NumBytes);
+
+};
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
index 4c532bbc37..a338bbfe79 100644
--- a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
+++ b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
@@ -49,24 +49,30 @@ private:
 
   std::vector<BlockInfo> BlockInfoRecords;
 
-  /// IgnoreBlockInfoNames - This is set to true if we don't care about the
-  /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
-  /// uses this.
+  /// IgnoreBlockInfoNames - This is set to true if we don't care
+  /// about the block/record name information in the BlockInfo
+  /// block. Only pnacl-bcanalyzer uses this.
   bool IgnoreBlockInfoNames;
 
+  /// \brief Holds the offset of the first byte after the header.
+  size_t InitialAddress;
+
   NaClBitstreamReader(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION;
   void operator=(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION;
 public:
-  NaClBitstreamReader() : IgnoreBlockInfoNames(true) {
-  }
+  NaClBitstreamReader() : IgnoreBlockInfoNames(true), InitialAddress(0) {}
 
   NaClBitstreamReader(const unsigned char *Start, const unsigned char *End) {
     IgnoreBlockInfoNames = true;
+    InitialAddress = 0;
     init(Start, End);
   }
 
-  NaClBitstreamReader(StreamableMemoryObject *bytes) {
-    BitcodeBytes.reset(bytes);
+  NaClBitstreamReader(StreamableMemoryObject *Bytes,
+                      size_t MyInitialAddress=0)
+      // Set IgnoreBlockInfoNames too, as the other constructors do.
+      : IgnoreBlockInfoNames(true), InitialAddress(MyInitialAddress) {
+    BitcodeBytes.reset(Bytes);
   }
 
   void init(const unsigned char *Start, const unsigned char *End) {
@@ -93,6 +99,11 @@ public:
   void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
   bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
 
+  /// \brief Returns the initial address (after the header) of the input stream.
+  size_t getInitialAddress() const {
+    return InitialAddress;
+  }
+
   //===--------------------------------------------------------------------===//
   // Block Manipulation
   //===--------------------------------------------------------------------===//
@@ -210,7 +221,7 @@ public:
   }
 
   explicit NaClBitstreamCursor(NaClBitstreamReader &R) : BitStream(&R) {
-    NextChar = 0;
+    NextChar = R.getInitialAddress();
     CurWord = 0;
     BitsInCurWord = 0;
   }
@@ -219,7 +230,7 @@ public:
     freeState();
 
     BitStream = &R;
-    NextChar = 0;
+    NextChar = R.getInitialAddress();
     CurWord = 0;
     BitsInCurWord = 0;
   }
@@ -242,12 +253,6 @@ public:
         static_cast<uint64_t>(pos - 1));
   }
 
-  uint32_t getWord(size_t pos) {
-    uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
-    BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf, NULL);
-    return *reinterpret_cast<support::ulittle32_t *>(buf);
-  }
-
   bool AtEndOfStream() {
     return BitsInCurWord == 0 && isEndPos(NextChar);
   }
diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h
index b4123261a0..2237b6e29b 100644
--- a/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h
+++ b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h
@@ -61,6 +61,7 @@ class NaClBitstreamWriter {
   };
   std::vector<BlockInfo> BlockInfoRecords;
 
+public:
   // BackpatchWord - Backpatch a 32-bit word in the output with the specified
   // value.
   void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
@@ -70,6 +71,7 @@ class NaClBitstreamWriter {
     Out[ByteNo  ] = (unsigned char)(NewWord >> 24);
   }
 
+private:
   void WriteByte(unsigned char Value) {
     Out.push_back(Value);
   }
diff --git a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
index d505af0a1f..53feb8ab86 100644
--- a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
+++ b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
@@ -31,24 +31,27 @@ namespace llvm {
   /// error, this returns null, *does not* take ownership of Buffer, and fills
   /// in *ErrMsg with an error description if ErrMsg is non-null.
   Module *getNaClLazyBitcodeModule(MemoryBuffer *Buffer,
-				   LLVMContext &Context,
-				   std::string *ErrMsg = 0);
+                                   LLVMContext &Context,
+                                   std::string *ErrMsg = 0,
+                                   bool AcceptSupportedOnly = true);
 
   /// getNaClStreamedBitcodeModule - Read the header of the specified stream
   /// and prepare for lazy deserialization and streaming of function bodies.
   /// On error, this returns null, and fills in *ErrMsg with an error
   /// description if ErrMsg is non-null.
   Module *getNaClStreamedBitcodeModule(const std::string &name,
-				       DataStreamer *streamer,
-				       LLVMContext &Context,
-				       std::string *ErrMsg = 0);
+                                       DataStreamer *streamer,
+                                       LLVMContext &Context,
+                                       std::string *ErrMsg = 0,
+                                       bool AcceptSupportedOnly = true);
 
   /// NaClParseBitcodeFile - Read the specified bitcode file,
   /// returning the module.  If an error occurs, this returns null and
   /// fills in *ErrMsg if it is non-null.  This method *never* takes
   /// ownership of Buffer.
   Module *NaClParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context,
-			       std::string *ErrMsg = 0);
+                               std::string *ErrMsg = 0,
+                               bool AcceptSupportedOnly = true);
 
   /// NaClWriteBitcodeToFile - Write the specified module to the
   /// specified raw output stream, using PNaCl wire format.  For
@@ -56,93 +59,16 @@ namespace llvm {
   /// mode.
   void NaClWriteBitcodeToFile(const Module *M, raw_ostream &Out);
 
-  /// isNaClBitcodeWrapper - Return true if the given bytes are the
-  /// magic bytes for an LLVM IR bitcode wrapper.
-  ///
-  inline bool isNaClBitcodeWrapper(const unsigned char *BufPtr,
-                                   const unsigned char *BufEnd) {
-    // See if you can find the hidden message in the magic bytes :-).
-    // (Hint: it's a little-endian encoding.)
-    return BufPtr != BufEnd &&
-           BufPtr[0] == 0xDE &&
-           BufPtr[1] == 0xC0 &&
-           BufPtr[2] == 0x17 &&
-           BufPtr[3] == 0x0B;
-  }
-
-  /// isNaClRawBitcode - Return true if the given bytes are the magic
-  /// bytes for raw LLVM IR bitcode (without a wrapper).
-  ///
-  inline bool isNaClRawBitcode(const unsigned char *BufPtr,
-                               const unsigned char *BufEnd) {
-    // These bytes sort of have a hidden message, but it's not in
-    // little-endian this time, and it's a little redundant.
-    return BufPtr != BufEnd &&
-           BufPtr[0] == 'B' &&
-           BufPtr[1] == 'C' &&
-           BufPtr[2] == 0xc0 &&
-           BufPtr[3] == 0xde;
-  }
-
   /// isNaClBitcode - Return true if the given bytes are the magic bytes for
-  /// LLVM IR bitcode, either with or without a wrapper.
+  /// PNaCl bitcode wire format.
   ///
   inline bool isNaClBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
-    return isNaClBitcodeWrapper(BufPtr, BufEnd) ||
-           isNaClRawBitcode(BufPtr, BufEnd);
-  }
-
-  /// SkipNaClBitcodeWrapperHeader - Some systems wrap bc files with a
-  /// special header for padding or other reasons.  The format of this
-  /// header is:
-  ///
-  /// struct bc_header {
-  ///   uint32_t Magic;         // 0x0B17C0DE
-  ///   uint32_t Version;       // Version, currently always 0.
-  ///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
-  ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
-  ///   ... potentially other gunk ...
-  /// };
-  ///
-  /// TODO(kschimpf): Consider changing Magic and/or gunk to communicate
-  ///     file is PNaCl wire format file (rather than LLVM bitcode).
-  ///
-  /// TODO(kschimpf): Add code to read gunk in, and store it so it is
-  /// accessable.
-  ///
-  /// This function is called when we find a file with a matching magic number.
-  /// In this case, skip down to the subsection of the file that is actually a
-  /// BC file.
-  /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
-  /// contain the whole bitcode file.
-  inline bool SkipNaClBitcodeWrapperHeader(const unsigned char *&BufPtr,
-                                           const unsigned char *&BufEnd,
-                                           bool VerifyBufferSize) {
-    enum {
-      KnownHeaderSize = 4*4,  // Size of header we read.
-      OffsetField = 2*4,      // Offset in bytes to Offset field.
-      SizeField = 3*4         // Offset in bytes to Size field.
-    };
-
-    // Must contain the header!
-    if (BufEnd-BufPtr < KnownHeaderSize) return true;
-
-    unsigned Offset = ( BufPtr[OffsetField  ]        |
-                       (BufPtr[OffsetField+1] << 8)  |
-                       (BufPtr[OffsetField+2] << 16) |
-                       (BufPtr[OffsetField+3] << 24));
-    unsigned Size   = ( BufPtr[SizeField    ]        |
-                       (BufPtr[SizeField  +1] << 8)  |
-                       (BufPtr[SizeField  +2] << 16) |
-                       (BufPtr[SizeField  +3] << 24));
-
-    // Verify that Offset+Size fits in the file.
-    if (VerifyBufferSize && Offset+Size > unsigned(BufEnd-BufPtr))
-      return true;
-    BufPtr += Offset;
-    BufEnd = BufPtr+Size;
-    return false;
+    return BufPtr+4 <= BufEnd &&
+        BufPtr[0] == 'P' &&
+        BufPtr[1] == 'E' &&
+        BufPtr[2] == 'X' &&
+        BufPtr[3] == 'E';
   }
 
 } // end llvm namespace
author	Karl Schimpf <kschimpf@google.com>	2013-06-06 10:03:24 -0700
committer	Karl Schimpf <kschimpf@google.com>	2013-06-06 10:03:24 -0700
commit	37bdd9174a1cba17b369c8c1f561e70c458e0c13 (patch)
tree	1f6a984ebb94ccd819c6e38646d91bb5c7eb6977 /include
parent	c0d9b337419b72e69cbd9c64f84ae39560ab344f (diff)