diff options
author | Karl Schimpf <kschimpf@google.com> | 2013-06-06 10:03:24 -0700 |
---|---|---|
committer | Karl Schimpf <kschimpf@google.com> | 2013-06-06 10:03:24 -0700 |
commit | 37bdd9174a1cba17b369c8c1f561e70c458e0c13 (patch) | |
tree | 1f6a984ebb94ccd819c6e38646d91bb5c7eb6977 /include | |
parent | c0d9b337419b72e69cbd9c64f84ae39560ab344f (diff) |
Make PNaCl bitcode files have a different format from LLVM bitcode files.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=3405
R=dschuff@chromium.org
Review URL: https://codereview.chromium.org/15907008
Diffstat (limited to 'include')
-rw-r--r-- | include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h | 219 | ||||
-rw-r--r-- | include/llvm/Bitcode/NaCl/NaClBitstreamReader.h | 35 | ||||
-rw-r--r-- | include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h | 2 | ||||
-rw-r--r-- | include/llvm/Bitcode/NaCl/NaClReaderWriter.h | 104 |
4 files changed, 256 insertions, 104 deletions
diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h b/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h new file mode 100644 index 0000000000..6e35f62067 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h @@ -0,0 +1,219 @@ +//===-- llvm/Bitcode/NaCl/NaClBitcodeHeader.h - ----------------*- C++ -*-===// +// NaCl Bitcode header reader. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines interfaces to read and write NaCl bitcode wire format +// file headers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLBITCODEHEADER_H +#define LLVM_BITCODE_NACL_NACLBITCODEHEADER_H + +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include <string> +#include <vector> + +namespace llvm { +class StreamingMemoryObject; + +// Class representing a variable-size metadata field in the bitcode header. +// Also contains the list of known (typed) Tag IDs. +// +// The serialized format has 2 fixed subfields (ID:type and data length) and the +// variable-length data subfield. +class NaClBitcodeHeaderField { + NaClBitcodeHeaderField(const NaClBitcodeHeaderField &) LLVM_DELETED_FUNCTION; + void operator=(const NaClBitcodeHeaderField &)LLVM_DELETED_FUNCTION; + +public: + // Defines the ID associated with the value. Valid values are in + // {0x0, ..., 0xFFF} + typedef enum { + kInvalid = 0, // KUnknownType. + kPNaClVersion = 1 // kUint32. + } Tag; + // Defines the type of value. + typedef enum { + kBufferType, // Buffer of form uint8_t[len]. + kUInt32Type + } FieldType; + // Defines the number of bytes in a (32-bit) word. + static const int WordSize = 4; + + // Defines the encoding of the fixed fields (i.e. ID:type and data length). 
+ typedef uint16_t FixedSubfield; + + // Create an invalid header field. + NaClBitcodeHeaderField(); + + // Create a header field with a uint32_t value. + NaClBitcodeHeaderField(Tag MyID, uint32_t value); + + // Create a header field for the given data. + NaClBitcodeHeaderField(Tag MyID, size_t MyLen, uint8_t *MyData); + + virtual ~NaClBitcodeHeaderField() { + if (Data) + delete[] Data; + } + + /// \brief Number of bytes used to represent header field. + size_t GetTotalSize() const { + // Round up to 4 byte alignment + return (kTagLenSize + Len + (WordSize - 1)) & ~(WordSize - 1); + } + + /// \brief Write field into Buf[BufLen]. + bool Write(uint8_t *Buf, size_t BufLen) const; + + /// \brief Read field from Buf[BufLen]. + bool Read(const uint8_t *Buf, size_t BufLen); + + /// \brief Returns string describing field. + std::string Contents() const; + + /// \brief Get the data size from a serialized field to allow allocation. + static size_t GetDataSizeFromSerialized(const uint8_t *Buf) { + FixedSubfield Length; + ReadFixedSubfield(&Length, Buf + sizeof(FixedSubfield)); + return Length; + } + + /// \brief Return the ID of the field. + Tag GetID() const { return ID; } + + FieldType GetType() const { return FType; } + + /// \brief Return the length of the data (in bytes). + size_t GetLen() const { return Len; } + + /// \brief Return the data. Data is array GetData()[GetLen()]. + const uint8_t *GetData() const { return Data; } + + /// \brief Returns the uint32_t value stored. Requires that + /// GetType() == kUInt32Type + uint32_t GetUInt32Value() const; + +private: + // Convert ID:Type into a fixed subfield + FixedSubfield EncodeTypedID() const { return (ID << 4) | FType; } + // Extract out ID and Type from a fixed subfield. 
+ void DecodeTypedID(FixedSubfield Subfield, Tag &ID, FieldType &FType) { + ID = static_cast<Tag>(Subfield >> 4); + FType = static_cast<FieldType>(Subfield & 0xF); + } + // Combined size of the fixed subfields + const static size_t kTagLenSize = 2 * sizeof(FixedSubfield); + static void WriteFixedSubfield(FixedSubfield Value, uint8_t *Buf) { + Buf[0] = Value & 0xFF; + Buf[1] = (Value >> 8) & 0xFF; + } + static void ReadFixedSubfield(FixedSubfield *Value, const uint8_t *Buf) { + *Value = Buf[0] | Buf[1] << 8; + } + Tag ID; + FieldType FType; + size_t Len; + uint8_t *Data; +}; + +/// \brief Class holding parsed header fields in PNaCl bitcode file. +class NaClBitcodeHeader { + // The set of parsed header fields. The header takes ownership of + // all fields in this vector. + std::vector<NaClBitcodeHeaderField *> Fields; + // The number of bytes in the PNaCl header. + size_t HeaderSize; + // String defining why it is unsupported (if unsupported). + std::string UnsupportedMessage; + // Flag defining if header is supported. + bool IsSupportedFlag; + // Flag defining if the corresponding bitcode file is readable. + bool IsReadableFlag; + // Defines the PNaCl version defined by the header file. + uint32_t PNaClVersion; + +public: + static const int WordSize = NaClBitcodeHeaderField::WordSize; + + NaClBitcodeHeader(); + ~NaClBitcodeHeader(); + + /// \brief Installs the fields of the header, defining if the header + /// is readable and supported. + void InstallFields(); + + /// \brief Read the PNaCl bitcode header. The format of the header is: + /// + /// 1) 'PEXE' - The four character sequence defining the magic number. + /// 2) uint16_t num_fields - The number of NaClBitcodeHeaderFields. + /// 3) uint16_t num_bytes - The number of bytes to hold fields in + /// the header. + /// 4) NaClBitcodeHeaderField f1 - The first bitcode header field. + /// ... + /// 3 + num_fields) NaClBitcodeHeaderField fn - The last bitcode header + /// field. 
+ /// + /// Returns false if able to read (all of) the bitcode header. + bool Read(const unsigned char *&BufPtr, const unsigned char *&BufEnd); + + // \brief Read the PNaCl bitcode header, recording the fields found + // in the header. Returns false if able to read (all of) the bitcode header. + bool Read(StreamingMemoryObject *Bytes); + + // \brief Returns the number of bytes read to consume the header. + size_t getHeaderSize() { return HeaderSize; } + + /// \brief Returns C string describing why the header describes + /// an unsupported PNaCl Bitcode file. Returns 0 if supported. + const std::string Unsupported() const { return UnsupportedMessage; } + + /// \brief Returns true if supported. That is, it can be run in the + /// browser. + bool IsSupported() const { return IsSupportedFlag; } + + /// \brief Returns true if the bitcode file should be readable. Note + /// that just because it is readable, it doesn't necessarily mean that + /// it is supported. + bool IsReadable() const { return IsReadableFlag; } + + /// \brief Returns number of fields defined. + size_t NumberFields() const { return Fields.size(); } + + /// \brief Returns a pointer to the field with the given ID + /// (0 if no such field). + NaClBitcodeHeaderField *GetTaggedField(NaClBitcodeHeaderField::Tag ID) const; + + /// \brief Returns a pointer to the Nth field in the header + /// (0 if no such field). + NaClBitcodeHeaderField *GetField(size_t index) const; + + /// \brief Returns the PNaClVersion, as defined by the header. + uint32_t GetPNaClVersion() const { return PNaClVersion; } + +private: + // Reads and verifies the first 8 bytes of the header, consisting + // of the magic number 'PEXE', and the value defining the number + // of fields and number of bytes used to hold fields. + // Returns false if successful. + bool ReadPrefix(const unsigned char *BufPtr, const unsigned char *BufEnd, + unsigned &NumFields, unsigned &NumBytes); + + // Reads and verifies the fields in the header. 
+ // Returns false if successful. + bool ReadFields(const unsigned char *BufPtr, const unsigned char *BufEnd, + unsigned NumFields, unsigned NumBytes); + +}; + +} // namespace llvm + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h index 4c532bbc37..a338bbfe79 100644 --- a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h +++ b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h @@ -49,24 +49,30 @@ private: std::vector<BlockInfo> BlockInfoRecords; - /// IgnoreBlockInfoNames - This is set to true if we don't care about the - /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer - /// uses this. + /// IgnoreBlockInfoNames - This is set to true if we don't care + /// about the block/record name information in the BlockInfo + /// block. Only pnacl-bcanalyzer uses this. bool IgnoreBlockInfoNames; + /// \brief Holds the offset of the first byte after the header. + size_t InitialAddress; + NaClBitstreamReader(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION; void operator=(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION; public: - NaClBitstreamReader() : IgnoreBlockInfoNames(true) { - } + NaClBitstreamReader() : IgnoreBlockInfoNames(true), InitialAddress(0) {} NaClBitstreamReader(const unsigned char *Start, const unsigned char *End) { IgnoreBlockInfoNames = true; + InitialAddress = 0; init(Start, End); } - NaClBitstreamReader(StreamableMemoryObject *bytes) { - BitcodeBytes.reset(bytes); + NaClBitstreamReader(StreamableMemoryObject *Bytes, + size_t MyInitialAddress=0) + : InitialAddress(MyInitialAddress) + { + BitcodeBytes.reset(Bytes); } void init(const unsigned char *Start, const unsigned char *End) { @@ -93,6 +99,11 @@ public: void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } + /// \brief Returns the initial address (after the header) of the input stream. 
+ size_t getInitialAddress() const { + return InitialAddress; + } + //===--------------------------------------------------------------------===// // Block Manipulation //===--------------------------------------------------------------------===// @@ -210,7 +221,7 @@ public: } explicit NaClBitstreamCursor(NaClBitstreamReader &R) : BitStream(&R) { - NextChar = 0; + NextChar = R.getInitialAddress(); CurWord = 0; BitsInCurWord = 0; } @@ -219,7 +230,7 @@ public: freeState(); BitStream = &R; - NextChar = 0; + NextChar = R.getInitialAddress(); CurWord = 0; BitsInCurWord = 0; } @@ -242,12 +253,6 @@ public: static_cast<uint64_t>(pos - 1)); } - uint32_t getWord(size_t pos) { - uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF }; - BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf, NULL); - return *reinterpret_cast<support::ulittle32_t *>(buf); - } - bool AtEndOfStream() { return BitsInCurWord == 0 && isEndPos(NextChar); } diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h index b4123261a0..2237b6e29b 100644 --- a/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h +++ b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h @@ -61,6 +61,7 @@ class NaClBitstreamWriter { }; std::vector<BlockInfo> BlockInfoRecords; +public: // BackpatchWord - Backpatch a 32-bit word in the output with the specified // value. 
void BackpatchWord(unsigned ByteNo, unsigned NewWord) { @@ -70,6 +71,7 @@ class NaClBitstreamWriter { Out[ByteNo ] = (unsigned char)(NewWord >> 24); } +private: void WriteByte(unsigned char Value) { Out.push_back(Value); } diff --git a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h index d505af0a1f..53feb8ab86 100644 --- a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h +++ b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h @@ -31,24 +31,27 @@ namespace llvm { /// error, this returns null, *does not* take ownership of Buffer, and fills /// in *ErrMsg with an error description if ErrMsg is non-null. Module *getNaClLazyBitcodeModule(MemoryBuffer *Buffer, - LLVMContext &Context, - std::string *ErrMsg = 0); + LLVMContext &Context, + std::string *ErrMsg = 0, + bool AcceptSupportedOnly = true); /// getNaClStreamedBitcodeModule - Read the header of the specified stream /// and prepare for lazy deserialization and streaming of function bodies. /// On error, this returns null, and fills in *ErrMsg with an error /// description if ErrMsg is non-null. Module *getNaClStreamedBitcodeModule(const std::string &name, - DataStreamer *streamer, - LLVMContext &Context, - std::string *ErrMsg = 0); + DataStreamer *streamer, + LLVMContext &Context, + std::string *ErrMsg = 0, + bool AcceptSupportedOnly = true); /// NaClParseBitcodeFile - Read the specified bitcode file, /// returning the module. If an error occurs, this returns null and /// fills in *ErrMsg if it is non-null. This method *never* takes /// ownership of Buffer. Module *NaClParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context, - std::string *ErrMsg = 0); + std::string *ErrMsg = 0, + bool AcceptSupportedOnly = true); /// NaClWriteBitcodeToFile - Write the specified module to the /// specified raw output stream, using PNaCl wire format. For @@ -56,93 +59,16 @@ namespace llvm { /// mode. 
void NaClWriteBitcodeToFile(const Module *M, raw_ostream &Out); - /// isNaClBitcodeWrapper - Return true if the given bytes are the - /// magic bytes for an LLVM IR bitcode wrapper. - /// - inline bool isNaClBitcodeWrapper(const unsigned char *BufPtr, - const unsigned char *BufEnd) { - // See if you can find the hidden message in the magic bytes :-). - // (Hint: it's a little-endian encoding.) - return BufPtr != BufEnd && - BufPtr[0] == 0xDE && - BufPtr[1] == 0xC0 && - BufPtr[2] == 0x17 && - BufPtr[3] == 0x0B; - } - - /// isNaClRawBitcode - Return true if the given bytes are the magic - /// bytes for raw LLVM IR bitcode (without a wrapper). - /// - inline bool isNaClRawBitcode(const unsigned char *BufPtr, - const unsigned char *BufEnd) { - // These bytes sort of have a hidden message, but it's not in - // little-endian this time, and it's a little redundant. - return BufPtr != BufEnd && - BufPtr[0] == 'B' && - BufPtr[1] == 'C' && - BufPtr[2] == 0xc0 && - BufPtr[3] == 0xde; - } - /// isNaClBitcode - Return true if the given bytes are the magic bytes for - /// LLVM IR bitcode, either with or without a wrapper. + /// PNaCl bitcode wire format. /// inline bool isNaClBitcode(const unsigned char *BufPtr, const unsigned char *BufEnd) { - return isNaClBitcodeWrapper(BufPtr, BufEnd) || - isNaClRawBitcode(BufPtr, BufEnd); - } - - /// SkipNaClBitcodeWrapperHeader - Some systems wrap bc files with a - /// special header for padding or other reasons. The format of this - /// header is: - /// - /// struct bc_header { - /// uint32_t Magic; // 0x0B17C0DE - /// uint32_t Version; // Version, currently always 0. - /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. - /// uint32_t BitcodeSize; // Size of traditional bitcode file. - /// ... potentially other gunk ... - /// }; - /// - /// TODO(kschimpf): Consider changing Magic and/or gunk to communicate - /// file is PNaCl wire format file (rather than LLVM bitcode). 
- /// - /// TODO(kschimpf): Add code to read gunk in, and store it so it is - /// accessable. - /// - /// This function is called when we find a file with a matching magic number. - /// In this case, skip down to the subsection of the file that is actually a - /// BC file. - /// If 'VerifyBufferSize' is true, check that the buffer is large enough to - /// contain the whole bitcode file. - inline bool SkipNaClBitcodeWrapperHeader(const unsigned char *&BufPtr, - const unsigned char *&BufEnd, - bool VerifyBufferSize) { - enum { - KnownHeaderSize = 4*4, // Size of header we read. - OffsetField = 2*4, // Offset in bytes to Offset field. - SizeField = 3*4 // Offset in bytes to Size field. - }; - - // Must contain the header! - if (BufEnd-BufPtr < KnownHeaderSize) return true; - - unsigned Offset = ( BufPtr[OffsetField ] | - (BufPtr[OffsetField+1] << 8) | - (BufPtr[OffsetField+2] << 16) | - (BufPtr[OffsetField+3] << 24)); - unsigned Size = ( BufPtr[SizeField ] | - (BufPtr[SizeField +1] << 8) | - (BufPtr[SizeField +2] << 16) | - (BufPtr[SizeField +3] << 24)); - - // Verify that Offset+Size fits in the file. - if (VerifyBufferSize && Offset+Size > unsigned(BufEnd-BufPtr)) - return true; - BufPtr += Offset; - BufEnd = BufPtr+Size; - return false; + return BufPtr+4 <= BufEnd && + BufPtr[0] == 'P' && + BufPtr[1] == 'E' && + BufPtr[2] == 'X' && + BufPtr[3] == 'E'; } } // end llvm namespace |