aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Karl Schimpf <kschimpf@google.com> 2013-06-06 10:03:24 -0700
committer Karl Schimpf <kschimpf@google.com> 2013-06-06 10:03:24 -0700
commit 37bdd9174a1cba17b369c8c1f561e70c458e0c13 (patch)
tree 1f6a984ebb94ccd819c6e38646d91bb5c7eb6977 /include
parent c0d9b337419b72e69cbd9c64f84ae39560ab344f (diff)
Make PNaCl bitcode files have a different format from LLVM bitcode files.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=3405
R=dschuff@chromium.org
Review URL: https://codereview.chromium.org/15907008
Diffstat (limited to 'include')
-rw-r--r-- include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h 219
-rw-r--r-- include/llvm/Bitcode/NaCl/NaClBitstreamReader.h 35
-rw-r--r-- include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h 2
-rw-r--r-- include/llvm/Bitcode/NaCl/NaClReaderWriter.h 104
4 files changed, 256 insertions, 104 deletions
diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h b/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h
new file mode 100644
index 0000000000..6e35f62067
--- /dev/null
+++ b/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h
@@ -0,0 +1,219 @@
+//===-- llvm/Bitcode/NaCl/NaClBitcodeHeader.h - ----------------*- C++ -*-===//
+// NaCl Bitcode header reader.
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines interfaces to read and write NaCl bitcode wire format
+// file headers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_NACL_NACLBITCODEHEADER_H
+#define LLVM_BITCODE_NACL_NACLBITCODEHEADER_H
+
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+class StreamingMemoryObject;
+
+// Class representing a variable-size metadata field in the bitcode header.
+// Also contains the list of known (typed) Tag IDs.
+//
+// The serialized format has 2 fixed subfields (ID:type and data length) and the
+// variable-length data subfield
+class NaClBitcodeHeaderField {
+ NaClBitcodeHeaderField(const NaClBitcodeHeaderField &) LLVM_DELETED_FUNCTION;
+ void operator=(const NaClBitcodeHeaderField &)LLVM_DELETED_FUNCTION;
+
+public:
+ // Defines the ID associated with the value. Valid values are in
+ // {0x0, ..., 0xFFF}
+ typedef enum {
+ kInvalid = 0, // KUnknownType.
+ kPNaClVersion = 1 // kUInt32Type.
+ } Tag;
+ // Defines the type of value.
+ typedef enum {
+ kBufferType, // Buffer of form uint8_t[len].
+ kUInt32Type
+ } FieldType;
+ // Defines the number of bytes in a (32-bit) word.
+ static const int WordSize = 4;
+
+ // Defines the encoding of the fixed fields (i.e. ID:type and data length).
+ typedef uint16_t FixedSubfield;
+
+ // Create an invalid header field.
+ NaClBitcodeHeaderField();
+
+ // Create a header field with an uint32_t value.
+ NaClBitcodeHeaderField(Tag MyID, uint32_t value);
+
+ // Create a header field for the given data.
+ NaClBitcodeHeaderField(Tag MyID, size_t MyLen, uint8_t *MyData);
+
+ virtual ~NaClBitcodeHeaderField() {
+ if (Data)
+ delete[] Data;
+ }
+
+ /// \brief Number of bytes used to represent header field.
+ size_t GetTotalSize() const {
+ // Round up to 4 byte alignment
+ return (kTagLenSize + Len + (WordSize - 1)) & ~(WordSize - 1);
+ }
+
+ /// \brief Write field into Buf[BufLen].
+ bool Write(uint8_t *Buf, size_t BufLen) const;
+
+ /// \brief Read field from Buf[BufLen].
+ bool Read(const uint8_t *Buf, size_t BufLen);
+
+ /// \brief Returns string describing field.
+ std::string Contents() const;
+
+ /// \brief Get the data size from a serialized field to allow allocation.
+ static size_t GetDataSizeFromSerialized(const uint8_t *Buf) {
+ FixedSubfield Length;
+ ReadFixedSubfield(&Length, Buf + sizeof(FixedSubfield));
+ return Length;
+ }
+
+ /// \brief Return the ID of the field.
+ Tag GetID() const { return ID; }
+
+ FieldType GetType() const { return FType; }
+
+ /// \brief Return the length of the data (in bytes).
+ size_t GetLen() const { return Len; }
+
+ /// \brief Return the data. Data is array GetData()[GetLen()].
+ const uint8_t *GetData() const { return Data; }
+
+ /// \brief Returns the uint32_t value stored. Requires that
+ /// GetType() == kUInt32Type.
+ uint32_t GetUInt32Value() const;
+
+private:
+ // Convert ID:Type into a fixed subfield
+ FixedSubfield EncodeTypedID() const { return (ID << 4) | FType; }
+ // Extract out ID and Type from a fixed subfield.
+ void DecodeTypedID(FixedSubfield Subfield, Tag &ID, FieldType &FType) {
+ ID = static_cast<Tag>(Subfield >> 4);
+ FType = static_cast<FieldType>(Subfield & 0xF);
+ }
+ // Combined size of the fixed subfields
+ const static size_t kTagLenSize = 2 * sizeof(FixedSubfield);
+ static void WriteFixedSubfield(FixedSubfield Value, uint8_t *Buf) {
+ Buf[0] = Value & 0xFF;
+ Buf[1] = (Value >> 8) & 0xFF;
+ }
+ static void ReadFixedSubfield(FixedSubfield *Value, const uint8_t *Buf) {
+ *Value = Buf[0] | Buf[1] << 8;
+ }
+ Tag ID;
+ FieldType FType;
+ size_t Len;
+ uint8_t *Data;
+};
+
+/// \brief Class holding parsed header fields in PNaCl bitcode file.
+class NaClBitcodeHeader {
+ // The set of parsed header fields. The header takes ownership of
+ // all fields in this vector.
+ std::vector<NaClBitcodeHeaderField *> Fields;
+ // The number of bytes in the PNaCl header.
+ size_t HeaderSize;
+ // String defining why it is unsupported (if unsupported).
+ std::string UnsupportedMessage;
+ // Flag defining if header is supported.
+ bool IsSupportedFlag;
+ // Flag defining if the corresponding bitcode file is readable.
+ bool IsReadableFlag;
+ // Defines the PNaCl version defined by the header file.
+ uint32_t PNaClVersion;
+
+public:
+ static const int WordSize = NaClBitcodeHeaderField::WordSize;
+
+ NaClBitcodeHeader();
+ ~NaClBitcodeHeader();
+
+ /// \brief Installs the fields of the header, defining if the header
+ /// is readable and supported.
+ void InstallFields();
+
+ /// \brief Read the PNaCl bitcode header. The format of the header is:
+ ///
+ /// 1) 'PEXE' - The four character sequence defining the magic number.
+ /// 2) uint16_t num_fields - The number of NaClBitcodeHeaderField's.
+ /// 3) uint16_t num_bytes - The number of bytes to hold fields in
+ /// the header.
+ /// 4) NaClBitcodeHeaderField f1 - The first bitcode header field.
+ /// ...
+ /// 3 + num_fields) NaClBitcodeHeaderField fn - The last bitcode header
+ /// field.
+ ///
+ /// Returns false if able to read (all of) the bitcode header.
+ bool Read(const unsigned char *&BufPtr, const unsigned char *&BufEnd);
+
+ /// \brief Read the PNaCl bitcode header, recording the fields found
+ /// in the header. Returns false if able to read (all of) the bitcode header.
+ bool Read(StreamingMemoryObject *Bytes);
+
+ /// \brief Returns the number of bytes read to consume the header.
+ size_t getHeaderSize() { return HeaderSize; }
+
+ /// \brief Returns a std::string describing why the header describes
+ /// an unsupported PNaCl Bitcode file (presumably empty if supported).
+ const std::string Unsupported() const { return UnsupportedMessage; }
+
+ /// \brief Returns true if supported. That is, it can be run in the
+ /// browser.
+ bool IsSupported() const { return IsSupportedFlag; }
+
+ /// \brief Returns true if the bitcode file should be readable. Note
+ /// that just because it is readable, it doesn't necessarily mean that
+ /// it is supported.
+ bool IsReadable() const { return IsReadableFlag; }
+
+ /// \brief Returns number of fields defined.
+ size_t NumberFields() const { return Fields.size(); }
+
+ /// \brief Returns a pointer to the field with the given ID
+ /// (0 if no such field).
+ NaClBitcodeHeaderField *GetTaggedField(NaClBitcodeHeaderField::Tag ID) const;
+
+ /// \brief Returns a pointer to the Nth field in the header
+ /// (0 if no such field).
+ NaClBitcodeHeaderField *GetField(size_t index) const;
+
+ /// \brief Returns the PNaClVersion, as defined by the header.
+ uint32_t GetPNaClVersion() const { return PNaClVersion; }
+
+private:
+ // Reads and verifies the first 8 bytes of the header, consisting
+ // of the magic number 'PEXE', and the value defining the number
+ // of fields and number of bytes used to hold fields.
+ // Returns false if successful.
+ bool ReadPrefix(const unsigned char *BufPtr, const unsigned char *BufEnd,
+ unsigned &NumFields, unsigned &NumBytes);
+
+ // Reads and verifies the fields in the header.
+ // Returns false if successful.
+ bool ReadFields(const unsigned char *BufPtr, const unsigned char *BufEnd,
+ unsigned NumFields, unsigned NumBytes);
+
+};
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
index 4c532bbc37..a338bbfe79 100644
--- a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
+++ b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
@@ -49,24 +49,30 @@ private:
std::vector<BlockInfo> BlockInfoRecords;
- /// IgnoreBlockInfoNames - This is set to true if we don't care about the
- /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
- /// uses this.
+ /// IgnoreBlockInfoNames - This is set to true if we don't care
+ /// about the block/record name information in the BlockInfo
+ /// block. Only pnacl-bcanalyzer uses this.
bool IgnoreBlockInfoNames;
+ /// \brief Holds the offset of the first byte after the header.
+ size_t InitialAddress;
+
NaClBitstreamReader(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION;
void operator=(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION;
public:
- NaClBitstreamReader() : IgnoreBlockInfoNames(true) {
- }
+ NaClBitstreamReader() : IgnoreBlockInfoNames(true), InitialAddress(0) {}
NaClBitstreamReader(const unsigned char *Start, const unsigned char *End) {
IgnoreBlockInfoNames = true;
+ InitialAddress = 0;
init(Start, End);
}
- NaClBitstreamReader(StreamableMemoryObject *bytes) {
- BitcodeBytes.reset(bytes);
+ NaClBitstreamReader(StreamableMemoryObject *Bytes,
+ size_t MyInitialAddress=0)
+ : InitialAddress(MyInitialAddress)
+ {
+ BitcodeBytes.reset(Bytes);
}
void init(const unsigned char *Start, const unsigned char *End) {
@@ -93,6 +99,11 @@ public:
void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
+ /// \brief Returns the initial address (after the header) of the input stream.
+ size_t getInitialAddress() const {
+ return InitialAddress;
+ }
+
//===--------------------------------------------------------------------===//
// Block Manipulation
//===--------------------------------------------------------------------===//
@@ -210,7 +221,7 @@ public:
}
explicit NaClBitstreamCursor(NaClBitstreamReader &R) : BitStream(&R) {
- NextChar = 0;
+ NextChar = R.getInitialAddress();
CurWord = 0;
BitsInCurWord = 0;
}
@@ -219,7 +230,7 @@ public:
freeState();
BitStream = &R;
- NextChar = 0;
+ NextChar = R.getInitialAddress();
CurWord = 0;
BitsInCurWord = 0;
}
@@ -242,12 +253,6 @@ public:
static_cast<uint64_t>(pos - 1));
}
- uint32_t getWord(size_t pos) {
- uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
- BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf, NULL);
- return *reinterpret_cast<support::ulittle32_t *>(buf);
- }
-
bool AtEndOfStream() {
return BitsInCurWord == 0 && isEndPos(NextChar);
}
diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h
index b4123261a0..2237b6e29b 100644
--- a/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h
+++ b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h
@@ -61,6 +61,7 @@ class NaClBitstreamWriter {
};
std::vector<BlockInfo> BlockInfoRecords;
+public:
// BackpatchWord - Backpatch a 32-bit word in the output with the specified
// value.
void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
@@ -70,6 +71,7 @@ class NaClBitstreamWriter {
Out[ByteNo ] = (unsigned char)(NewWord >> 24);
}
+private:
void WriteByte(unsigned char Value) {
Out.push_back(Value);
}
diff --git a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
index d505af0a1f..53feb8ab86 100644
--- a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
+++ b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
@@ -31,24 +31,27 @@ namespace llvm {
/// error, this returns null, *does not* take ownership of Buffer, and fills
/// in *ErrMsg with an error description if ErrMsg is non-null.
Module *getNaClLazyBitcodeModule(MemoryBuffer *Buffer,
- LLVMContext &Context,
- std::string *ErrMsg = 0);
+ LLVMContext &Context,
+ std::string *ErrMsg = 0,
+ bool AcceptSupportedOnly = true);
/// getNaClStreamedBitcodeModule - Read the header of the specified stream
/// and prepare for lazy deserialization and streaming of function bodies.
/// On error, this returns null, and fills in *ErrMsg with an error
/// description if ErrMsg is non-null.
Module *getNaClStreamedBitcodeModule(const std::string &name,
- DataStreamer *streamer,
- LLVMContext &Context,
- std::string *ErrMsg = 0);
+ DataStreamer *streamer,
+ LLVMContext &Context,
+ std::string *ErrMsg = 0,
+ bool AcceptSupportedOnly = true);
/// NaClParseBitcodeFile - Read the specified bitcode file,
/// returning the module. If an error occurs, this returns null and
/// fills in *ErrMsg if it is non-null. This method *never* takes
/// ownership of Buffer.
Module *NaClParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context,
- std::string *ErrMsg = 0);
+ std::string *ErrMsg = 0,
+ bool AcceptSupportedOnly = true);
/// NaClWriteBitcodeToFile - Write the specified module to the
/// specified raw output stream, using PNaCl wire format. For
@@ -56,93 +59,16 @@ namespace llvm {
/// mode.
void NaClWriteBitcodeToFile(const Module *M, raw_ostream &Out);
- /// isNaClBitcodeWrapper - Return true if the given bytes are the
- /// magic bytes for an LLVM IR bitcode wrapper.
- ///
- inline bool isNaClBitcodeWrapper(const unsigned char *BufPtr,
- const unsigned char *BufEnd) {
- // See if you can find the hidden message in the magic bytes :-).
- // (Hint: it's a little-endian encoding.)
- return BufPtr != BufEnd &&
- BufPtr[0] == 0xDE &&
- BufPtr[1] == 0xC0 &&
- BufPtr[2] == 0x17 &&
- BufPtr[3] == 0x0B;
- }
-
- /// isNaClRawBitcode - Return true if the given bytes are the magic
- /// bytes for raw LLVM IR bitcode (without a wrapper).
- ///
- inline bool isNaClRawBitcode(const unsigned char *BufPtr,
- const unsigned char *BufEnd) {
- // These bytes sort of have a hidden message, but it's not in
- // little-endian this time, and it's a little redundant.
- return BufPtr != BufEnd &&
- BufPtr[0] == 'B' &&
- BufPtr[1] == 'C' &&
- BufPtr[2] == 0xc0 &&
- BufPtr[3] == 0xde;
- }
-
/// isNaClBitcode - Return true if the given bytes are the magic bytes for
- /// LLVM IR bitcode, either with or without a wrapper.
+ /// PNaCl bitcode wire format.
///
inline bool isNaClBitcode(const unsigned char *BufPtr,
const unsigned char *BufEnd) {
- return isNaClBitcodeWrapper(BufPtr, BufEnd) ||
- isNaClRawBitcode(BufPtr, BufEnd);
- }
-
- /// SkipNaClBitcodeWrapperHeader - Some systems wrap bc files with a
- /// special header for padding or other reasons. The format of this
- /// header is:
- ///
- /// struct bc_header {
- /// uint32_t Magic; // 0x0B17C0DE
- /// uint32_t Version; // Version, currently always 0.
- /// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
- /// uint32_t BitcodeSize; // Size of traditional bitcode file.
- /// ... potentially other gunk ...
- /// };
- ///
- /// TODO(kschimpf): Consider changing Magic and/or gunk to communicate
- /// file is PNaCl wire format file (rather than LLVM bitcode).
- ///
- /// TODO(kschimpf): Add code to read gunk in, and store it so it is
- /// accessable.
- ///
- /// This function is called when we find a file with a matching magic number.
- /// In this case, skip down to the subsection of the file that is actually a
- /// BC file.
- /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
- /// contain the whole bitcode file.
- inline bool SkipNaClBitcodeWrapperHeader(const unsigned char *&BufPtr,
- const unsigned char *&BufEnd,
- bool VerifyBufferSize) {
- enum {
- KnownHeaderSize = 4*4, // Size of header we read.
- OffsetField = 2*4, // Offset in bytes to Offset field.
- SizeField = 3*4 // Offset in bytes to Size field.
- };
-
- // Must contain the header!
- if (BufEnd-BufPtr < KnownHeaderSize) return true;
-
- unsigned Offset = ( BufPtr[OffsetField ] |
- (BufPtr[OffsetField+1] << 8) |
- (BufPtr[OffsetField+2] << 16) |
- (BufPtr[OffsetField+3] << 24));
- unsigned Size = ( BufPtr[SizeField ] |
- (BufPtr[SizeField +1] << 8) |
- (BufPtr[SizeField +2] << 16) |
- (BufPtr[SizeField +3] << 24));
-
- // Verify that Offset+Size fits in the file.
- if (VerifyBufferSize && Offset+Size > unsigned(BufEnd-BufPtr))
- return true;
- BufPtr += Offset;
- BufEnd = BufPtr+Size;
- return false;
+ return BufPtr+4 <= BufEnd &&
+ BufPtr[0] == 'P' &&
+ BufPtr[1] == 'E' &&
+ BufPtr[2] == 'X' &&
+ BufPtr[3] == 'E';
}
} // end llvm namespace