Make abbreviations explicit in pnacl-freeze/thaw.

[1] Explicitly enumerate all abbreviation values, including the maximum abbreviation for each type of block. [2] Make "enter subblock" calculate the number of bits needed by passing in the maximum abbreviation (associated with the block), rather than requiring the developer to compute this value every time a subblock is entered. *NOTE* This code changes encoding sizes to be based on the maximum allowed value, rather than requiring the developer to calculate the number of bits needed. This change doesn't make PNaCl bitcode files incompatible with LLVM bitcode files, since it does not affect the bitcode reader. BUG= https://code.google.com/p/nativeclient/issues/detail?id=3405 R=jvoung@chromium.org Review URL: https://codereview.chromium.org/14813032
author: Karl Schimpf <kschimpf@google.com> 2013-05-24 09:55:03 -0700
committer: Karl Schimpf <kschimpf@google.com> 2013-05-24 09:55:03 -0700
commit: 80b7ba7480724c773b96da24999d817b6b46ef29 (patch)
tree: 6b7dba38623ef94b89b43f7cba898c1b8017294f /include
parent: 501900058c0815d35e630d416e3d4e1373b75b9c (diff)
3 files changed, 126 insertions, 23 deletions
diff --git a/include/llvm/Bitcode/NaCl/NaClBitCodes.h b/include/llvm/Bitcode/NaCl/NaClBitCodes.h
index 4c0f754f7b..bb52d0e20e 100644
--- a/include/llvm/Bitcode/NaCl/NaClBitCodes.h
+++ b/include/llvm/Bitcode/NaCl/NaClBitCodes.h
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include <cassert>
 
 namespace llvm {
@@ -49,7 +50,21 @@ namespace naclbitc {
     UNABBREV_RECORD = 3,
 
     // This is not a code, this is a marker for the first abbrev assignment.
-    FIRST_APPLICATION_ABBREV = 4
+    // In addition, we assume up to two additional enumerated constants are
+    // added for each extension. These constants are:
+    //
+    //   PREFIX_MAX_FIXED_ABBREV
+    //   PREFIX_MAX_ABBREV
+    //
+    // PREFIX_MAX_ABBREV defines the maximal enumeration value used for
+    // the code selector of a block. If both PREFIX_MAX_FIXED_ABBREV
+    // and PREFIX_MAX_ABBREV are defined, then PREFIX_MAX_FIXED_ABBREV
+    // defines the last code selector of the block that must be read using
+    // a single read (i.e. a FIXED read, or the first chunk of a VBR read).
+    FIRST_APPLICATION_ABBREV = 4,
+    // Defines default values for code length, if no additional selectors
+    // are added.
+    DEFAULT_MAX_ABBREV = FIRST_APPLICATION_ABBREV-1
   };
 
   /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
@@ -184,6 +199,59 @@ public:
     OperandList.push_back(OpInfo);
   }
 };
+
+/// \brief Returns number of bits needed to encode
+/// value for dense FIXED encoding.
+inline unsigned NaClBitsNeededForValue(unsigned Value) {
+  // Note: Need to handle case where Value=0xFFFFFFFF as special case,
+  // since we can't add 1 to it.
+  if (Value >= 0x80000000) return 32;
+  return Log2_32_Ceil(Value+1);
+}
+
+/// \brief Encode a signed value by moving the sign to the LSB for dense
+/// VBR encoding.
+inline uint64_t NaClEncodeSignRotatedValue(int64_t V) {
+  return (V >= 0) ? (V << 1) : ((-V << 1) | 1);
+}
+
+/// \brief Decode a signed value stored with the sign bit in
+/// the LSB for dense VBR encoding.
+inline uint64_t NaClDecodeSignRotatedValue(uint64_t V) {
+  if ((V & 1) == 0)
+    return V >> 1;
+  if (V != 1)
+    return -(V >> 1);
+  // There is no such thing as -0 with integers.  "-0" really means MININT.
+  return 1ULL << 63;
+}
+
+/// \brief This class determines whether a FIXED or VBR
+/// abbreviation should be used for the selector, and the number of bits
+/// needed to capture such selectors.
+class NaClBitcodeSelectorAbbrev {
+
+public:
+  // If true, use a FIXED abbreviation. Otherwise, use a VBR abbreviation.
+  bool IsFixed;
+  // Number of bits needed for selector.
+  unsigned NumBits;
+
+  // Creates a selector range for the given values.
+  NaClBitcodeSelectorAbbrev(bool IF, unsigned NB)
+      : IsFixed(IF), NumBits(NB) {}
+
+  // Creates a selector range when no abbreviations are defined.
+  NaClBitcodeSelectorAbbrev()
+      : IsFixed(true),
+        NumBits(NaClBitsNeededForValue(naclbitc::DEFAULT_MAX_ABBREV)) {}
+
+  // Creates a selector range to handle fixed abbreviations up to
+  // the specified value.
+  explicit NaClBitcodeSelectorAbbrev(unsigned MaxAbbrev)
+      : IsFixed(true),
+        NumBits(NaClBitsNeededForValue(MaxAbbrev)) {}
+};
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
index 58c0a5d7fa..4c532bbc37 100644
--- a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
+++ b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h
@@ -171,7 +171,6 @@ class NaClBitstreamCursor {
   NaClBitstreamReader *BitStream;
   size_t NextChar;
 
-  
   /// CurWord/word_t - This is the current data we have pulled from the stream
   /// but have not returned to the client.  This is specifically and
   /// intentionally defined to follow the word size of the host machine for
@@ -186,21 +185,22 @@ class NaClBitstreamCursor {
 
   // CurCodeSize - This is the declared size of code values used for the current
   // block, in bits.
-  unsigned CurCodeSize;
+  NaClBitcodeSelectorAbbrev CurCodeSize;
 
   /// CurAbbrevs - Abbrevs installed at in this block.
   std::vector<NaClBitCodeAbbrev*> CurAbbrevs;
 
   struct Block {
-    unsigned PrevCodeSize;
+    NaClBitcodeSelectorAbbrev PrevCodeSize;
     std::vector<NaClBitCodeAbbrev*> PrevAbbrevs;
-    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
+    explicit Block() : PrevCodeSize() {}
+    explicit Block(const NaClBitcodeSelectorAbbrev& PCS)
+        : PrevCodeSize(PCS) {}
   };
 
   /// BlockScope - This tracks the codesize of parent blocks.
   SmallVector<Block, 8> BlockScope;
 
-  
 public:
   NaClBitstreamCursor() : BitStream(0), NextChar(0) {
   }
@@ -213,7 +213,6 @@ public:
     NextChar = 0;
     CurWord = 0;
     BitsInCurWord = 0;
-    CurCodeSize = 2;
   }
 
   void init(NaClBitstreamReader &R) {
@@ -223,7 +222,6 @@ public:
     NextChar = 0;
     CurWord = 0;
     BitsInCurWord = 0;
-    CurCodeSize = 2;
   }
 
   ~NaClBitstreamCursor() {
@@ -255,7 +253,7 @@ public:
   }
 
   /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
-  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
+  unsigned getAbbrevIDWidth() const { return CurCodeSize.NumBits; }
 
   /// GetCurrentBitNo - Return the bit # of the bit we are reading.
   uint64_t GetCurrentBitNo() const {
@@ -343,7 +341,6 @@ public:
     }
   }
 
-
   uint32_t Read(unsigned NumBits) {
     assert(NumBits && NumBits <= 32 &&
            "Cannot return zero or more than 32 bits!");
@@ -459,10 +456,11 @@ private:
 public:
 
   unsigned ReadCode() {
-    return Read(CurCodeSize);
+    return CurCodeSize.IsFixed
+        ? Read(CurCodeSize.NumBits)
+        : ReadVBR(CurCodeSize.NumBits);
   }
 
-
   // Block header:
   //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
 
diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h
index 26d9cb6b24..b4123261a0 100644
--- a/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h
+++ b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h
@@ -33,7 +33,7 @@ class NaClBitstreamWriter {
 
   /// CurCodeSize - This is the declared size of code values used for the
   /// current block, in bits.
-  unsigned CurCodeSize;
+  NaClBitcodeSelectorAbbrev CurCodeSize;
 
   /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
   /// selected BLOCK ID.
@@ -43,10 +43,11 @@ class NaClBitstreamWriter {
   std::vector<NaClBitCodeAbbrev*> CurAbbrevs;
 
   struct Block {
-    unsigned PrevCodeSize;
+    NaClBitcodeSelectorAbbrev PrevCodeSize;
     unsigned StartSizeWord;
     std::vector<NaClBitCodeAbbrev*> PrevAbbrevs;
-    Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
+    Block(const NaClBitcodeSelectorAbbrev& PCS, unsigned SSW)
+        : PrevCodeSize(PCS), StartSizeWord(SSW) {}
   };
 
   /// BlockScope - This tracks the current blocks that we have entered.
@@ -94,7 +95,7 @@ class NaClBitstreamWriter {
 
 public:
   explicit NaClBitstreamWriter(SmallVectorImpl<char> &O)
-    : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
+      : Out(O), CurBit(0), CurValue(0), CurCodeSize() {}
 
   ~NaClBitstreamWriter() {
     assert(CurBit == 0 && "Unflused data remaining");
@@ -156,6 +157,7 @@ public:
 
   void EmitVBR(uint32_t Val, unsigned NumBits) {
     assert(NumBits <= 32 && "Too many bits to emit!");
+    assert(NumBits > 1 && "Too few bits to emit!");
     uint32_t Threshold = 1U << (NumBits-1);
 
     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
@@ -169,6 +171,7 @@ public:
 
   void EmitVBR64(uint64_t Val, unsigned NumBits) {
     assert(NumBits <= 32 && "Too many bits to emit!");
+    assert(NumBits > 1 && "Too few bits to emit!");
     if ((uint32_t)Val == Val)
       return EmitVBR((uint32_t)Val, NumBits);
 
@@ -186,7 +189,10 @@ public:
 
   /// EmitCode - Emit the specified code.
   void EmitCode(unsigned Val) {
-    Emit(Val, CurCodeSize);
+    if (CurCodeSize.IsFixed)
+      Emit(Val, CurCodeSize.NumBits);
+    else
+      EmitVBR(Val, CurCodeSize.NumBits);
   }
 
   //===--------------------------------------------------------------------===//
@@ -207,16 +213,22 @@ public:
     return 0;
   }
 
-  void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
+private:
+  // Enter block using CodeLen bits to read the size of the code
+  // selector associated with the block.
+  void EnterSubblock(unsigned BlockID,
+                     const NaClBitcodeSelectorAbbrev& CodeLen,
+                     BlockInfo *Info) {
     // Block header:
     //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
     EmitCode(naclbitc::ENTER_SUBBLOCK);
     EmitVBR(BlockID, naclbitc::BlockIDWidth);
-    EmitVBR(CodeLen, naclbitc::CodeLenWidth);
+    assert(CodeLen.IsFixed && "Block codelens must be fixed");
+    EmitVBR(CodeLen.NumBits, naclbitc::CodeLenWidth);
     FlushToWord();
 
     unsigned BlockSizeWordIndex = GetWordIndex();
-    unsigned OldCodeSize = CurCodeSize;
+    NaClBitcodeSelectorAbbrev OldCodeSize(CurCodeSize);
 
     // Emit a placeholder, which will be replaced when the block is popped.
     Emit(0, naclbitc::BlockSizeWidth);
@@ -230,7 +242,7 @@ public:
 
     // If there is a blockinfo for this BlockID, add all the predefined abbrevs
     // to the abbrev list.
-    if (BlockInfo *Info = getBlockInfo(BlockID)) {
+    if (Info) {
       for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
            i != e; ++i) {
         CurAbbrevs.push_back(Info->Abbrevs[i]);
@@ -239,6 +251,31 @@ public:
     }
   }
 
+public:
+  /// \brief Enter block using CodeLen bits to read the size of the code
+  /// selector associated with the block.
+  void EnterSubblock(unsigned BlockID,
+                     const NaClBitcodeSelectorAbbrev& CodeLen) {
+    EnterSubblock(BlockID, CodeLen, getBlockInfo(BlockID));
+  }
+
+  /// \brief Enter block, using a code length based on the number of
+  /// (global) BlockInfo entries defined for the block. Note: This
+  /// should be used only if the block doesn't define any local abbreviations.
+  void EnterSubblock(unsigned BlockID) {
+    BlockInfo *Info = getBlockInfo(BlockID);
+    size_t NumAbbrevs = Info ? Info->Abbrevs.size() : 0;
+    NaClBitcodeSelectorAbbrev DefaultCodeLen(
+        naclbitc::DEFAULT_MAX_ABBREV+NumAbbrevs);
+    EnterSubblock(BlockID, DefaultCodeLen, Info);
+  }
+
+  /// \brief Enter block whose maximum abbreviation value is NumAbbrev.
+  void EnterSubblock(unsigned BlockID, unsigned NumAbbrev) {
+    NaClBitcodeSelectorAbbrev CodeLenAbbrev(NumAbbrev);
+    EnterSubblock(BlockID, CodeLenAbbrev);
+  }
+
   void ExitBlock() {
     assert(!BlockScope.empty() && "Block scope imbalance!");
 
@@ -501,8 +538,8 @@ public:
   //===--------------------------------------------------------------------===//
 
   /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
-  void EnterBlockInfoBlock(unsigned CodeWidth) {
-    EnterSubblock(naclbitc::BLOCKINFO_BLOCK_ID, CodeWidth);
+  void EnterBlockInfoBlock() {
+    EnterSubblock(naclbitc::BLOCKINFO_BLOCK_ID);
     BlockInfoCurBID = ~0U;
   }
 private:
author	Karl Schimpf <kschimpf@google.com>	2013-05-24 09:55:03 -0700
committer	Karl Schimpf <kschimpf@google.com>	2013-05-24 09:55:03 -0700
commit	80b7ba7480724c773b96da24999d817b6b46ef29 (patch)
tree	6b7dba38623ef94b89b43f7cba898c1b8017294f /include
parent	501900058c0815d35e630d416e3d4e1373b75b9c (diff)