2 files changed, 228 insertions, 95 deletions
diff --git a/Driver/CacheTokens.cpp b/Driver/CacheTokens.cpp
index 0cd9315f60..cff6dd8f04 100644
--- a/Driver/CacheTokens.cpp
+++ b/Driver/CacheTokens.cpp
@@ -47,9 +47,10 @@ static void Emit32(llvm::raw_ostream& Out, uint32_t V) {
   Out << (unsigned char)(V >> 24);
 }
 
-static void Pad(llvm::raw_fd_ostream& Out, unsigned Alignment) {
-  Offset off = (Offset) Out.tell();  
-  for (unsigned Pad = off % Alignment ; Pad != 0 ; --Pad, ++off) Emit8(Out, 0);
+static void Pad(llvm::raw_fd_ostream& Out, unsigned A) {
+  Offset off = (Offset) Out.tell();
+  uint32_t n = ((uintptr_t)(off+A-1) & ~(uintptr_t)(A-1)) - off;
+  for ( ; n ; --n ) Emit8(Out, 0);
 }
 
 //===----------------------------------------------------------------------===//
@@ -65,13 +66,13 @@ class OnDiskChainedHashTableGenerator {
   
   class Item {
   public:
-    typename Info::KeyT key;
-    typename Info::DataT data;
+    typename Info::key_type key;
+    typename Info::data_type data;
     Item *next;
     const uint32_t hash;
     
-    Item(typename Info::KeyT_ref k, typename Info::DataT_ref d)
-    : key(k), data(d), next(0), hash(Info::getHash(k)) {}
+    Item(typename Info::key_type_ref k, typename Info::data_type_ref d)
+    : key(k), data(d), next(0), hash(Info::ComputeHash(k)) {}
   };
   
   class Bucket { 
@@ -86,7 +87,7 @@ class OnDiskChainedHashTableGenerator {
   Bucket* Buckets;
   
 private:
-  void insert(Item** b, size_t size, Item* E) {
+  void insert(Bucket* b, size_t size, Item* E) {
     unsigned idx = E->hash & (size - 1);
     Bucket& B = b[idx];
     E->next = B.head;
@@ -95,12 +96,12 @@ private:
   }
   
   void resize(size_t newsize) {
-    Bucket* newBuckets = calloc(newsize, sizeof(Bucket));
-    
+    Bucket* newBuckets = (Bucket*) calloc(newsize, sizeof(Bucket));
+    // Populate newBuckets with the old entries.
     for (unsigned i = 0; i < NumBuckets; ++i)
-      for (Item* E = Buckets[i]; E ; ) {
+      for (Item* E = Buckets[i].head; E ; ) {
         Item* N = E->next;
-        E->Next = 0;
+        E->next = 0;
         insert(newBuckets, newsize, E);
         E = N;
       }
@@ -112,7 +113,9 @@ private:
   
 public:
   
-  void insert(typename Info::Key_ref key, typename Info::DataT_ref data) {
+  void insert(typename Info::key_type_ref key,
+              typename Info::data_type_ref data) {
+
     ++NumEntries;
     if (4*NumEntries >= 3*NumBuckets) resize(NumBuckets*2);
     insert(Buckets, NumBuckets, new (BA.Allocate<Item>()) Item(key, data));
@@ -125,25 +128,26 @@ public:
       if (!B.head) continue;
       
       // Store the offset for the data of this bucket.
-      Pad(out, 4); // 4-byte alignment.
       B.off = out.tell();
       
-      // Write out the number of items in the bucket.  We just write out
-      // 4 bytes to keep things 4-byte aligned.
-      Emit32(out, B.length);
+      // Write out the number of items in the bucket.
+      Emit16(out, B.length);
       
       // Write out the entries in the bucket.
       for (Item *I = B.head; I ; I = I->next) {
         Emit32(out, I->hash);
-        Info::EmitKey(out, I->key);
-        Info::EmitData(out, I->data);
+        const std::pair<unsigned, unsigned>& Len = 
+          Info::EmitKeyDataLength(out, I->key, I->data);
+        Info::EmitKey(out, I->key, Len.first);
+        Info::EmitData(out, I->data, Len.second);
       }
     }
     
     // Emit the hashtable itself.
     Pad(out, 4);
     Offset TableOff = out.tell();
-    Emit32(out, NumBuckets);    
+    Emit32(out, NumBuckets);
+    Emit32(out, NumEntries);
     for (unsigned i = 0; i < NumBuckets; ++i) Emit32(out, Buckets[i].off);
     
     return TableOff;
@@ -151,8 +155,10 @@ public:
   
   OnDiskChainedHashTableGenerator() {
     NumEntries = 0;
-    NumBuckets = 64;
-    Buckets = calloc(NumBuckets, sizeof(Bucket));
+    NumBuckets = 64;    
+    // Note that we do not need to run the constructors of the individual
+    // Bucket objects since 'calloc' returns bytes that are all 0.
+    Buckets = (Bucket*) calloc(NumBuckets, sizeof(Bucket));
   }
   
   ~OnDiskChainedHashTableGenerator() {
@@ -178,6 +184,44 @@ public:
   Offset getPPCondTableOffset() const { return PPCondData; }
 };
   
+class VISIBILITY_HIDDEN FileEntryPCHEntryInfo {
+public:
+  typedef const FileEntry* key_type;
+  typedef key_type key_type_ref;
+  
+  typedef PCHEntry data_type;
+  typedef const PCHEntry& data_type_ref;
+  
+  static unsigned ComputeHash(const FileEntry* FE) {
+    // Bernstein hash function:
+    // This is basically copy-and-paste from StringMap.  This likely won't
+    // stay here, which is why I didn't both to expose this function from
+    // String Map.  There are plenty of other hash functions which are likely
+    // to perform better and be faster.
+    unsigned int R = 0;
+    for (const char* x = FE->getName(); *x != '\0' ; ++x) R = R * 33 + *x;
+    return R + (R >> 5);
+  }
+  
+  static std::pair<unsigned,unsigned> 
+  EmitKeyDataLength(llvm::raw_ostream& Out, const FileEntry* FE,
+                    const PCHEntry& E) {
+
+    unsigned n = strlen(FE->getName()) + 1;
+    ::Emit16(Out, n);
+    return std::make_pair(n, 8);
+  }
+  
+  static void EmitKey(llvm::raw_ostream& Out, const FileEntry* FE, unsigned n) {
+    Out.write(FE->getName(), n);
+  }
+  
+  static void EmitData(llvm::raw_ostream& Out, const PCHEntry& E, unsigned) {
+    ::Emit32(Out, E.getTokenOffset());
+    ::Emit32(Out, E.getPPCondTableOffset());
+  }        
+};
+  
 class OffsetOpt {
   bool valid;
   Offset off;
@@ -189,7 +233,7 @@ public:
 };
 } // end anonymous namespace
 
-typedef llvm::DenseMap<const FileEntry*, PCHEntry> PCHMap;
+typedef OnDiskChainedHashTableGenerator<FileEntryPCHEntryInfo> PCHMap;
 typedef llvm::DenseMap<const IdentifierInfo*,uint32_t> IDMap;
 typedef llvm::StringMap<OffsetOpt, llvm::BumpPtrAllocator> CachedStrsTy;
 
@@ -547,14 +591,14 @@ void PTHWriter::GeneratePTH() {
     if (!P.isAbsolute())
       continue;
 
-    assert(!PM.count(FE) && "fileinfo's are not uniqued on FileEntry?");
+    // assert(!PM.count(FE) && "fileinfo's are not uniqued on FileEntry?");
     
     const llvm::MemoryBuffer *B = C.getBuffer();
     if (!B) continue;
 
     FileID FID = SM.createFileID(FE, SourceLocation(), SrcMgr::C_User);
     Lexer L(FID, SM, LOpts);
-    PM[FE] = LexTokens(L);
+    PM.insert(FE, LexTokens(L));
   }
 
   // Write out the identifier table.
@@ -607,22 +651,5 @@ void clang::CacheTokens(Preprocessor& PP, const std::string& OutFile) {
 //===----------------------------------------------------------------------===//
 
 Offset PTHWriter::EmitFileTable() {
-  // Determine the offset where this table appears in the PTH file.
-  Offset off = (Offset) Out.tell();
-  
-  // Output the size of the table.
-  Emit32(PM.size());
-  
-  for (PCHMap::iterator I=PM.begin(), E=PM.end(); I!=E; ++I) {
-    const FileEntry* FE = I->first;
-    const char* Name = FE->getName();
-    unsigned size = strlen(Name);
-    Emit32(size);
-    EmitBuf(Name, Name+size);
-    Emit32(I->second.getTokenOffset());
-    Emit32(I->second.getPPCondTableOffset());
-  }
-  
-  return off;
+  return PM.Emit(Out);
 }
-
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index 8bbf284b32..2f4ac986e4 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -34,12 +34,21 @@ using namespace clang;
 //===----------------------------------------------------------------------===//
 
 static inline uint16_t ReadUnalignedLE16(const unsigned char *&Data) {
-  uint16_t V = ((uint16_t)Data[0] <<  0) |
+  uint16_t V = ((uint16_t)Data[0]) |
                ((uint16_t)Data[1] <<  8);
   Data += 2;
   return V;
 }
 
+static inline uint32_t ReadUnalignedLE32(const unsigned char *&Data) {
+  uint32_t V = ((uint32_t)Data[0])  |
+               ((uint32_t)Data[1] << 8)  |
+               ((uint32_t)Data[2] << 16) |
+               ((uint32_t)Data[3] << 24);
+  Data += 4;
+  return V;
+}
+
 static inline uint32_t ReadLE32(const unsigned char *&Data) {
   // Hosts that directly support little-endian 32-bit loads can just
   // use them.  Big-endian hosts need a bswap.
@@ -306,70 +315,166 @@ SourceLocation PTHLexer::getSourceLocation() {
 }
 
 //===----------------------------------------------------------------------===//
-// Internal Data Structures for PTH file lookup and resolving identifiers.
+// OnDiskChainedHashTable
 //===----------------------------------------------------------------------===//
 
+template<typename Info>
+class OnDiskChainedHashTable {
+  const unsigned NumBuckets;
+  const unsigned NumEntries;
+  const unsigned char* const Buckets;
+  const unsigned char* const Base;
+public:
+  typedef typename Info::internal_key_type internal_key_type;
+  typedef typename Info::external_key_type external_key_type;
+  typedef typename Info::data_type         data_type;
+  
+  OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries,
+                         const unsigned char* buckets,
+                         const unsigned char* base)
+    : NumBuckets(numBuckets), NumEntries(numEntries),
+      Buckets(buckets), Base(base) {        
+        assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
+               "'buckets' must have a 4-byte alignment");
+      }
+
+  
+  bool isEmpty() const { return NumEntries == 0; }
+  
+  class iterator {
+    const unsigned char* const data;
+    const unsigned len;
+  public:
+    iterator() : data(0), len(0) {}
+    iterator(const unsigned char* d, unsigned l) : data(d), len(l) {}
+    
+    data_type operator*() const { return Info::ReadData(data, len); }    
+    bool operator==(const iterator& X) const { return X.data == data; }    
+    bool operator!=(const iterator& X) const { return X.data != data; }
+  };    
+  
+  iterator find(const external_key_type& eKey) {
+    const internal_key_type& iKey = Info::GetInternalKey(eKey);
+    unsigned key_hash = Info::ComputeHash(iKey);
+    
+    // Each bucket is just a 32-bit offset into the PTH file.
+    unsigned idx = key_hash & (NumBuckets - 1);
+    const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx;
+    
+    unsigned offset = ReadLE32(Bucket);
+    if (offset == 0) return iterator(); // Empty bucket.
+    const unsigned char* Items = Base + offset;
+    
+    // 'Items' starts with a 16-bit unsigned integer representing the
+    // number of items in this bucket.
+    unsigned len = ReadUnalignedLE16(Items);
+    
+    for (unsigned i = 0; i < len; ++i) {
+      // Read the hash.
+      uint32_t item_hash = ReadUnalignedLE32(Items);
+      
+      // Determine the length of the key and the data.
+      const std::pair<unsigned, unsigned>& L = Info::ReadKeyDataLength(Items);      
+      unsigned item_len = L.first + L.second;
+
+      // Compare the hashes.  If they are not the same, skip the entry entirely.
+      if (item_hash != key_hash) {
+        Items += item_len;
+        continue;
+      }
+      
+      // Read the key.
+      const internal_key_type& X =
+        Info::ReadKey((const unsigned char* const) Items, L.first);
+
+      // If the key doesn't match just skip reading the value.
+      if (!Info::EqualKey(X, iKey)) {
+        Items += item_len;
+        continue;
+      }
+      
+      // The key matches!
+      return iterator(Items + L.first, L.second);
+    }
+    
+    return iterator();
+  }
+  
+  iterator end() const { return iterator(); }
+  
+  
+  static OnDiskChainedHashTable* Create(const unsigned char* buckets,
+                                        const unsigned char* const base) {
+
+    assert(buckets > base);
+    assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
+           "buckets should be 4-byte aligned.");
+    
+    unsigned numBuckets = ReadLE32(buckets);
+    unsigned numEntries = ReadLE32(buckets);
+    return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets,
+                                            base);
+  }  
+};
+
+//===----------------------------------------------------------------------===//
+// PTH file lookup: map from strings to file data.
+//===----------------------------------------------------------------------===//
 
 /// PTHFileLookup - This internal data structure is used by the PTHManager
 ///  to map from FileEntry objects managed by FileManager to offsets within
 ///  the PTH file.
 namespace {
-class VISIBILITY_HIDDEN PTHFileLookup {
+class VISIBILITY_HIDDEN PTHFileData {
+  const uint32_t TokenOff;
+  const uint32_t PPCondOff;
 public:
-  class Val {
-    uint32_t TokenOff;
-    uint32_t PPCondOff;
-  public:
-    Val() : TokenOff(~0) {}
-    Val(uint32_t toff, uint32_t poff)
-      : TokenOff(toff), PPCondOff(poff) {}
+  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
+    : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
     
-    bool isValid() const { return TokenOff != ~((uint32_t)0); }
-
-    uint32_t getTokenOffset() const {
-      assert(isValid() && "PTHFileLookup entry initialized.");
-      return TokenOff;
-    }
-    
-    uint32_t getPPCondOffset() const {
-      assert(isValid() && "PTHFileLookup entry initialized.");
-      return PPCondOff;
-    }    
-  };
-  
-private:
-  llvm::StringMap<Val> FileMap;
+  uint32_t getTokenOffset() const { return TokenOff; }  
+  uint32_t getPPCondOffset() const { return PPCondOff; }  
+};
   
+class VISIBILITY_HIDDEN PTHFileLookupTrait {
 public:
-  PTHFileLookup() {};
+  typedef PTHFileData      data_type;
+  typedef const FileEntry* external_key_type;
+  typedef const char*      internal_key_type;
   
-  bool isEmpty() const {
-    return FileMap.empty();
+  static bool EqualKey(const char* a, const char* b) {
+    return strcmp(a, b) == 0;
+  }
+
+  static unsigned ComputeHash(const char* x) {
+    // More copy-paste nonsense.  Will refactor.
+    unsigned int R = 0;
+    for (; *x != '\0' ; ++x) R = R * 33 + *x;
+    return R + (R >> 5);
+  }
+
+  static const char* GetInternalKey(const FileEntry* FE) {
+    return FE->getName();
   }
   
-  Val Lookup(const FileEntry* FE) {
-    const char* s = FE->getName();
-    unsigned size = strlen(s);
-    return FileMap.GetOrCreateValue(s, s+size).getValue();
+  static std::pair<unsigned, unsigned>
+  ReadKeyDataLength(const unsigned char*& d) {
+    return std::make_pair((unsigned) ReadUnalignedLE16(d), 8U);
   }
   
-  void ReadTable(const unsigned char* D) {    
-    uint32_t N = ReadLE32(D);     // Read the length of the table.
-    
-    for ( ; N > 0; --N) {       // The rest of the data is the table itself.
-      uint32_t Len = ReadLE32(D);
-      const char* s = (const char *)D;
-      D += Len;
-
-      uint32_t TokenOff = ReadLE32(D);
-      uint32_t PPCondOff = ReadLE32(D);
-
-      FileMap.GetOrCreateValue(s, s+Len).getValue() =
-        Val(TokenOff, PPCondOff);
-    }
+  static const char* ReadKey(const unsigned char* d, unsigned) {
+    return (const char*) d;
+  }
+  
+  static PTHFileData ReadData(const unsigned char* d, unsigned) {
+    uint32_t x = ::ReadUnalignedLE32(d);
+    uint32_t y = ::ReadUnalignedLE32(d);
+    return PTHFileData(x, y); 
   }
 };
-} // end anonymous namespace
+} // end anonymous namespace  
+
+typedef OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup;
 
 //===----------------------------------------------------------------------===//
 // PTHManager methods.
@@ -454,9 +559,7 @@ PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags) {
     return 0; // FIXME: Proper error diagnostic?
   }
   
-  llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
-  FL->ReadTable(FileTable);
-
+  llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
   if (FL->isEmpty()) {
     InvalidPTH(Diags, "PTH file contains no cached source data");
     return 0;
@@ -579,11 +682,14 @@ PTHLexer *PTHManager::CreateLexer(FileID FID) {
   // Lookup the FileEntry object in our file lookup data structure.  It will
   // return a variant that indicates whether or not there is an offset within
   // the PTH file that contains cached tokens.
-  PTHFileLookup::Val FileData = ((PTHFileLookup*)FileLookup)->Lookup(FE);
+  PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
+  PTHFileLookup::iterator I = PFL.find(FE);
   
-  if (!FileData.isValid()) // No tokens available.
+  if (I == PFL.end()) // No tokens available?
     return 0;
   
+  const PTHFileData& FileData = *I;  
+  
   const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
   // Compute the offset of the token data within the buffer.
   const unsigned char* data = BufStart + FileData.getTokenOffset();