diff options
-rw-r--r-- | Driver/CacheTokens.cpp | 113 | ||||
-rw-r--r-- | lib/Lex/PTHLexer.cpp | 210 |
2 files changed, 228 insertions, 95 deletions
diff --git a/Driver/CacheTokens.cpp b/Driver/CacheTokens.cpp index 0cd9315f60..cff6dd8f04 100644 --- a/Driver/CacheTokens.cpp +++ b/Driver/CacheTokens.cpp @@ -47,9 +47,10 @@ static void Emit32(llvm::raw_ostream& Out, uint32_t V) { Out << (unsigned char)(V >> 24); } -static void Pad(llvm::raw_fd_ostream& Out, unsigned Alignment) { - Offset off = (Offset) Out.tell(); - for (unsigned Pad = off % Alignment ; Pad != 0 ; --Pad, ++off) Emit8(Out, 0); +static void Pad(llvm::raw_fd_ostream& Out, unsigned A) { + Offset off = (Offset) Out.tell(); + uint32_t n = ((uintptr_t)(off+A-1) & ~(uintptr_t)(A-1)) - off; + for ( ; n ; --n ) Emit8(Out, 0); } //===----------------------------------------------------------------------===// @@ -65,13 +66,13 @@ class OnDiskChainedHashTableGenerator { class Item { public: - typename Info::KeyT key; - typename Info::DataT data; + typename Info::key_type key; + typename Info::data_type data; Item *next; const uint32_t hash; - Item(typename Info::KeyT_ref k, typename Info::DataT_ref d) - : key(k), data(d), next(0), hash(Info::getHash(k)) {} + Item(typename Info::key_type_ref k, typename Info::data_type_ref d) + : key(k), data(d), next(0), hash(Info::ComputeHash(k)) {} }; class Bucket { @@ -86,7 +87,7 @@ class OnDiskChainedHashTableGenerator { Bucket* Buckets; private: - void insert(Item** b, size_t size, Item* E) { + void insert(Bucket* b, size_t size, Item* E) { unsigned idx = E->hash & (size - 1); Bucket& B = b[idx]; E->next = B.head; @@ -95,12 +96,12 @@ private: } void resize(size_t newsize) { - Bucket* newBuckets = calloc(newsize, sizeof(Bucket)); - + Bucket* newBuckets = (Bucket*) calloc(newsize, sizeof(Bucket)); + // Populate newBuckets with the old entries. for (unsigned i = 0; i < NumBuckets; ++i) - for (Item* E = Buckets[i]; E ; ) { + for (Item* E = Buckets[i].head; E ; ) { Item* N = E->next; - E->Next = 0; + E->next = 0; insert(newBuckets, newsize, E); E = N; } @@ -112,7 +113,9 @@ private: public: - void insert(typename Info::Key_ref key, typename Info::DataT_ref data) { + void insert(typename Info::key_type_ref key, + typename Info::data_type_ref data) { + ++NumEntries; if (4*NumEntries >= 3*NumBuckets) resize(NumBuckets*2); insert(Buckets, NumBuckets, new (BA.Allocate<Item>()) Item(key, data)); @@ -125,25 +128,26 @@ public: if (!B.head) continue; // Store the offset for the data of this bucket. - Pad(out, 4); // 4-byte alignment. B.off = out.tell(); - // Write out the number of items in the bucket. We just write out - // 4 bytes to keep things 4-byte aligned. - Emit32(out, B.length); + // Write out the number of items in the bucket. + Emit16(out, B.length); // Write out the entries in the bucket. for (Item *I = B.head; I ; I = I->next) { Emit32(out, I->hash); - Info::EmitKey(out, I->key); - Info::EmitData(out, I->data); + const std::pair<unsigned, unsigned>& Len = + Info::EmitKeyDataLength(out, I->key, I->data); + Info::EmitKey(out, I->key, Len.first); + Info::EmitData(out, I->data, Len.second); } } // Emit the hashtable itself. Pad(out, 4); Offset TableOff = out.tell(); - Emit32(out, NumBuckets); + Emit32(out, NumBuckets); + Emit32(out, NumEntries); for (unsigned i = 0; i < NumBuckets; ++i) Emit32(out, Buckets[i].off); return TableOff; @@ -151,8 +155,10 @@ public: OnDiskChainedHashTableGenerator() { NumEntries = 0; - NumBuckets = 64; - Buckets = calloc(NumBuckets, sizeof(Bucket)); + NumBuckets = 64; + // Note that we do not need to run the constructors of the individual + // Bucket objects since 'calloc' returns bytes that are all 0. + Buckets = (Bucket*) calloc(NumBuckets, sizeof(Bucket)); } ~OnDiskChainedHashTableGenerator() { @@ -178,6 +184,44 @@ public: Offset getPPCondTableOffset() const { return PPCondData; } }; +class VISIBILITY_HIDDEN FileEntryPCHEntryInfo { +public: + typedef const FileEntry* key_type; + typedef key_type key_type_ref; + + typedef PCHEntry data_type; + typedef const PCHEntry& data_type_ref; + + static unsigned ComputeHash(const FileEntry* FE) { + // Bernstein hash function: + // This is basically copy-and-paste from StringMap. This likely won't + // stay here, which is why I didn't both to expose this function from + // String Map. There are plenty of other hash functions which are likely + // to perform better and be faster. + unsigned int R = 0; + for (const char* x = FE->getName(); *x != '\0' ; ++x) R = R * 33 + *x; + return R + (R >> 5); + } + + static std::pair<unsigned,unsigned> + EmitKeyDataLength(llvm::raw_ostream& Out, const FileEntry* FE, + const PCHEntry& E) { + + unsigned n = strlen(FE->getName()) + 1; + ::Emit16(Out, n); + return std::make_pair(n, 8); + } + + static void EmitKey(llvm::raw_ostream& Out, const FileEntry* FE, unsigned n) { + Out.write(FE->getName(), n); + } + + static void EmitData(llvm::raw_ostream& Out, const PCHEntry& E, unsigned) { + ::Emit32(Out, E.getTokenOffset()); + ::Emit32(Out, E.getPPCondTableOffset()); + } +}; + class OffsetOpt { bool valid; Offset off; @@ -189,7 +233,7 @@ public: }; } // end anonymous namespace -typedef llvm::DenseMap<const FileEntry*, PCHEntry> PCHMap; +typedef OnDiskChainedHashTableGenerator<FileEntryPCHEntryInfo> PCHMap; typedef llvm::DenseMap<const IdentifierInfo*,uint32_t> IDMap; typedef llvm::StringMap<OffsetOpt, llvm::BumpPtrAllocator> CachedStrsTy; @@ -547,14 +591,14 @@ void PTHWriter::GeneratePTH() { if (!P.isAbsolute()) continue; - assert(!PM.count(FE) && "fileinfo's are not uniqued on FileEntry?"); + // assert(!PM.count(FE) && "fileinfo's are not uniqued on FileEntry?"); const llvm::MemoryBuffer *B = C.getBuffer(); if (!B) continue; FileID FID = SM.createFileID(FE, SourceLocation(), SrcMgr::C_User); Lexer L(FID, SM, LOpts); - PM[FE] = LexTokens(L); + PM.insert(FE, LexTokens(L)); } // Write out the identifier table. @@ -607,22 +651,5 @@ void clang::CacheTokens(Preprocessor& PP, const std::string& OutFile) { //===----------------------------------------------------------------------===// Offset PTHWriter::EmitFileTable() { - // Determine the offset where this table appears in the PTH file. - Offset off = (Offset) Out.tell(); - - // Output the size of the table. - Emit32(PM.size()); - - for (PCHMap::iterator I=PM.begin(), E=PM.end(); I!=E; ++I) { - const FileEntry* FE = I->first; - const char* Name = FE->getName(); - unsigned size = strlen(Name); - Emit32(size); - EmitBuf(Name, Name+size); - Emit32(I->second.getTokenOffset()); - Emit32(I->second.getPPCondTableOffset()); - } - - return off; + return PM.Emit(Out); } - diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index 8bbf284b32..2f4ac986e4 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -34,12 +34,21 @@ using namespace clang; //===----------------------------------------------------------------------===// static inline uint16_t ReadUnalignedLE16(const unsigned char *&Data) { - uint16_t V = ((uint16_t)Data[0] << 0) | + uint16_t V = ((uint16_t)Data[0]) | ((uint16_t)Data[1] << 8); Data += 2; return V; } +static inline uint32_t ReadUnalignedLE32(const unsigned char *&Data) { + uint32_t V = ((uint32_t)Data[0]) | + ((uint32_t)Data[1] << 8) | + ((uint32_t)Data[2] << 16) | + ((uint32_t)Data[3] << 24); + Data += 4; + return V; +} + static inline uint32_t ReadLE32(const unsigned char *&Data) { // Hosts that directly support little-endian 32-bit loads can just // use them. Big-endian hosts need a bswap. @@ -306,70 +315,166 @@ SourceLocation PTHLexer::getSourceLocation() { } //===----------------------------------------------------------------------===// -// Internal Data Structures for PTH file lookup and resolving identifiers. +// OnDiskChainedHashTable //===----------------------------------------------------------------------===// +template<typename Info> +class OnDiskChainedHashTable { + const unsigned NumBuckets; + const unsigned NumEntries; + const unsigned char* const Buckets; + const unsigned char* const Base; +public: + typedef typename Info::internal_key_type internal_key_type; + typedef typename Info::external_key_type external_key_type; + typedef typename Info::data_type data_type; + + OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries, + const unsigned char* buckets, + const unsigned char* base) + : NumBuckets(numBuckets), NumEntries(numEntries), + Buckets(buckets), Base(base) { + assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 && + "'buckets' must have a 4-byte alignment"); + } + + + bool isEmpty() const { return NumEntries == 0; } + + class iterator { + const unsigned char* const data; + const unsigned len; + public: + iterator() : data(0), len(0) {} + iterator(const unsigned char* d, unsigned l) : data(d), len(l) {} + + data_type operator*() const { return Info::ReadData(data, len); } + bool operator==(const iterator& X) const { return X.data == data; } + bool operator!=(const iterator& X) const { return X.data != data; } + }; + + iterator find(const external_key_type& eKey) { + const internal_key_type& iKey = Info::GetInternalKey(eKey); + unsigned key_hash = Info::ComputeHash(iKey); + + // Each bucket is just a 32-bit offset into the PTH file. + unsigned idx = key_hash & (NumBuckets - 1); + const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx; + + unsigned offset = ReadLE32(Bucket); + if (offset == 0) return iterator(); // Empty bucket. + const unsigned char* Items = Base + offset; + + // 'Items' starts with a 16-bit unsigned integer representing the + // number of items in this bucket. + unsigned len = ReadUnalignedLE16(Items); + + for (unsigned i = 0; i < len; ++i) { + // Read the hash. + uint32_t item_hash = ReadUnalignedLE32(Items); + + // Determine the length of the key and the data. + const std::pair<unsigned, unsigned>& L = Info::ReadKeyDataLength(Items); + unsigned item_len = L.first + L.second; + + // Compare the hashes. If they are not the same, skip the entry entirely. + if (item_hash != key_hash) { + Items += item_len; + continue; + } + + // Read the key. + const internal_key_type& X = + Info::ReadKey((const unsigned char* const) Items, L.first); + + // If the key doesn't match just skip reading the value. + if (!Info::EqualKey(X, iKey)) { + Items += item_len; + continue; + } + + // The key matches! + return iterator(Items + L.first, L.second); + } + + return iterator(); + } + + iterator end() const { return iterator(); } + + + static OnDiskChainedHashTable* Create(const unsigned char* buckets, + const unsigned char* const base) { + + assert(buckets > base); + assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 && + "buckets should be 4-byte aligned."); + + unsigned numBuckets = ReadLE32(buckets); + unsigned numEntries = ReadLE32(buckets); + return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets, + base); + } +}; + +//===----------------------------------------------------------------------===// +// PTH file lookup: map from strings to file data. +//===----------------------------------------------------------------------===// /// PTHFileLookup - This internal data structure is used by the PTHManager /// to map from FileEntry objects managed by FileManager to offsets within /// the PTH file. namespace { -class VISIBILITY_HIDDEN PTHFileLookup { +class VISIBILITY_HIDDEN PTHFileData { + const uint32_t TokenOff; + const uint32_t PPCondOff; public: - class Val { - uint32_t TokenOff; - uint32_t PPCondOff; - public: - Val() : TokenOff(~0) {} - Val(uint32_t toff, uint32_t poff) - : TokenOff(toff), PPCondOff(poff) {} + PTHFileData(uint32_t tokenOff, uint32_t ppCondOff) + : TokenOff(tokenOff), PPCondOff(ppCondOff) {} - bool isValid() const { return TokenOff != ~((uint32_t)0); } - - uint32_t getTokenOffset() const { - assert(isValid() && "PTHFileLookup entry initialized."); - return TokenOff; - } - - uint32_t getPPCondOffset() const { - assert(isValid() && "PTHFileLookup entry initialized."); - return PPCondOff; - } - }; - -private: - llvm::StringMap<Val> FileMap; + uint32_t getTokenOffset() const { return TokenOff; } + uint32_t getPPCondOffset() const { return PPCondOff; } +}; +class VISIBILITY_HIDDEN PTHFileLookupTrait { public: - PTHFileLookup() {}; + typedef PTHFileData data_type; + typedef const FileEntry* external_key_type; + typedef const char* internal_key_type; - bool isEmpty() const { - return FileMap.empty(); + static bool EqualKey(const char* a, const char* b) { + return strcmp(a, b) == 0; + } + + static unsigned ComputeHash(const char* x) { + // More copy-paste nonsense. Will refactor. + unsigned int R = 0; + for (; *x != '\0' ; ++x) R = R * 33 + *x; + return R + (R >> 5); + } + + static const char* GetInternalKey(const FileEntry* FE) { + return FE->getName(); } - Val Lookup(const FileEntry* FE) { - const char* s = FE->getName(); - unsigned size = strlen(s); - return FileMap.GetOrCreateValue(s, s+size).getValue(); + static std::pair<unsigned, unsigned> + ReadKeyDataLength(const unsigned char*& d) { + return std::make_pair((unsigned) ReadUnalignedLE16(d), 8U); } - void ReadTable(const unsigned char* D) { - uint32_t N = ReadLE32(D); // Read the length of the table. - - for ( ; N > 0; --N) { // The rest of the data is the table itself. - uint32_t Len = ReadLE32(D); - const char* s = (const char *)D; - D += Len; - - uint32_t TokenOff = ReadLE32(D); - uint32_t PPCondOff = ReadLE32(D); - - FileMap.GetOrCreateValue(s, s+Len).getValue() = - Val(TokenOff, PPCondOff); - } + static const char* ReadKey(const unsigned char* d, unsigned) { + return (const char*) d; + } + + static PTHFileData ReadData(const unsigned char* d, unsigned) { + uint32_t x = ::ReadUnalignedLE32(d); + uint32_t y = ::ReadUnalignedLE32(d); + return PTHFileData(x, y); } }; -} // end anonymous namespace +} // end anonymous namespace + +typedef OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup; //===----------------------------------------------------------------------===// // PTHManager methods. @@ -454,9 +559,7 @@ PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags) { return 0; // FIXME: Proper error diagnostic? } - llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup()); - FL->ReadTable(FileTable); - + llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg)); if (FL->isEmpty()) { InvalidPTH(Diags, "PTH file contains no cached source data"); return 0; @@ -579,11 +682,14 @@ PTHLexer *PTHManager::CreateLexer(FileID FID) { // Lookup the FileEntry object in our file lookup data structure. It will // return a variant that indicates whether or not there is an offset within // the PTH file that contains cached tokens. - PTHFileLookup::Val FileData = ((PTHFileLookup*)FileLookup)->Lookup(FE); + PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup); + PTHFileLookup::iterator I = PFL.find(FE); - if (!FileData.isValid()) // No tokens available. + if (I == PFL.end()) // No tokens available? return 0; + const PTHFileData& FileData = *I; + const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart(); // Compute the offset of the token data within the buffer. const unsigned char* data = BufStart + FileData.getTokenOffset(); |