diff options
-rw-r--r-- | Driver/CacheTokens.cpp | 23 | ||||
-rw-r--r-- | Driver/clang.cpp | 31 | ||||
-rw-r--r-- | include/clang/Basic/IdentifierTable.h | 52 | ||||
-rw-r--r-- | include/clang/Lex/PTHManager.h | 45 | ||||
-rw-r--r-- | include/clang/Lex/Preprocessor.h | 4 | ||||
-rw-r--r-- | lib/Basic/IdentifierTable.cpp | 14 | ||||
-rw-r--r-- | lib/Lex/PTHLexer.cpp | 85 | ||||
-rw-r--r-- | lib/Lex/Preprocessor.cpp | 5 |
8 files changed, 207 insertions, 52 deletions
diff --git a/Driver/CacheTokens.cpp b/Driver/CacheTokens.cpp index 2548d3d7dd..d791e09d6c 100644 --- a/Driver/CacheTokens.cpp +++ b/Driver/CacheTokens.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/System/Path.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Streams.h" using namespace clang; @@ -179,8 +180,20 @@ public: CompareIDDataIndex(IDData* table) : Table(table) {} bool operator()(unsigned i, unsigned j) const { - // Assume that IdentifierInfo::getName() returns a '\0' terminated string. - return strcmp(Table[i].II->getName(), Table[j].II->getName()) < 0; + const IdentifierInfo* II_i = Table[i].II; + const IdentifierInfo* II_j = Table[j].II; + + unsigned i_len = II_i->getLength(); + unsigned j_len = II_j->getLength(); + + if (i_len > j_len) + return false; + + if (i_len < j_len) + return true; + + // Otherwise, compare the strings themselves! + return strncmp(II_i->getName(), II_j->getName(), i_len) < 0; } }; } @@ -221,7 +234,10 @@ PTHWriter::EmitIdentifierTable() { unsigned len = d.II->getLength(); // Write out the string length. Emit32(len); const char* buf = d.II->getName(); // Write out the string data. - EmitBuf(buf, buf+len); + EmitBuf(buf, buf+len); + // Emit a null character for those clients expecting that IdentifierInfo + // strings are null terminated. + Emit8('\0'); } // Now emit the table mapping from persistent IDs to PTH file offsets. @@ -229,7 +245,6 @@ PTHWriter::EmitIdentifierTable() { Emit32(idcount); // Emit the number of identifiers. for (unsigned i = 0 ; i < idcount; ++i) Emit32(IIDMap[i].FileOffset); - return std::make_pair(DataOff, std::make_pair(IDOff, LexicalOff)); } diff --git a/Driver/clang.cpp b/Driver/clang.cpp index c5aac3773e..b7aea6799e 100644 --- a/Driver/clang.cpp +++ b/Driver/clang.cpp @@ -955,12 +955,6 @@ static bool InitializePreprocessor(Preprocessor &PP, PredefineBuffer.push_back(0); PP.setPredefines(&PredefineBuffer[0]); - // Use PTH. 
- if (!TokenCache.empty()) { - PTHManager* PM = PTHManager::Create(TokenCache, PP); - if (PM) PP.setPTHManager(PM); - } - // Once we've read this, we're done. return false; } @@ -1142,18 +1136,33 @@ public: virtual ~DriverPreprocessorFactory() {} virtual Preprocessor* CreatePreprocessor() { - Preprocessor* PP = new Preprocessor(Diags, LangInfo, Target, - SourceMgr, HeaderInfo); + llvm::OwningPtr<PTHManager> PTHMgr; + + // Use PTH? + if (!TokenCache.empty()) + PTHMgr.reset(PTHManager::Create(TokenCache)); + + // Create the Preprocessor. + llvm::OwningPtr<Preprocessor> PP(new Preprocessor(Diags, LangInfo, Target, + SourceMgr, HeaderInfo, + PTHMgr.get())); + + // Note that this is different than passing PTHMgr to Preprocessor's ctor. + // That argument is used as the IdentifierInfoLookup argument to + // IdentifierTable's ctor. + if (PTHMgr) { + PTHMgr->setPreprocessor(PP.get()); + PP->setPTHManager(PTHMgr.take()); + } if (InitializePreprocessor(*PP, InitializeSourceMgr, InFile)) { - delete PP; return NULL; } /// FIXME: PP can only handle one callback if (ProgAction != PrintPreprocessedInput) { const char* ErrStr; - bool DFG = CreateDependencyFileGen(PP, OutputFile, InFile, ErrStr); + bool DFG = CreateDependencyFileGen(PP.get(), OutputFile, InFile, ErrStr); if (!DFG && ErrStr) { fprintf(stderr, "%s", ErrStr); return NULL; @@ -1161,7 +1170,7 @@ public: } InitializeSourceMgr = false; - return PP; + return PP.take(); } }; } diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h index a0fac27657..7a4db7c02a 100644 --- a/include/clang/Basic/IdentifierTable.h +++ b/include/clang/Basic/IdentifierTable.h @@ -19,6 +19,7 @@ #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/Bitcode/SerializationFwd.h" #include <string> #include <cassert> @@ -55,12 +56,13 @@ class IdentifierInfo { bool IsExtension : 1; // True if identifier is a lang extension.
bool IsPoisoned : 1; // True if identifier is poisoned. bool IsCPPOperatorKeyword : 1; // True if ident is a C++ operator keyword. - // 10 bits left in 32-bit word. + bool IndirectString : 1; // True if the string is stored indirectly. + // 9 bits left in 32-bit word. void *FETokenInfo; // Managed by the language front-end. IdentifierInfo(const IdentifierInfo&); // NONCOPYABLE. void operator=(const IdentifierInfo&); // NONASSIGNABLE. public: - IdentifierInfo(); + IdentifierInfo(bool usesIndirectString = false); /// isStr - Return true if this is the identifier for the specified string. /// This is intended to be used for string literals only: II->isStr("foo"). @@ -73,6 +75,13 @@ public: /// string is properly null terminated. /// const char *getName() const { + if (IndirectString) { + // The 'this' pointer really points to a + // std::pair<IdentifierInfo, const char*>, where internal pointer + // points to the external string data. + return ((std::pair<IdentifierInfo, const char*>*) this)->second + 4; + } + // We know that this is embedded into a StringMapEntry, and it knows how to // efficiently find the string. return llvm::StringMapEntry<IdentifierInfo>:: @@ -82,6 +91,17 @@ public: /// getLength - Efficiently return the length of this identifier info. /// unsigned getLength() const { + if (IndirectString) { + // The 'this' pointer really points to a + // std::pair<IdentifierInfo, const char*>, where internal pointer + // points to the external string data. 
+ const char* p = ((std::pair<IdentifierInfo, const char*>*) this)->second; + return ((unsigned) p[0]) + | (((unsigned) p[1]) << 8) + | (((unsigned) p[2]) << 16) + | (((unsigned) p[3]) << 24); + } + return llvm::StringMapEntry<IdentifierInfo>:: GetStringMapEntryFromValue(*this).getKeyLength(); } @@ -161,6 +181,20 @@ public: void Read(llvm::Deserializer& D); }; +/// IdentifierInfoLookup - An abstract class used by IdentifierTable that +/// provides an interface for performing lookups from strings +/// (const char *) to IdentifierInfo objects. +class IdentifierInfoLookup { +public: + virtual ~IdentifierInfoLookup(); + + /// get - Return the identifier token info for the specified named identifier. + /// Unlike the version in IdentifierTable, this returns a pointer instead + /// of a reference. If the pointer is NULL then the IdentifierInfo cannot + /// be found. + virtual IdentifierInfo* get(const char *NameStart, const char *NameEnd) = 0; +}; + /// IdentifierTable - This table implements an efficient mapping from strings to /// IdentifierInfo nodes. It has no other purpose, but this is an /// extremely performance-critical piece of the code, as each occurrance of @@ -170,15 +204,27 @@ class IdentifierTable { // BumpPtrAllocator! typedef llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator> HashTableTy; HashTableTy HashTable; + + IdentifierInfoLookup* ExternalLookup; public: /// IdentifierTable ctor - Create the identifier table, populating it with /// info about the language keywords for the language specified by LangOpts. - IdentifierTable(const LangOptions &LangOpts); + IdentifierTable(const LangOptions &LangOpts, + IdentifierInfoLookup* externalLookup = 0); + + llvm::BumpPtrAllocator& getAllocator() { + return HashTable.getAllocator(); + } /// get - Return the identifier token info for the specified named identifier.
/// IdentifierInfo &get(const char *NameStart, const char *NameEnd) { + if (ExternalLookup) { + IdentifierInfo* II = ExternalLookup->get(NameStart, NameEnd); + if (II) return *II; + } + return HashTable.GetOrCreateValue(NameStart, NameEnd).getValue(); } diff --git a/include/clang/Lex/PTHManager.h b/include/clang/Lex/PTHManager.h index 60beee3645..634c088b8f 100644 --- a/include/clang/Lex/PTHManager.h +++ b/include/clang/Lex/PTHManager.h @@ -16,7 +16,9 @@ #include "clang/Lex/PTHLexer.h" #include "clang/Basic/LangOptions.h" +#include "clang/Basic/IdentifierTable.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Allocator.h" #include <string> namespace llvm { @@ -26,8 +28,6 @@ namespace llvm { namespace clang { class FileEntry; -class IdentifierInfo; -class IdentifierTable; class PTHLexer; class PTHManager; @@ -54,7 +54,7 @@ public: LinearItr(tableBeg) {} }; -class PTHManager { +class PTHManager : public IdentifierInfoLookup { friend class PTHLexer; friend class PTHSpellingSearch; @@ -64,6 +64,9 @@ class PTHManager { /// A map from FileIDs to SpellingSearch objects. llvm::DenseMap<unsigned,PTHSpellingSearch*> SpellingMap; + /// Alloc - Allocator used for IdentifierInfo objects. + llvm::BumpPtrAllocator Alloc; + /// IdMap - A lazily generated cache mapping from persistent identifiers to /// IdentifierInfo*. IdentifierInfo** PerIDCache; @@ -75,47 +78,59 @@ class PTHManager { /// IdDataTable - Array representing the mapping from persistent IDs to the /// data offset within the PTH file containing the information to /// reconsitute an IdentifierInfo. - const char* IdDataTable; + const char* const IdDataTable; + + /// SortedIdTable - Array ordering persistent identifier IDs by the lexical + /// order of their corresponding strings. This is used by get(). + const char* const SortedIdTable; - /// ITable - The IdentifierTable used for the translation unit being lexed. - IdentifierTable& ITable; + /// NumIds - The number of identifiers in the PTH file. 
+ const unsigned NumIds; /// PP - The Preprocessor object that will use this PTHManager to create /// PTHLexer objects. - Preprocessor& PP; + Preprocessor* PP; /// This constructor is intended to only be called by the static 'Create' /// method. PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, const char* idDataTable, IdentifierInfo** perIDCache, - Preprocessor& pp); + const char* sortedIdTable, unsigned numIds); // Do not implement. PTHManager(); void operator=(const PTHManager&); - /// GetIdentifierInfo - Used by PTHManager to reconstruct IdentifierInfo - /// objects from the PTH file. - IdentifierInfo* GetIdentifierInfo(unsigned); - /// getSpellingAtPTHOffset - Used by PTHLexer classes to get the cached /// spelling for a token. unsigned getSpellingAtPTHOffset(unsigned PTHOffset, const char*& Buffer); -public: + /// GetIdentifierInfo - Used to reconstruct IdentifierInfo objects from the + /// PTH file. + IdentifierInfo* GetIdentifierInfo(unsigned); + +public: ~PTHManager(); + /// get - Return the identifier token info for the specified named identifier. + /// Unlike the version in IdentifierTable, this returns a pointer instead + /// of a reference. If the pointer is NULL then the IdentifierInfo cannot + /// be found. + IdentifierInfo* get(const char *NameStart, const char *NameEnd); + /// Create - This method creates PTHManager objects. The 'file' argument /// is the name of the PTH file. This method returns NULL upon failure. - static PTHManager* Create(const std::string& file, Preprocessor& PP); + static PTHManager* Create(const std::string& file); + void setPreprocessor(Preprocessor* pp) { PP = pp; } + /// CreateLexer - Return a PTHLexer that "lexes" the cached tokens for the /// specified file. This method returns NULL if no cached tokens exist. /// It is the responsibility of the caller to 'delete' the returned object. 
PTHLexer* CreateLexer(unsigned FileID, const FileEntry* FE); - unsigned getSpelling(unsigned FileID, unsigned fpos, const char *& Buffer); + unsigned getSpelling(unsigned FileID, unsigned fpos, const char *& Buffer); }; } // end namespace clang diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index 5ee8378d52..b48ce38998 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -191,7 +191,9 @@ private: // Cached tokens state. public: Preprocessor(Diagnostic &diags, const LangOptions &opts, TargetInfo &target, - SourceManager &SM, HeaderSearch &Headers); + SourceManager &SM, HeaderSearch &Headers, + IdentifierInfoLookup* IILookup); + ~Preprocessor(); Diagnostic &getDiagnostics() const { return Diags; } diff --git a/lib/Basic/IdentifierTable.cpp b/lib/Basic/IdentifierTable.cpp index 048abf8b2e..1243e3eb8a 100644 --- a/lib/Basic/IdentifierTable.cpp +++ b/lib/Basic/IdentifierTable.cpp @@ -25,7 +25,7 @@ using namespace clang; // IdentifierInfo Implementation //===----------------------------------------------------------------------===// -IdentifierInfo::IdentifierInfo() { +IdentifierInfo::IdentifierInfo(bool usesIndirectString) { TokenID = tok::identifier; ObjCOrBuiltinID = 0; HasMacro = false; @@ -33,15 +33,19 @@ IdentifierInfo::IdentifierInfo() { IsPoisoned = false; IsCPPOperatorKeyword = false; FETokenInfo = 0; + IndirectString = usesIndirectString; } //===----------------------------------------------------------------------===// // IdentifierTable Implementation //===----------------------------------------------------------------------===// -IdentifierTable::IdentifierTable(const LangOptions &LangOpts) - // Start with space for 8K identifiers. - : HashTable(8192) { +IdentifierInfoLookup::~IdentifierInfoLookup() {} + +IdentifierTable::IdentifierTable(const LangOptions &LangOpts, + IdentifierInfoLookup* externalLookup) + : HashTable(8192), // Start with space for 8K identifiers. 
+ ExternalLookup(externalLookup) { // Populate the identifier table with info about keywords for the current // language. @@ -50,7 +54,7 @@ IdentifierTable::IdentifierTable(const LangOptions &LangOpts) // This cstor is intended to be used only for serialization. IdentifierTable::IdentifierTable() - : HashTable(8192) { } + : HashTable(8192), ExternalLookup(0) { } //===----------------------------------------------------------------------===// // Language Keyword Implementation diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index 86c65771fd..a2d66fe075 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -506,9 +506,10 @@ public: PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, const char* idDataTable, IdentifierInfo** perIDCache, - Preprocessor& pp) + const char* sortedIdTable, unsigned numIds) : Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), - IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {} + IdDataTable(idDataTable), SortedIdTable(sortedIdTable), + NumIds(numIds), PP(0) {} PTHManager::~PTHManager() { delete Buf; @@ -516,7 +517,7 @@ PTHManager::~PTHManager() { free(PerIDCache); } -PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) { +PTHManager* PTHManager::Create(const std::string& file) { // Memory map the PTH file. llvm::OwningPtr<llvm::MemoryBuffer> @@ -563,6 +564,14 @@ PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) { return 0; // FIXME: Proper error diagnostic? } + // Get the location of the lexicographically-sorted table of persistent IDs. + const char* SortedIdTableOffset = EndTable + sizeof(uint32_t)*2; + const char* SortedIdTable = BufBeg + Read32(SortedIdTableOffset); + if (!(SortedIdTable > BufBeg && SortedIdTable < BufEnd)) { + assert(false && "Invalid PTH file."); + return 0; // FIXME: Proper error diagnostic? + } + // Get the number of IdentifierInfos and pre-allocate the identifier cache.
uint32_t NumIds = Read32(IData); @@ -577,14 +586,15 @@ PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) { return 0; } - // Create the new lexer. - return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP); + // Create the new PTHManager. + return new PTHManager(File.take(), FL.take(), IData, PerIDCache, + SortedIdTable, NumIds); } IdentifierInfo* PTHManager::GetIdentifierInfo(unsigned persistentID) { // Check if the IdentifierInfo has already been resolved. - IdentifierInfo*& II = PerIDCache[persistentID]; + IdentifierInfo* II = PerIDCache[persistentID]; if (II) return II; // Look in the PTH file for the string data for the IdentifierInfo object. @@ -592,14 +602,66 @@ IdentifierInfo* PTHManager::GetIdentifierInfo(unsigned persistentID) { const char* IDData = Buf->getBufferStart() + Read32(TableEntry); assert(IDData < Buf->getBufferEnd()); - // Read the length of the string. - uint32_t len = Read32(IDData); + // Allocate the object. + std::pair<IdentifierInfo,const char*> *Mem = + Alloc.Allocate<std::pair<IdentifierInfo,const char*> >(); + + Mem->second = IDData; + II = new ((void*) Mem) IdentifierInfo(true); - // Get the IdentifierInfo* with the specified string. - II = &ITable.get(IDData, IDData+len); + // Store the new IdentifierInfo in the cache. + PerIDCache[persistentID] = II; return II; } +IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) { + unsigned min = 0; + unsigned max = NumIds; + unsigned len = NameEnd - NameStart; + + do { + unsigned i = (max - min) / 2 + min; + const char* p = SortedIdTable + (i * 4); + + // Read the persistentID. + unsigned perID = + ((unsigned) ((uint8_t) p[0])) + | (((unsigned) ((uint8_t) p[1])) << 8) + | (((unsigned) ((uint8_t) p[2])) << 16) + | (((unsigned) ((uint8_t) p[3])) << 24); + + // Get the IdentifierInfo. + IdentifierInfo* II = GetIdentifierInfo(perID); + + // First compare the lengths. 
+ unsigned IILen = II->getLength(); + if (len < IILen) goto IsLess; + if (len > IILen) goto IsGreater; + + // Now compare the strings! + { + signed comp = strncmp(NameStart, II->getName(), len); + if (comp < 0) goto IsLess; + if (comp > 0) goto IsGreater; + } + // We found a match! + return II; + + IsGreater: + if (i == min) break; + min = i; + continue; + + IsLess: + max = i; + assert(!(max == min) || (min == i)); + } + while (1); + + return 0; +} + + PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) { if (!FE) @@ -634,6 +696,7 @@ PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) { PTHSpellingSearch* ss = new PTHSpellingSearch(*this, len, spellingTable); SpellingMap[FileID] = ss; - return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond, + assert(PP && "No preprocessor set yet!"); + return new PTHLexer(*PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond, *ss, *this); } diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index e09ce1312d..cac78fe6e0 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -45,9 +45,10 @@ PreprocessorFactory::~PreprocessorFactory() {} Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, TargetInfo &target, SourceManager &SM, - HeaderSearch &Headers) + HeaderSearch &Headers, + IdentifierInfoLookup* IILookup) : Diags(diags), Features(opts), Target(target), FileMgr(Headers.getFileMgr()), - SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts), + SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts, IILookup), CurPPLexer(0), CurDirLookup(0), Callbacks(0) { ScratchBuf = new ScratchBuffer(SourceMgr); |