From 677e15ffee2ecc9c1c8f46fd77cab4b5afb59640 Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Tue, 19 Mar 2013 00:28:20 +0000 Subject: Eliminate race condition between module rebuild and the global module index. The global module index was querying the file manager for each of the module files it knows about at load time, to prune out any out-of-date information. The file manager would then cache the results of the stat() falls used to find that module file. Later, the same translation unit could end up trying to import one of the module files that had previously been ignored by the module cache, but after some other Clang instance rebuilt the module file to bring it up-to-date. The stale stat() results in the file manager would trigger a second rebuild of the already-up-to-date module, causing failures down the line. The global module index now lazily resolves its module file references to actual AST reader module files only after the module file has been loaded, eliminating the stat-caching race. Moreover, the AST reader can communicate to its caller that a module file is missing (rather than simply being out-of-date), allowing us to simplify the module-loading logic and allowing the compiler to recover if a dependent module file ends up getting deleted. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@177367 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Serialization/GlobalModuleIndex.cpp | 237 ++++++++++++++------------------ 1 file changed, 104 insertions(+), 133 deletions(-) (limited to 'lib/Serialization/GlobalModuleIndex.cpp') diff --git a/lib/Serialization/GlobalModuleIndex.cpp b/lib/Serialization/GlobalModuleIndex.cpp index 7d34f85fd8..8b5851dbe5 100644 --- a/lib/Serialization/GlobalModuleIndex.cpp +++ b/lib/Serialization/GlobalModuleIndex.cpp @@ -57,6 +57,8 @@ static const char * const IndexFileName = "modules.idx"; /// \brief The global index file version. static const unsigned CurrentVersion = 1; +ModuleFileNameResolver::~ModuleFileNameResolver() { } + //----------------------------------------------------------------------------// // Global module index reader. //----------------------------------------------------------------------------// @@ -115,29 +117,16 @@ public: typedef OnDiskChainedHashTable IdentifierIndexTable; -/// \brief Module information as it was loaded from the index file. -struct LoadedModuleInfo { - const FileEntry *File; - SmallVector Dependencies; - SmallVector ImportedBy; -}; - } -GlobalModuleIndex::GlobalModuleIndex(FileManager &FileMgr, - llvm::MemoryBuffer *Buffer, +GlobalModuleIndex::GlobalModuleIndex(llvm::MemoryBuffer *Buffer, llvm::BitstreamCursor Cursor) - : Buffer(Buffer), IdentifierIndex(), + : Buffer(Buffer), Resolver(), IdentifierIndex(), NumIdentifierLookups(), NumIdentifierLookupHits() { - typedef llvm::DenseMap LoadedModulesMap; - LoadedModulesMap LoadedModules; - // Read the global index. - unsigned LargestID = 0; bool InGlobalIndexBlock = false; bool Done = false; - bool AnyOutOfDate = false; while (!Done) { llvm::BitstreamEntry Entry = Cursor.advance(); @@ -185,41 +174,33 @@ GlobalModuleIndex::GlobalModuleIndex(FileManager &FileMgr, case MODULE: { unsigned Idx = 0; unsigned ID = Record[Idx++]; - if (ID > LargestID) - LargestID = ID; - - off_t Size = Record[Idx++]; - time_t ModTime = Record[Idx++]; + + // Make room for this module's information. + if (ID == Modules.size()) + Modules.push_back(ModuleInfo()); + else + Modules.resize(ID + 1); + + // Size/modification time for this module file at the time the + // global index was built. + Modules[ID].Size = Record[Idx++]; + Modules[ID].ModTime = Record[Idx++]; // File name. unsigned NameLen = Record[Idx++]; - llvm::SmallString<64> FileName(Record.begin() + Idx, - Record.begin() + Idx + NameLen); + Modules[ID].FileName.assign(Record.begin() + Idx, + Record.begin() + Idx + NameLen); Idx += NameLen; // Dependencies unsigned NumDeps = Record[Idx++]; - llvm::SmallVector - Dependencies(Record.begin() + Idx, Record.begin() + Idx + NumDeps); - - // Find the file. If we can't find it, ignore it. - const FileEntry *File = FileMgr.getFile(FileName, /*openFile=*/false, - /*cacheFailure=*/false); - if (!File) { - AnyOutOfDate = true; - break; - } - - // If the module file is newer than the index, ignore it. - if (File->getSize() != Size || File->getModificationTime() != ModTime) { - AnyOutOfDate = true; - break; - } + Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(), + Record.begin() + Idx, + Record.begin() + Idx + NumDeps); + Idx += NumDeps; - // Record this module. The dependencies will be resolved later. - LoadedModuleInfo &Info = LoadedModules[ID]; - Info.File = File; - Info.Dependencies.swap(Dependencies); + // Make sure we're at the end of the record. + assert(Idx == Record.size() && "More module info?"); break; } @@ -235,74 +216,11 @@ GlobalModuleIndex::GlobalModuleIndex(FileManager &FileMgr, } } - // If there are any modules that have gone out-of-date, prune out any modules - // that depend on them. - if (AnyOutOfDate) { - // First, build back links in the module dependency graph. - SmallVector Stack; - for (LoadedModulesMap::iterator LM = LoadedModules.begin(), - LMEnd = LoadedModules.end(); - LM != LMEnd; ++LM) { - unsigned ID = LM->first; - - // If this module is out-of-date, push it onto the stack. - if (LM->second.File == 0) - Stack.push_back(ID); - - for (unsigned I = 0, N = LM->second.Dependencies.size(); I != N; ++I) { - unsigned DepID = LM->second.Dependencies[I]; - LoadedModulesMap::iterator Known = LoadedModules.find(DepID); - if (Known == LoadedModules.end() || !Known->second.File) { - // The dependency was out-of-date, so mark us as out of date. - // This is just an optimization. - if (LM->second.File) - Stack.push_back(ID); - - LM->second.File = 0; - continue; - } - - // Record this reverse dependency. - Known->second.ImportedBy.push_back(ID); - } - } - - // Second, walk the back links from out-of-date modules to those modules - // that depend on them, making those modules out-of-date as well. - while (!Stack.empty()) { - unsigned ID = Stack.back(); - Stack.pop_back(); - - LoadedModuleInfo &Info = LoadedModules[ID]; - for (unsigned I = 0, N = Info.ImportedBy.size(); I != N; ++I) { - unsigned FromID = Info.ImportedBy[I]; - if (LoadedModules[FromID].File) { - LoadedModules[FromID].File = 0; - Stack.push_back(FromID); - } - } - } - } - - // Allocate the vector containing information about all of the modules. - Modules.resize(LargestID + 1); - for (LoadedModulesMap::iterator LM = LoadedModules.begin(), - LMEnd = LoadedModules.end(); - LM != LMEnd; ++LM) { - if (!LM->second.File) - continue; - - Modules[LM->first].File = LM->second.File; - - // Resolve dependencies. Drop any we can't resolve due to out-of-date - // module files. - for (unsigned I = 0, N = LM->second.Dependencies.size(); I != N; ++I) { - unsigned DepID = LM->second.Dependencies[I]; - LoadedModulesMap::iterator Known = LoadedModules.find(DepID); - if (Known == LoadedModules.end() || !Known->second.File) - continue; - - Modules[LM->first].Dependencies.push_back(Known->second.File); + // Compute imported-by relation. + for (unsigned ID = 0, IDEnd = Modules.size(); ID != IDEnd; ++ID) { + for (unsigned D = 0, DEnd = Modules[ID].Dependencies.size(); + D != DEnd; ++D) { + Modules[Modules[ID].Dependencies[D]].ImportedBy.push_back(ID); } } } @@ -310,15 +228,14 @@ GlobalModuleIndex::GlobalModuleIndex(FileManager &FileMgr, GlobalModuleIndex::~GlobalModuleIndex() { } std::pair -GlobalModuleIndex::readIndex(FileManager &FileMgr, StringRef Path) { +GlobalModuleIndex::readIndex(StringRef Path) { // Load the index file, if it's there. llvm::SmallString<128> IndexPath; IndexPath += Path; llvm::sys::path::append(IndexPath, IndexFileName); - llvm::OwningPtr Buffer( - FileMgr.getBufferForFile(IndexPath)); - if (!Buffer) + llvm::OwningPtr Buffer; + if (llvm::MemoryBuffer::getFile(IndexPath, Buffer) != llvm::errc::success) return std::make_pair((GlobalModuleIndex *)0, EC_NotFound); /// \brief The bitstream reader from which we'll read the AST file. @@ -336,38 +253,41 @@ GlobalModuleIndex::readIndex(FileManager &FileMgr, StringRef Path) { return std::make_pair((GlobalModuleIndex *)0, EC_IOError); } - return std::make_pair(new GlobalModuleIndex(FileMgr, Buffer.take(), Cursor), - EC_None); + return std::make_pair(new GlobalModuleIndex(Buffer.take(), Cursor), EC_None); } -void GlobalModuleIndex::getKnownModules( - SmallVectorImpl &ModuleFiles) { +void +GlobalModuleIndex::getKnownModules(SmallVectorImpl &ModuleFiles) { ModuleFiles.clear(); for (unsigned I = 0, N = Modules.size(); I != N; ++I) { - if (Modules[I].File) - ModuleFiles.push_back(Modules[I].File); + if (ModuleFile *File = resolveModuleFile(I)) + ModuleFiles.push_back(File); } } void GlobalModuleIndex::getModuleDependencies( - const clang::FileEntry *ModuleFile, - SmallVectorImpl &Dependencies) { + ModuleFile *File, + SmallVectorImpl &Dependencies) { // If the file -> index mapping is empty, populate it now. if (ModulesByFile.empty()) { for (unsigned I = 0, N = Modules.size(); I != N; ++I) { - if (Modules[I].File) - ModulesByFile[Modules[I].File] = I; + resolveModuleFile(I); } } // Look for information about this module file. - llvm::DenseMap::iterator Known - = ModulesByFile.find(ModuleFile); + llvm::DenseMap::iterator Known + = ModulesByFile.find(File); if (Known == ModulesByFile.end()) return; // Record dependencies. - Dependencies = Modules[Known->second].Dependencies; + Dependencies.clear(); + ArrayRef StoredDependencies = Modules[Known->second].Dependencies; + for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) { + if (ModuleFile *MF = resolveModuleFile(I)) + Dependencies.push_back(MF); + } } bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) { @@ -388,17 +308,62 @@ bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) { SmallVector ModuleIDs = *Known; for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) { - unsigned ID = ModuleIDs[I]; - if (ID >= Modules.size() || !Modules[ID].File) - continue; - - Hits.insert(Modules[ID].File); + if (ModuleFile *File = resolveModuleFile(ModuleIDs[I])) + Hits.insert(File); } ++NumIdentifierLookupHits; return true; } +ModuleFile *GlobalModuleIndex::resolveModuleFile(unsigned ID) { + assert(ID < Modules.size() && "Out-of-bounds module index"); + + // If we already have a module file, return it. + if (Modules[ID].File) + return Modules[ID].File; + + // If we don't have a file name, or if there is no resolver, we can't + // resolve this. + if (Modules[ID].FileName.empty() || !Resolver) + return 0; + + // Try to resolve this module file. + ModuleFile *File; + if (Resolver->resolveModuleFileName(Modules[ID].FileName, Modules[ID].Size, + Modules[ID].ModTime, File)) { + // Clear out the module files for anything that depends on this module. + llvm::SmallVector Stack; + + Stack.push_back(ID); + while (!Stack.empty()) { + unsigned Victim = Stack.back(); + Stack.pop_back(); + + // Mark this file as ignored. + Modules[Victim].File = 0; + Modules[Victim].FileName.clear(); + + // Push any not-yet-ignored imported modules onto the stack. + for (unsigned I = 0, N = Modules[Victim].ImportedBy.size(); I != N; ++I) { + unsigned NextVictim = Modules[Victim].ImportedBy[I]; + if (!Modules[NextVictim].FileName.empty()) + Stack.push_back(NextVictim); + } + } + + return 0; + } + + // If we have a module file, record it. + if (File) { + Modules[ID].File = File; + ModulesByFile[File] = ID; + } + + return File; +} + void GlobalModuleIndex::printStats() { std::fprintf(stderr, "*** Global Module Index Statistics:\n"); if (NumIdentifierLookups) { @@ -629,6 +594,10 @@ bool GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) { // Skip the import location ++Idx; + // Load stored size/modification time. + off_t StoredSize = (off_t)Record[Idx++]; + time_t StoredModTime = (time_t)Record[Idx++]; + // Retrieve the imported file name. unsigned Length = Record[Idx++]; SmallString<128> ImportedFile(Record.begin() + Idx, @@ -639,7 +608,9 @@ bool GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) { const FileEntry *DependsOnFile = FileMgr.getFile(ImportedFile, /*openFile=*/false, /*cacheFailure=*/false); - if (!DependsOnFile) + if (!DependsOnFile || + (StoredSize != DependsOnFile->getSize()) || + (StoredModTime != DependsOnFile->getModificationTime())) return true; // Record the dependency. -- cgit v1.2.3-70-g09d2 From fa69fc19121da3fc5673ccc00d4e8afa5b540a4f Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Fri, 22 Mar 2013 18:50:14 +0000 Subject: Simplify ModuleManager/GlobalModuleIndex interaction to eliminate a pile of extraneous stats(). The refactoring in r177367 introduced a serious performance bug where the "lazy" resolution of module file names in the global module index to actual module file entries in the module manager would perform repeated negative stats(). The new interaction requires the module manager to inform the global module index when a module file has been loaded, eliminating the extraneous stat()s and a bunch of bookkeeping on both sides. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@177750 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Serialization/GlobalModuleIndex.h | 65 ++++------------- include/clang/Serialization/ModuleManager.h | 18 ++--- lib/Serialization/ASTReader.cpp | 7 +- lib/Serialization/GlobalModuleIndex.cpp | 96 ++++++++----------------- lib/Serialization/ModuleManager.cpp | 65 +++++------------ 5 files changed, 76 insertions(+), 175 deletions(-) (limited to 'lib/Serialization/GlobalModuleIndex.cpp') diff --git a/include/clang/Serialization/GlobalModuleIndex.h b/include/clang/Serialization/GlobalModuleIndex.h index c7e9ab4535..eaf26d1df1 100644 --- a/include/clang/Serialization/GlobalModuleIndex.h +++ b/include/clang/Serialization/GlobalModuleIndex.h @@ -20,6 +20,7 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include @@ -43,36 +44,6 @@ using llvm::SmallVectorImpl; using llvm::StringRef; using serialization::ModuleFile; -/// \brief Abstract class that resolves a module file name to a ModuleFile -/// pointer, which is used to uniquely describe a module file. -class ModuleFileNameResolver { -public: - virtual ~ModuleFileNameResolver(); - - /// \brief Attempt to resolve the given module file name to a specific, - /// already-loaded module. - /// - /// \param FileName The name of the module file. - /// - /// \param ExpectedSize The size that the module file is expected to have. - /// If the actual size differs, the resolver should return \c true. - /// - /// \param ExpectedModTime The modification time that the module file is - /// expected to have. If the actual modification time differs, the resolver - /// should return \c true. - /// - /// \param File Will be set to the module file if there is one, or null - /// otherwise. - /// - /// \returns True if a module file exists but does not meet the size/ - /// modification time criteria, false if the module file is available or has - /// not yet been loaded. - virtual bool resolveModuleFileName(StringRef FileName, - off_t ExpectedSize, - time_t ExpectedModTime, - ModuleFile *&File) = 0; -}; - /// \brief A global index for a set of module files, providing information about /// the identifiers within those module files. /// @@ -89,9 +60,6 @@ class GlobalModuleIndex { /// as the global module index is live. llvm::OwningPtr Buffer; - /// \brief The module file name resolver. - ModuleFileNameResolver *Resolver; - /// \brief The hash table. /// /// This pointer actually points to a IdentifierIndexTable object, @@ -103,7 +71,7 @@ class GlobalModuleIndex { struct ModuleInfo { ModuleInfo() : File(), Size(), ModTime() { } - /// \brief The module file, if it is known. + /// \brief The module file, once it has been resolved. ModuleFile *File; /// \brief The module file name. @@ -119,9 +87,6 @@ class GlobalModuleIndex { /// \brief The module IDs on which this module directly depends. /// FIXME: We don't really need a vector here. llvm::SmallVector Dependencies; - - /// \brief The module IDs that directly depend on this module. - llvm::SmallVector ImportedBy; }; /// \brief A mapping from module IDs to information about each module. @@ -135,13 +100,19 @@ class GlobalModuleIndex { /// corresponding index into the \c Modules vector. llvm::DenseMap ModulesByFile; + /// \brief The set of modules that have not yet been resolved. + /// + /// The string is just the name of the module itself, which maps to the + /// module ID. + llvm::StringMap UnresolvedModules; + /// \brief The number of identifier lookups we performed. unsigned NumIdentifierLookups; /// \brief The number of identifier lookup hits, where we recognize the /// identifier. unsigned NumIdentifierLookupHits; - + /// \brief Internal constructor. Use \c readIndex() to read an index. explicit GlobalModuleIndex(llvm::MemoryBuffer *Buffer, llvm::BitstreamCursor Cursor); @@ -200,19 +171,11 @@ public: /// \returns true if the identifier is known to the index, false otherwise. bool lookupIdentifier(StringRef Name, HitSet &Hits); - /// \brief Set the module file name resolver. - void setResolver(ModuleFileNameResolver *Resolver) { - this->Resolver = Resolver; - } - - /// \brief Note that additional modules have been loaded, which invalidates - /// the module file -> module cache. - void noteAdditionalModulesLoaded() { - ModulesByFile.clear(); - } - - /// \brief Resolve the module file for the module with the given ID. - ModuleFile *resolveModuleFile(unsigned ID); + /// \brief Note that the given module file has been loaded. + /// + /// \returns false if the global module index has information about this + /// module file, and true otherwise. + bool loadedModuleFile(ModuleFile *File); /// \brief Print statistics to standard error. void printStats(); diff --git a/include/clang/Serialization/ModuleManager.h b/include/clang/Serialization/ModuleManager.h index 9b58b75ebb..c6e3265b3b 100644 --- a/include/clang/Serialization/ModuleManager.h +++ b/include/clang/Serialization/ModuleManager.h @@ -16,18 +16,18 @@ #define LLVM_CLANG_SERIALIZATION_MODULE_MANAGER_H #include "clang/Basic/FileManager.h" -#include "clang/Serialization/GlobalModuleIndex.h" #include "clang/Serialization/Module.h" #include "llvm/ADT/DenseMap.h" namespace clang { +class GlobalModuleIndex; class ModuleMap; namespace serialization { - + /// \brief Manages the set of modules loaded by an AST reader. -class ModuleManager : public ModuleFileNameResolver { +class ModuleManager { /// \brief The chain of AST files. The first entry is the one named by the /// user, the last one is the one that doesn't depend on anything further. SmallVector Chain; @@ -61,9 +61,6 @@ class ModuleManager : public ModuleFileNameResolver { /// just an non-owning pointer. GlobalModuleIndex *GlobalIndex; - /// \brief Update the set of modules files we know about known to the global index. - void updateModulesInCommonWithGlobalIndex(); - /// \brief State used by the "visit" operation to avoid malloc traffic in /// calls to visit(). struct VisitState { @@ -202,6 +199,10 @@ public: /// \brief Set the global module index. void setGlobalIndex(GlobalModuleIndex *Index); + /// \brief Notification from the AST reader that the given module file + /// has been "accepted", and will not (can not) be unloaded. + void moduleFileAccepted(ModuleFile *MF); + /// \brief Visit each of the modules. /// /// This routine visits each of the modules, starting with the @@ -270,11 +271,6 @@ public: time_t ExpectedModTime, const FileEntry *&File); - virtual bool resolveModuleFileName(StringRef FileName, - off_t ExpectedSize, - time_t ExpectedModTime, - ModuleFile *&File); - /// \brief View the graphviz representation of the module graph. void viewGraph(); }; diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp index 29538a13ef..0f674530bc 100644 --- a/lib/Serialization/ASTReader.cpp +++ b/lib/Serialization/ASTReader.cpp @@ -2872,11 +2872,16 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName, } } - // Setup the import locations. + // Setup the import locations and notify the module manager that we've + // committed to these module files. for (SmallVectorImpl::iterator M = Loaded.begin(), MEnd = Loaded.end(); M != MEnd; ++M) { ModuleFile &F = *M->Mod; + + ModuleMgr.moduleFileAccepted(&F); + + // Set the import location. F.DirectImportLoc = ImportLoc; if (!M->ImportedBy) F.ImportLoc = M->ImportLoc; diff --git a/lib/Serialization/GlobalModuleIndex.cpp b/lib/Serialization/GlobalModuleIndex.cpp index 8b5851dbe5..f9acb84728 100644 --- a/lib/Serialization/GlobalModuleIndex.cpp +++ b/lib/Serialization/GlobalModuleIndex.cpp @@ -16,6 +16,7 @@ #include "clang/Basic/OnDiskHashTable.h" #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/GlobalModuleIndex.h" +#include "clang/Serialization/Module.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallString.h" @@ -57,8 +58,6 @@ static const char * const IndexFileName = "modules.idx"; /// \brief The global index file version. static const unsigned CurrentVersion = 1; -ModuleFileNameResolver::~ModuleFileNameResolver() { } - //----------------------------------------------------------------------------// // Global module index reader. //----------------------------------------------------------------------------// @@ -121,7 +120,7 @@ typedef OnDiskChainedHashTable IdentifierIndexTable; GlobalModuleIndex::GlobalModuleIndex(llvm::MemoryBuffer *Buffer, llvm::BitstreamCursor Cursor) - : Buffer(Buffer), Resolver(), IdentifierIndex(), + : Buffer(Buffer), IdentifierIndex(), NumIdentifierLookups(), NumIdentifierLookupHits() { // Read the global index. @@ -201,6 +200,9 @@ GlobalModuleIndex::GlobalModuleIndex(llvm::MemoryBuffer *Buffer, // Make sure we're at the end of the record. assert(Idx == Record.size() && "More module info?"); + + // Record this module as an unresolved module. + UnresolvedModules[llvm::sys::path::stem(Modules[ID].FileName)] = ID; break; } @@ -215,14 +217,6 @@ GlobalModuleIndex::GlobalModuleIndex(llvm::MemoryBuffer *Buffer, break; } } - - // Compute imported-by relation. - for (unsigned ID = 0, IDEnd = Modules.size(); ID != IDEnd; ++ID) { - for (unsigned D = 0, DEnd = Modules[ID].Dependencies.size(); - D != DEnd; ++D) { - Modules[Modules[ID].Dependencies[D]].ImportedBy.push_back(ID); - } - } } GlobalModuleIndex::~GlobalModuleIndex() { } @@ -260,21 +254,14 @@ void GlobalModuleIndex::getKnownModules(SmallVectorImpl &ModuleFiles) { ModuleFiles.clear(); for (unsigned I = 0, N = Modules.size(); I != N; ++I) { - if (ModuleFile *File = resolveModuleFile(I)) - ModuleFiles.push_back(File); + if (ModuleFile *MF = Modules[I].File) + ModuleFiles.push_back(MF); } } void GlobalModuleIndex::getModuleDependencies( ModuleFile *File, SmallVectorImpl &Dependencies) { - // If the file -> index mapping is empty, populate it now. - if (ModulesByFile.empty()) { - for (unsigned I = 0, N = Modules.size(); I != N; ++I) { - resolveModuleFile(I); - } - } - // Look for information about this module file. llvm::DenseMap::iterator Known = ModulesByFile.find(File); @@ -285,7 +272,7 @@ void GlobalModuleIndex::getModuleDependencies( Dependencies.clear(); ArrayRef StoredDependencies = Modules[Known->second].Dependencies; for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) { - if (ModuleFile *MF = resolveModuleFile(I)) + if (ModuleFile *MF = Modules[I].File) Dependencies.push_back(MF); } } @@ -308,60 +295,39 @@ bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) { SmallVector ModuleIDs = *Known; for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) { - if (ModuleFile *File = resolveModuleFile(ModuleIDs[I])) - Hits.insert(File); + if (ModuleFile *MF = Modules[ModuleIDs[I]].File) + Hits.insert(MF); } ++NumIdentifierLookupHits; return true; } -ModuleFile *GlobalModuleIndex::resolveModuleFile(unsigned ID) { - assert(ID < Modules.size() && "Out-of-bounds module index"); - - // If we already have a module file, return it. - if (Modules[ID].File) - return Modules[ID].File; - - // If we don't have a file name, or if there is no resolver, we can't - // resolve this. - if (Modules[ID].FileName.empty() || !Resolver) - return 0; - - // Try to resolve this module file. - ModuleFile *File; - if (Resolver->resolveModuleFileName(Modules[ID].FileName, Modules[ID].Size, - Modules[ID].ModTime, File)) { - // Clear out the module files for anything that depends on this module. - llvm::SmallVector Stack; - - Stack.push_back(ID); - while (!Stack.empty()) { - unsigned Victim = Stack.back(); - Stack.pop_back(); - - // Mark this file as ignored. - Modules[Victim].File = 0; - Modules[Victim].FileName.clear(); - - // Push any not-yet-ignored imported modules onto the stack. - for (unsigned I = 0, N = Modules[Victim].ImportedBy.size(); I != N; ++I) { - unsigned NextVictim = Modules[Victim].ImportedBy[I]; - if (!Modules[NextVictim].FileName.empty()) - Stack.push_back(NextVictim); - } - } - - return 0; +bool GlobalModuleIndex::loadedModuleFile(ModuleFile *File) { + // Look for the module in the global module index based on the module name. + StringRef Name = llvm::sys::path::stem(File->FileName); + llvm::StringMap::iterator Known = UnresolvedModules.find(Name); + if (Known == UnresolvedModules.end()) { + return true; } - // If we have a module file, record it. - if (File) { - Modules[ID].File = File; - ModulesByFile[File] = ID; + // Rectify this module with the global module index. + ModuleInfo &Info = Modules[Known->second]; + + // If the size and modification time match what we expected, record this + // module file. + bool Failed = true; + if (File->File->getSize() == Info.Size && + File->File->getModificationTime() == Info.ModTime) { + Info.File = File; + ModulesByFile[File] = Known->second; + + Failed = false; } - return File; + // One way or another, we have resolved this module file. + UnresolvedModules.erase(Known); + return Failed; } void GlobalModuleIndex::printStats() { diff --git a/lib/Serialization/ModuleManager.cpp b/lib/Serialization/ModuleManager.cpp index 7384da5404..193a38b973 100644 --- a/lib/Serialization/ModuleManager.cpp +++ b/lib/Serialization/ModuleManager.cpp @@ -166,17 +166,6 @@ void ModuleManager::addInMemoryBuffer(StringRef FileName, InMemoryBuffers[Entry] = Buffer; } -void ModuleManager::updateModulesInCommonWithGlobalIndex() { - ModulesInCommonWithGlobalIndex.clear(); - - if (!GlobalIndex) - return; - - // Collect the set of modules known to the global index. - GlobalIndex->noteAdditionalModulesLoaded(); - GlobalIndex->getKnownModules(ModulesInCommonWithGlobalIndex); -} - ModuleManager::VisitState *ModuleManager::allocateVisitState() { // Fast path: if we have a cached state, use it. if (FirstVisitState) { @@ -198,10 +187,25 @@ void ModuleManager::returnVisitState(VisitState *State) { void ModuleManager::setGlobalIndex(GlobalModuleIndex *Index) { GlobalIndex = Index; - if (GlobalIndex) { - GlobalIndex->setResolver(this); + if (!GlobalIndex) { + ModulesInCommonWithGlobalIndex.clear(); + return; } - updateModulesInCommonWithGlobalIndex(); + + // Notify the global module index about all of the modules we've already + // loaded. + for (unsigned I = 0, N = Chain.size(); I != N; ++I) { + if (!GlobalIndex->loadedModuleFile(Chain[I])) { + ModulesInCommonWithGlobalIndex.push_back(Chain[I]); + } + } +} + +void ModuleManager::moduleFileAccepted(ModuleFile *MF) { + if (!GlobalIndex || GlobalIndex->loadedModuleFile(MF)) + return; + + ModulesInCommonWithGlobalIndex.push_back(MF); } ModuleManager::ModuleManager(FileManager &FileMgr) @@ -264,11 +268,6 @@ ModuleManager::visit(bool (*Visitor)(ModuleFile &M, void *UserData), assert(VisitOrder.size() == N && "Visitation order is wrong?"); - // We may need to update the set of modules we have in common with the - // global module index, since modules could have been added to the module - // manager since we loaded the global module index. - updateModulesInCommonWithGlobalIndex(); - delete FirstVisitState; FirstVisitState = 0; } @@ -387,34 +386,6 @@ bool ModuleManager::lookupModuleFile(StringRef FileName, return false; } -bool ModuleManager::resolveModuleFileName(StringRef FileName, - off_t ExpectedSize, - time_t ExpectedModTime, - ModuleFile *&File) { - File = 0; - - // Look for the file entry corresponding to this name. - const FileEntry *F; - if (lookupModuleFile(FileName, ExpectedSize, ExpectedModTime, F)) - return true; - - // If there is no file, we've succeeded (trivially). - if (!F) - return false; - - // Determine whether we have a module file associated with this file entry. - llvm::DenseMap::iterator Known - = Modules.find(F); - if (Known == Modules.end()) { - // We don't know about this module file; invalidate the cache. - FileMgr.invalidateCache(F); - return false; - } - - File = Known->second; - return false; -} - #ifndef NDEBUG namespace llvm { template<> -- cgit v1.2.3-70-g09d2 From 87f9d81d0ab806dcf6ca50a0c068dcb2ba7f51b3 Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Wed, 17 Apr 2013 22:10:55 +0000 Subject: [Modules] Use global index to improve typo correction performance Typo correction for an unqualified name needs to walk through all of the identifier tables of all modules. When we have a global index, just walk its identifier table only. rdar://13425732 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@179730 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/IdentifierTable.h | 2 +- include/clang/Serialization/ASTReader.h | 2 +- include/clang/Serialization/GlobalModuleIndex.h | 5 ++++ lib/Basic/IdentifierTable.cpp | 2 +- lib/Serialization/ASTReader.cpp | 5 +++- lib/Serialization/GlobalModuleIndex.cpp | 31 +++++++++++++++++++++++++ 6 files changed, 43 insertions(+), 4 deletions(-) (limited to 'lib/Serialization/GlobalModuleIndex.cpp') diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h index c04a893c6f..d4d53390bd 100644 --- a/include/clang/Basic/IdentifierTable.h +++ b/include/clang/Basic/IdentifierTable.h @@ -394,7 +394,7 @@ public: /// /// \returns A new iterator into the set of known identifiers. The /// caller is responsible for deleting this iterator. - virtual IdentifierIterator *getIdentifiers() const; + virtual IdentifierIterator *getIdentifiers(); }; /// \brief An abstract class used to resolve numerical identifier diff --git a/include/clang/Serialization/ASTReader.h b/include/clang/Serialization/ASTReader.h index 925533678c..a773e41667 100644 --- a/include/clang/Serialization/ASTReader.h +++ b/include/clang/Serialization/ASTReader.h @@ -1558,7 +1558,7 @@ public: /// \brief Retrieve an iterator into the set of all identifiers /// in all loaded AST files. - virtual IdentifierIterator *getIdentifiers() const; + virtual IdentifierIterator *getIdentifiers(); /// \brief Load the contents of the global method pool for a given /// selector. diff --git a/include/clang/Serialization/GlobalModuleIndex.h b/include/clang/Serialization/GlobalModuleIndex.h index eaf26d1df1..ab91f4009c 100644 --- a/include/clang/Serialization/GlobalModuleIndex.h +++ b/include/clang/Serialization/GlobalModuleIndex.h @@ -146,6 +146,11 @@ public: static std::pair readIndex(StringRef Path); + /// \brief Returns an iterator for identifiers stored in the index table. + /// + /// The caller accepts ownership of the returned object. + IdentifierIterator *createIdentifierIterator() const; + /// \brief Retrieve the set of modules that have up-to-date indexes. /// /// \param ModuleFiles Will be populated with the set of module files that diff --git a/lib/Basic/IdentifierTable.cpp b/lib/Basic/IdentifierTable.cpp index 429d9d8cb2..951c718d18 100644 --- a/lib/Basic/IdentifierTable.cpp +++ b/lib/Basic/IdentifierTable.cpp @@ -65,7 +65,7 @@ namespace { }; } -IdentifierIterator *IdentifierInfoLookup::getIdentifiers() const { +IdentifierIterator *IdentifierInfoLookup::getIdentifiers() { return new EmptyLookupIterator(); } diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp index 9893823a12..5123b79d2e 100644 --- a/lib/Serialization/ASTReader.cpp +++ b/lib/Serialization/ASTReader.cpp @@ -6109,7 +6109,10 @@ StringRef ASTIdentifierIterator::Next() { return Result; } -IdentifierIterator *ASTReader::getIdentifiers() const { +IdentifierIterator *ASTReader::getIdentifiers() { + if (!loadGlobalIndex()) + return GlobalIndex->createIdentifierIterator(); + return new ASTIdentifierIterator(*this); } diff --git a/lib/Serialization/GlobalModuleIndex.cpp b/lib/Serialization/GlobalModuleIndex.cpp index f9acb84728..b6693e40e5 100644 --- a/lib/Serialization/GlobalModuleIndex.cpp +++ b/lib/Serialization/GlobalModuleIndex.cpp @@ -818,3 +818,34 @@ GlobalModuleIndex::writeIndex(FileManager &FileMgr, StringRef Path) { // We're done. return EC_None; } + +namespace { + class GlobalIndexIdentifierIterator : public IdentifierIterator { + /// \brief The current position within the identifier lookup table. + IdentifierIndexTable::key_iterator Current; + + /// \brief The end position within the identifier lookup table. + IdentifierIndexTable::key_iterator End; + + public: + explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) { + Current = Idx.key_begin(); + End = Idx.key_end(); + } + + virtual StringRef Next() { + if (Current == End) + return StringRef(); + + StringRef Result = *Current; + ++Current; + return Result; + } + }; +} + +IdentifierIterator *GlobalModuleIndex::createIdentifierIterator() const { + IdentifierIndexTable &Table = + *static_cast(IdentifierIndex); + return new GlobalIndexIdentifierIterator(Table); +} -- cgit v1.2.3-70-g09d2