aboutsummaryrefslogtreecommitdiff
path: root/lib/Serialization/ASTWriter.cpp
diff options
context:
space:
mode:
authorDouglas Gregor <dgregor@apple.com>2011-02-10 17:09:37 +0000
committerDouglas Gregor <dgregor@apple.com>2011-02-10 17:09:37 +0000
commitcfbf1c7536e016dc275139dd842d4a5f059a749f (patch)
treec895fe119037dbdd1fb98717446b9263255c3c82 /lib/Serialization/ASTWriter.cpp
parentc16bb76f25ada63864aa89827a9906c1579d2ad3 (diff)
Implement two related optimizations that make de-serialization of
AST/PCH files more lazy: - Don't preload all of the file source-location entries when reading the AST file. Instead, load them lazily, when needed. - Only look up header-search information (whether a header was already #import'd, how many times it's been included, etc.) when it's needed by the preprocessor, rather than pre-populating it. Previously, we would pre-load all of the file source-location entries, which also populated the header-search information structure. This was a relatively minor performance issue, since we would end up stat()'ing all of the headers stored within a AST/PCH file when the AST/PCH file was loaded. In the normal PCH use case, the stat()s were cached, so the cost--of preloading ~860 source-location entries in the Cocoa.h case---was relatively low. However, the recent optimization that replaced stat+open with open+fstat turned this into a major problem, since the preloading of source-location entries would now end up opening those files. Worse, those files wouldn't be closed until the file manager was destroyed, so just opening a Cocoa.h PCH file would hold on to ~860 file descriptors, and it was easy to blow through the process's limit on the number of open file descriptors. By eliminating the preloading of these files, we neither open nor stat the headers stored in the PCH/AST file until they're actually needed for something. Concretely, we went from *** HeaderSearch Stats: 835 files tracked. 364 #import/#pragma once files. 823 included exactly once. 6 max times a file is included. 3 #include/#include_next/#import. 0 #includes skipped due to the multi-include optimization. 1 framework lookups. 0 subframework lookups. *** Source Manager Stats: 835 files mapped, 3 mem buffers mapped. 37460 SLocEntry's allocated, 11215575B of Sloc address space used. 62 bytes of files mapped, 0 files with line #'s computed. with a trivial program that uses a chained PCH including a Cocoa PCH to *** HeaderSearch Stats: 4 files tracked. 1 #import/#pragma once files. 3 included exactly once. 2 max times a file is included. 3 #include/#include_next/#import. 0 #includes skipped due to the multi-include optimization. 1 framework lookups. 0 subframework lookups. *** Source Manager Stats: 3 files mapped, 3 mem buffers mapped. 37460 SLocEntry's allocated, 11215575B of Sloc address space used. 62 bytes of files mapped, 0 files with line #'s computed. for the same program. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@125286 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Serialization/ASTWriter.cpp')
-rw-r--r--lib/Serialization/ASTWriter.cpp154
1 files changed, 133 insertions, 21 deletions
diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp
index 624cf10349..e4fe99cde3 100644
--- a/lib/Serialization/ASTWriter.cpp
+++ b/lib/Serialization/ASTWriter.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include <cstdio>
+#include <string.h>
using namespace clang;
using namespace clang::serialization;
@@ -739,7 +740,8 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(DECL_UPDATES);
RECORD(CXX_BASE_SPECIFIER_OFFSETS);
RECORD(DIAG_PRAGMA_MAPPINGS);
-
+ RECORD(HEADER_SEARCH_TABLE);
+
// SourceManager Block.
BLOCK(SOURCE_MANAGER_BLOCK);
RECORD(SM_SLOC_FILE_ENTRY);
@@ -1146,11 +1148,6 @@ static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) {
// FileEntry fields.
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 12)); // Size
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // Modification time
- // HeaderFileInfo fields.
- Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isImport
- Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // DirInfo
- Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumIncludes
- Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // ControllingMacro
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
return Stream.EmitAbbrev(Abbrev);
}
@@ -1193,6 +1190,135 @@ static unsigned CreateSLocInstantiationAbbrev(llvm::BitstreamWriter &Stream) {
return Stream.EmitAbbrev(Abbrev);
}
+namespace {
+ // Trait used for the on-disk hash table of header search information.
+ class HeaderFileInfoTrait {
+ ASTWriter &Writer;
+ HeaderSearch &HS;
+
+ public:
+ HeaderFileInfoTrait(ASTWriter &Writer, HeaderSearch &HS)
+ : Writer(Writer), HS(HS) { }
+
+ typedef const char *key_type;
+ typedef key_type key_type_ref;
+
+ typedef HeaderFileInfo data_type;
+ typedef const data_type &data_type_ref;
+
+ static unsigned ComputeHash(const char *path) {
+ // The hash is based only on the filename portion of the key, so that the
+ // reader can match based on filenames when symlinking or excess path
+ // elements ("foo/../", "../") change the form of the name. However,
+ // complete path is still the key.
+ return llvm::HashString(llvm::sys::path::filename(path));
+ }
+
+ std::pair<unsigned,unsigned>
+ EmitKeyDataLength(llvm::raw_ostream& Out, const char *path,
+ data_type_ref Data) {
+ unsigned StrLen = strlen(path);
+ clang::io::Emit16(Out, StrLen);
+ unsigned DataLen = 1 + 2 + 4;
+ clang::io::Emit8(Out, DataLen);
+ return std::make_pair(StrLen + 1, DataLen);
+ }
+
+ void EmitKey(llvm::raw_ostream& Out, const char *path, unsigned KeyLen) {
+ Out.write(path, KeyLen);
+ }
+
+ void EmitData(llvm::raw_ostream &Out, key_type_ref,
+ data_type_ref Data, unsigned DataLen) {
+ using namespace clang::io;
+ uint64_t Start = Out.tell(); (void)Start;
+
+ unsigned char Flags = (Data.isImport << 3)
+ | (Data.DirInfo << 1)
+ | Data.Resolved;
+ Emit8(Out, (uint8_t)Flags);
+ Emit16(Out, (uint16_t) Data.NumIncludes);
+
+ if (!Data.ControllingMacro)
+ Emit32(Out, (uint32_t)Data.ControllingMacroID);
+ else
+ Emit32(Out, (uint32_t)Writer.getIdentifierRef(Data.ControllingMacro));
+ assert(Out.tell() - Start == DataLen && "Wrong data length");
+ }
+ };
+} // end anonymous namespace
+
+/// \brief Write the header search block for the list of files that
+///
+/// \param HS The header search structure to save.
+///
+/// \param Chain Whether we're creating a chained AST file.
+void ASTWriter::WriteHeaderSearch(HeaderSearch &HS, const char* isysroot) {
+ llvm::SmallVector<const FileEntry *, 16> FilesByUID;
+ HS.getFileMgr().GetUniqueIDMapping(FilesByUID);
+
+ if (FilesByUID.size() > HS.header_file_size())
+ FilesByUID.resize(HS.header_file_size());
+
+ HeaderFileInfoTrait GeneratorTrait(*this, HS);
+ OnDiskChainedHashTableGenerator<HeaderFileInfoTrait> Generator;
+ llvm::SmallVector<const char *, 4> SavedStrings;
+ unsigned NumHeaderSearchEntries = 0;
+ for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) {
+ const FileEntry *File = FilesByUID[UID];
+ if (!File)
+ continue;
+
+ const HeaderFileInfo &HFI = HS.header_file_begin()[UID];
+ if (HFI.External && Chain)
+ continue;
+
+ // Turn the file name into an absolute path, if it isn't already.
+ const char *Filename = File->getName();
+ Filename = adjustFilenameForRelocatablePCH(Filename, isysroot);
+
+ // If we performed any translation on the file name at all, we need to
+ // save this string, since the generator will refer to it later.
+ if (Filename != File->getName()) {
+ Filename = strdup(Filename);
+ SavedStrings.push_back(Filename);
+ }
+
+ Generator.insert(Filename, HFI, GeneratorTrait);
+ ++NumHeaderSearchEntries;
+ }
+
+ // Create the on-disk hash table in a buffer.
+ llvm::SmallString<4096> TableData;
+ uint32_t BucketOffset;
+ {
+ llvm::raw_svector_ostream Out(TableData);
+ // Make sure that no bucket is at offset 0
+ clang::io::Emit32(Out, 0);
+ BucketOffset = Generator.Emit(Out, GeneratorTrait);
+ }
+
+ // Create a blob abbreviation
+ using namespace llvm;
+ BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
+ Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_TABLE));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+ unsigned TableAbbrev = Stream.EmitAbbrev(Abbrev);
+
+ // Write the stat cache
+ RecordData Record;
+ Record.push_back(HEADER_SEARCH_TABLE);
+ Record.push_back(BucketOffset);
+ Record.push_back(NumHeaderSearchEntries);
+ Stream.EmitRecordWithBlob(TableAbbrev, Record, TableData.str());
+
+ // Free all of the strings we had to duplicate.
+ for (unsigned I = 0, N = SavedStrings.size(); I != N; ++I)
+ free((void*)SavedStrings[I]);
+}
+
/// \brief Writes the block containing the serialized form of the
/// source manager.
///
@@ -1294,16 +1420,6 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
Record.push_back(Content->Entry->getSize());
Record.push_back(Content->Entry->getModificationTime());
- // Emit header-search information associated with this file.
- HeaderFileInfo HFI;
- HeaderSearch &HS = PP.getHeaderSearchInfo();
- if (Content->Entry->getUID() < HS.header_file_size())
- HFI = HS.header_file_begin()[Content->Entry->getUID()];
- Record.push_back(HFI.isImport);
- Record.push_back(HFI.DirInfo);
- Record.push_back(HFI.NumIncludes);
- AddIdentifierRef(HFI.ControllingMacro, Record);
-
// Turn the file name into an absolute path, if it isn't already.
const char *Filename = Content->Entry->getName();
llvm::SmallString<128> FilePath(Filename);
@@ -1312,11 +1428,6 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
Filename = adjustFilenameForRelocatablePCH(Filename, isysroot);
Stream.EmitRecordWithBlob(SLocFileAbbrv, Record, Filename);
-
- // FIXME: For now, preload all file source locations, so that
- // we get the appropriate File entries in the reader. This is
- // a temporary measure.
- PreloadSLocs.push_back(BaseSLocID + SLocEntryOffsets.size());
} else {
// The source location entry is a buffer. The blob associated
// with this entry contains the contents of the buffer.
@@ -2596,6 +2707,7 @@ void ASTWriter::WriteASTCore(Sema &SemaRef, MemorizeStatCalls *StatCalls,
Stream.ExitBlock();
WritePreprocessor(PP);
+ WriteHeaderSearch(PP.getHeaderSearchInfo(), isysroot);
WriteSelectors(SemaRef);
WriteReferencedSelectorsPool(SemaRef);
WriteIdentifierTable(PP);