aboutsummaryrefslogtreecommitdiff
path: root/include/clang
diff options
context:
space:
mode:
author Douglas Gregor <dgregor@apple.com> 2009-04-21 22:25:48 +0000
committer Douglas Gregor <dgregor@apple.com> 2009-04-21 22:25:48 +0000
commit 668c1a4fdcc56bdd050256b1688e116fe84b72db (patch)
tree bd72a7861c9d2ba6b5f1b37cd41aa12910c20a8b /include/clang
parent 5b54b88c4082bb81b8b341b622fda9a85cbd5fad (diff)
Lazy deserialization of the declaration chains associated with
identifiers from a precompiled header.

This patch changes the primary name lookup method for entities within a precompiled header. Previously, we would load all of the names of declarations at translation unit scope into a large DenseMap (inside the TranslationUnitDecl's DeclContext), and then perform a special "last resort" lookup into this DeclContext when we knew there was a PCH file (see Sema::LookupName). Now, when we see an identifier named for the first time, we load all of the declarations with that name that are visible from the translation unit into the IdentifierInfo's chain of declarations. Thus, the explicit "look into the translation unit's DeclContext" code is gone, and Sema effectively uses the same IdentifierInfo-based name lookup mechanism whether we are using a PCH file or not.

This approach should help PCH scale with the size of the input program rather than the size of the PCH file. The "Hello, World!" application with Carbon.h as a PCH file now loads 20% of the identifiers in the PCH file rather than 85% of the identifiers. 90% of the 20% of identifiers loaded are actually loaded when we deserialize the preprocessor state. The next step is to make the preprocessor load macros lazily, which should drastically reduce the number of types, declarations, and identifiers loaded for "Hello, World".

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@69737 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include/clang')
-rw-r--r-- include/clang/AST/ExternalASTSource.h | 12
-rw-r--r-- include/clang/Basic/IdentifierTable.h | 38
-rw-r--r-- include/clang/Basic/OnDiskHashTable.h | 29
-rw-r--r-- include/clang/Frontend/PCHReader.h | 60
-rw-r--r-- include/clang/Sema/ExternalSemaSource.h | 45
-rw-r--r-- include/clang/Sema/SemaConsumer.h | 2
6 files changed, 162 insertions, 24 deletions
diff --git a/include/clang/AST/ExternalASTSource.h b/include/clang/AST/ExternalASTSource.h
index 267b4838a4..ef09770933 100644
--- a/include/clang/AST/ExternalASTSource.h
+++ b/include/clang/AST/ExternalASTSource.h
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the ExternalASTSource interface,
+// This file defines the ExternalASTSource interface, which enables
+// construction of AST nodes from some external source.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_EXTERNAL_AST_SOURCE_H
@@ -22,6 +23,7 @@ namespace clang {
class ASTConsumer;
class Decl;
class DeclContext;
+class ExternalSemaSource; // layering violation required for downcasting
class Stmt;
/// \brief The deserialized representation of a set of declarations
@@ -44,7 +46,15 @@ struct VisibleDeclaration {
/// actual type and declaration nodes, and read parts of declaration
/// contexts.
class ExternalASTSource {
+ /// \brief Whether this AST source also provides information for
+ /// semantic analysis.
+ bool SemaSource;
+
+ friend class ExternalSemaSource;
+
public:
+ ExternalASTSource() : SemaSource(false) { }
+
virtual ~ExternalASTSource();
/// \brief Resolve a type ID into a type, potentially building a new
diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h
index d4f680494e..3156bbc4e9 100644
--- a/include/clang/Basic/IdentifierTable.h
+++ b/include/clang/Basic/IdentifierTable.h
@@ -229,7 +229,7 @@ private:
};
/// IdentifierInfoLookup - An abstract class used by IdentifierTable that
-/// provides an interface for for performing lookups from strings
+/// provides an interface for performing lookups from strings
/// (const char *) to IdentifierInfo objects.
class IdentifierInfoLookup {
public:
@@ -260,6 +260,11 @@ public:
IdentifierTable(const LangOptions &LangOpts,
IdentifierInfoLookup* externalLookup = 0);
+ /// \brief Set the external identifier lookup mechanism.
+ void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
+ ExternalLookup = IILookup;
+ }
+
llvm::BumpPtrAllocator& getAllocator() {
return HashTable.getAllocator();
}
@@ -295,6 +300,34 @@ public:
return *II;
}
+ /// \brief Creates a new IdentifierInfo from the given string.
+ ///
+ /// This is a lower-level version of get() that requires that this
+ /// identifier not be known previously and that does not consult an
+ /// external source for identifiers. In particular, external
+ /// identifier sources can use this routine to build IdentifierInfo
+ /// nodes and then introduce additional information about those
+ /// identifiers.
+ IdentifierInfo &CreateIdentifierInfo(const char *NameStart,
+ const char *NameEnd) {
+ llvm::StringMapEntry<IdentifierInfo*> &Entry =
+ HashTable.GetOrCreateValue(NameStart, NameEnd);
+
+ IdentifierInfo *II = Entry.getValue();
+ assert(!II && "IdentifierInfo already exists");
+
+ // Lookups failed, make a new IdentifierInfo.
+ void *Mem = getAllocator().Allocate<IdentifierInfo>();
+ II = new (Mem) IdentifierInfo();
+ Entry.setValue(II);
+
+ // Make sure getName() knows how to find the IdentifierInfo
+ // contents.
+ II->Entry = &Entry;
+
+ return *II;
+ }
+
IdentifierInfo &get(const char *Name) {
return get(Name, Name+strlen(Name));
}
@@ -304,14 +337,11 @@ public:
return get(NameBytes, NameBytes+Name.size());
}
-private:
typedef HashTableTy::const_iterator iterator;
typedef HashTableTy::const_iterator const_iterator;
iterator begin() const { return HashTable.begin(); }
iterator end() const { return HashTable.end(); }
-public:
-
unsigned size() const { return HashTable.size(); }
/// PrintStats - Print some statistics to stderr that indicate how well the
diff --git a/include/clang/Basic/OnDiskHashTable.h b/include/clang/Basic/OnDiskHashTable.h
index 631d497f1a..3caeb9ffd8 100644
--- a/include/clang/Basic/OnDiskHashTable.h
+++ b/include/clang/Basic/OnDiskHashTable.h
@@ -242,6 +242,8 @@ class OnDiskChainedHashTable {
const unsigned NumEntries;
const unsigned char* const Buckets;
const unsigned char* const Base;
+ Info InfoObj;
+
public:
typedef typename Info::internal_key_type internal_key_type;
typedef typename Info::external_key_type external_key_type;
@@ -249,9 +251,10 @@ public:
OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries,
const unsigned char* buckets,
- const unsigned char* base)
+ const unsigned char* base,
+ const Info &InfoObj = Info())
: NumBuckets(numBuckets), NumEntries(numEntries),
- Buckets(buckets), Base(base) {
+ Buckets(buckets), Base(base), InfoObj(InfoObj) {
assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
"'buckets' must have a 4-byte alignment");
}
@@ -267,22 +270,27 @@ public:
internal_key_type key;
const unsigned char* const data;
const unsigned len;
+ Info *InfoObj;
public:
iterator() : data(0), len(0) {}
- iterator(const internal_key_type k, const unsigned char* d, unsigned l)
- : key(k), data(d), len(l) {}
+ iterator(const internal_key_type k, const unsigned char* d, unsigned l,
+ Info *InfoObj)
+ : key(k), data(d), len(l), InfoObj(InfoObj) {}
- data_type operator*() const { return Info::ReadData(key, data, len); }
+ data_type operator*() const { return InfoObj->ReadData(key, data, len); }
bool operator==(const iterator& X) const { return X.data == data; }
bool operator!=(const iterator& X) const { return X.data != data; }
};
- iterator find(const external_key_type& eKey) {
+ iterator find(const external_key_type& eKey, Info *InfoPtr = 0) {
+ if (!InfoPtr)
+ InfoPtr = &InfoObj;
+
using namespace io;
const internal_key_type& iKey = Info::GetInternalKey(eKey);
unsigned key_hash = Info::ComputeHash(iKey);
- // Each bucket is just a 32-bit offset into the PTH file.
+ // Each bucket is just a 32-bit offset into the hash table file.
unsigned idx = key_hash & (NumBuckets - 1);
const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx;
@@ -319,7 +327,7 @@ public:
}
// The key matches!
- return iterator(X, Items + L.first, L.second);
+ return iterator(X, Items + L.first, L.second, InfoPtr);
}
return iterator();
@@ -329,7 +337,8 @@ public:
static OnDiskChainedHashTable* Create(const unsigned char* buckets,
- const unsigned char* const base) {
+ const unsigned char* const base,
+ const Info &InfoObj = Info()) {
using namespace io;
assert(buckets > base);
assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
@@ -338,7 +347,7 @@ public:
unsigned numBuckets = ReadLE32(buckets);
unsigned numEntries = ReadLE32(buckets);
return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets,
- base);
+ base, InfoObj);
}
};
diff --git a/include/clang/Frontend/PCHReader.h b/include/clang/Frontend/PCHReader.h
index 6ee549abf5..3af147f7a9 100644
--- a/include/clang/Frontend/PCHReader.h
+++ b/include/clang/Frontend/PCHReader.h
@@ -15,9 +15,10 @@
#include "clang/Frontend/PCHBitCodes.h"
#include "clang/AST/DeclarationName.h"
-#include "clang/AST/ExternalASTSource.h"
+#include "clang/Sema/ExternalSemaSource.h"
#include "clang/AST/Type.h"
#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/IdentifierTable.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
@@ -44,7 +45,9 @@ class Decl;
class DeclContext;
class GotoStmt;
class LabelStmt;
+class NamedDecl;
class Preprocessor;
+class Sema;
class SwitchCase;
/// \brief Reads a precompiled header containing the contents of a
@@ -59,11 +62,15 @@ class SwitchCase;
/// The PCH reader provides lazy de-serialization of declarations, as
/// required when traversing the AST. Only those AST nodes that are
/// actually required will be de-serialized.
-class PCHReader : public ExternalASTSource {
+class PCHReader : public ExternalSemaSource, public IdentifierInfoLookup {
public:
enum PCHReadResult { Success, Failure, IgnorePCH };
private:
+ /// \brief The semantic analysis object that will be processing the
+ /// PCH file and the translation unit that uses it.
+ Sema *SemaObj;
+
/// \brief The preprocessor that will be loading the source file.
Preprocessor &PP;
@@ -116,8 +123,14 @@ private:
/// DeclContext.
DeclContextOffsetsMap DeclContextOffsets;
- /// \brief String data for the identifiers in the PCH file.
- const char *IdentifierTable;
+ /// \brief Actual data for the on-disk hash table.
+ ///
+ /// FIXME: This will eventually go away.
+ const char *IdentifierTableData;
+
+ /// \brief A pointer to an on-disk hash table of opaque type
+ /// IdentifierHashTable.
+ void *IdentifierLookupTable;
/// \brief String data for identifiers, indexed by the identifier ID
/// minus one.
@@ -158,10 +171,10 @@ private:
/// in the PCH file.
unsigned TotalNumStatements;
- /// \brief
+ /// \brief FIXME: document!
llvm::SmallVector<uint64_t, 4> SpecialTypes;
- PCHReadResult ReadPCHBlock();
+ PCHReadResult ReadPCHBlock(uint64_t &PreprocessorBlockOffset);
bool CheckPredefinesBuffer(const char *PCHPredef,
unsigned PCHPredefLen,
FileID PCHBufferID);
@@ -179,8 +192,9 @@ private:
public:
typedef llvm::SmallVector<uint64_t, 64> RecordData;
- PCHReader(Preprocessor &PP, ASTContext &Context)
- : PP(PP), Context(Context), IdentifierTable(0), NumStatementsRead(0) { }
+ explicit PCHReader(Preprocessor &PP, ASTContext &Context)
+ : SemaObj(0), PP(PP), Context(Context),
+ IdentifierTableData(0), NumStatementsRead(0) { }
~PCHReader() {}
@@ -246,6 +260,23 @@ public:
/// \brief Print some statistics about PCH usage.
virtual void PrintStats();
+ /// \brief Initialize the semantic source with the Sema instance
+ /// being used to perform semantic analysis on the abstract syntax
+ /// tree.
+ virtual void InitializeSema(Sema &S);
+
+ /// \brief Retrieve the IdentifierInfo for the named identifier.
+ ///
+ /// This routine builds a new IdentifierInfo for the given
+ /// identifier. If any declarations with this name are visible from
+ /// translation unit scope, their declarations will be deserialized
+ /// and introduced into the declaration chain of the
+ /// identifier. FIXME: if this identifier names a macro, deserialize
+ /// the macro.
+ virtual IdentifierInfo* get(const char *NameStart, const char *NameEnd);
+
+ void SetIdentifierInfo(unsigned ID, const IdentifierInfo *II);
+
/// \brief Report a diagnostic.
DiagnosticBuilder Diag(unsigned DiagID);
@@ -284,9 +315,22 @@ public:
/// supplements.
ASTContext &getContext() { return Context; }
+ // FIXME: temporary hack to store declarations that we deserialized
+ // before we had access to the Sema object.
+ llvm::SmallVector<NamedDecl *, 16> TUDecls;
+
+ /// \brief Retrieve the semantic analysis object used to analyze the
+ /// translation unit in which the precompiled header is being
+ /// imported.
+ Sema *getSema() { return SemaObj; }
+
/// \brief Retrieve the stream that this PCH reader is reading from.
llvm::BitstreamReader &getStream() { return Stream; }
+ /// \brief Retrieve the identifier table associated with the
+ /// preprocessor.
+ IdentifierTable &getIdentifierTable();
+
/// \brief Record that the given ID maps to the given switch-case
/// statement.
void RecordSwitchCaseID(SwitchCase *SC, unsigned ID);
diff --git a/include/clang/Sema/ExternalSemaSource.h b/include/clang/Sema/ExternalSemaSource.h
new file mode 100644
index 0000000000..1c216e4b86
--- /dev/null
+++ b/include/clang/Sema/ExternalSemaSource.h
@@ -0,0 +1,45 @@
+//===--- ExternalSemaSource.h - External Sema Interface ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ExternalSemaSource interface.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_SEMA_EXTERNAL_SEMA_SOURCE_H
+#define LLVM_CLANG_SEMA_EXTERNAL_SEMA_SOURCE_H
+
+#include "clang/AST/ExternalASTSource.h"
+
+namespace clang {
+
+class Sema;
+
+/// \brief An abstract interface that should be implemented by
+/// external AST sources that also provide information for semantic
+/// analysis.
+class ExternalSemaSource : public ExternalASTSource {
+public:
+ ExternalSemaSource() {
+ ExternalASTSource::SemaSource = true;
+ }
+
+ /// \brief Initialize the semantic source with the Sema instance
+ /// being used to perform semantic analysis on the abstract syntax
+ /// tree.
+ virtual void InitializeSema(Sema &S) {}
+
+ // isa/cast/dyn_cast support
+ static bool classof(const ExternalASTSource *Source) {
+ return Source->SemaSource;
+ }
+ static bool classof(const ExternalSemaSource *) { return true; }
+};
+
+} // end namespace clang
+
+#endif
diff --git a/include/clang/Sema/SemaConsumer.h b/include/clang/Sema/SemaConsumer.h
index 25d4253390..e821947035 100644
--- a/include/clang/Sema/SemaConsumer.h
+++ b/include/clang/Sema/SemaConsumer.h
@@ -25,7 +25,7 @@ namespace clang {
/// analysis of the entities in those ASTs.
class SemaConsumer : public ASTConsumer {
public:
- explicit SemaConsumer() {
+ SemaConsumer() {
ASTConsumer::SemaConsumer = true;
}