aboutsummaryrefslogtreecommitdiff
path: root/include/clang/Basic/IdentifierTable.h
diff options
context:
space:
mode:
authorTed Kremenek <kremenek@apple.com>2009-01-20 23:28:34 +0000
committerTed Kremenek <kremenek@apple.com>2009-01-20 23:28:34 +0000
commitea9c26b3dbd74a1497f5609ae6e19a85f42b6073 (patch)
tree636795bd11a70b5b554caf5e7cce4a3e6bdbe979 /include/clang/Basic/IdentifierTable.h
parentd427023c334fe03105d9359711a3df4d6f23b344 (diff)
Fix: <rdar://problem/6510344> [pth] PTH slows down regular lexer considerably (when it has substantial work)
Changes to IdentifierTable: - High-level summary: StringMap never owns IdentifierInfos. It just references them. - The string map now has StringMapEntry<IdentifierInfo*> instead of StringMapEntry<IdentifierInfo>. The IdentifierInfo object is allocated using the same bump pointer allocator as used by the StringMap. Changes to IdentifierInfo: - Added an extra pointer to point to the StringMapEntry<IdentifierInfo*> in the string map. This pointer will be null if the IdentifierInfo* is *only* used by the PTHLexer (that is it isn't in the StringMap). Algorithmic changes: - Non-PTH case: IdentifierInfo::get() will always consult the StringMap first to see if we have an IdentifierInfo object. If that StringMapEntry references a null pointer, we allocate a new one from the BumpPtrAllocator and update the reference in the StringMapEntry. - PTH case: We do the same lookup as with the non-PTH case, but if we don't get a hit in the StringMap we do a secondary lookup in the PTHManager for the IdentifierInfo. If we don't find an IdentifierInfo we create a new one as in the non-PTH case. If we do find and IdentifierInfo in the PTHManager, we update the StringMapEntry to refer to it so that the IdentifierInfo will be found on the next StringMap lookup. This way we only do a binary search in the PTH file at most once for a given IdentifierInfo. This greatly speeds things up for source files containing a non-trivial amount of code. Performance impact: While these changes do add some extra indirection in IdentifierTable to access an IdentifierInfo*, I saw speedups even in the non-PTH case as well. Non-PTH: For -fsyntax-only on Cocoa.h, we see a 6% speedup. PTH (with Cocoa.h in token cache): 11% speedup. I also did an experiment where we did -fsyntax-only on a source file including a large header and Cocoa.h, but the token cache did not contain the larger header. For this file, we were seeing a performance *regression* when using PTH of 3% over non-PTH. Now we are seeing a performance improvement of 9%! Tests: The serialization tests are now failing. I looked at this extensively, and I my belief is that this change is unmasking a bug rather than introducing a new one. I have disabled the serialization tests for now. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@62636 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include/clang/Basic/IdentifierTable.h')
-rw-r--r--include/clang/Basic/IdentifierTable.h79
1 files changed, 47 insertions, 32 deletions
diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h
index 7a4db7c02a..167e049262 100644
--- a/include/clang/Basic/IdentifierTable.h
+++ b/include/clang/Basic/IdentifierTable.h
@@ -31,6 +31,7 @@ namespace llvm {
namespace clang {
struct LangOptions;
class IdentifierInfo;
+ class IdentifierTable;
class SourceLocation;
class MultiKeywordSelector; // private class used by Selector
class DeclarationName; // AST class that stores declaration names
@@ -56,14 +57,19 @@ class IdentifierInfo {
bool IsExtension : 1; // True if identifier is a lang extension.
bool IsPoisoned : 1; // True if identifier is poisoned.
bool IsCPPOperatorKeyword : 1; // True if ident is a C++ operator keyword.
- bool IndirectString : 1; // True if the string is stored indirectly.
// 9 bits left in 32-bit word.
void *FETokenInfo; // Managed by the language front-end.
+ llvm::StringMapEntry<IdentifierInfo*> *Entry;
+
IdentifierInfo(const IdentifierInfo&); // NONCOPYABLE.
void operator=(const IdentifierInfo&); // NONASSIGNABLE.
+
+ friend class IdentifierTable;
+
public:
- IdentifierInfo(bool usesIndirectString = false);
+ IdentifierInfo();
+
/// isStr - Return true if this is the identifier for the specified string.
/// This is intended to be used for string literals only: II->isStr("foo").
template <std::size_t StrLen>
@@ -74,36 +80,26 @@ public:
/// getName - Return the actual string for this identifier. The returned
/// string is properly null terminated.
///
- const char *getName() const {
- if (IndirectString) {
- // The 'this' pointer really points to a
- // std::pair<IdentifierInfo, const char*>, where internal pointer
- // points to the external string data.
- return ((std::pair<IdentifierInfo, const char*>*) this)->second + 4;
- }
-
- // We know that this is embedded into a StringMapEntry, and it knows how to
- // efficiently find the string.
- return llvm::StringMapEntry<IdentifierInfo>::
- GetStringMapEntryFromValue(*this).getKeyData();
+ const char *getName() const {
+ if (Entry) return Entry->getKeyData();
+ // The 'this' pointer really points to a
+ // std::pair<IdentifierInfo, const char*>, where internal pointer
+ // points to the external string data.
+ return ((std::pair<IdentifierInfo, const char*>*) this)->second + 4;
}
/// getLength - Efficiently return the length of this identifier info.
///
unsigned getLength() const {
- if (IndirectString) {
- // The 'this' pointer really points to a
- // std::pair<IdentifierInfo, const char*>, where internal pointer
- // points to the external string data.
- const char* p = ((std::pair<IdentifierInfo, const char*>*) this)->second;
- return ((unsigned) p[0])
- | (((unsigned) p[1]) << 8)
- | (((unsigned) p[2]) << 16)
- | (((unsigned) p[3]) << 24);
- }
-
- return llvm::StringMapEntry<IdentifierInfo>::
- GetStringMapEntryFromValue(*this).getKeyLength();
+ if (Entry) return Entry->getKeyLength();
+ // The 'this' pointer really points to a
+ // std::pair<IdentifierInfo, const char*>, where internal pointer
+ // points to the external string data.
+ const char* p = ((std::pair<IdentifierInfo, const char*>*) this)->second;
+ return ((unsigned) p[0])
+ | (((unsigned) p[1]) << 8)
+ | (((unsigned) p[2]) << 16)
+ | (((unsigned) p[3]) << 24);
}
/// hasMacroDefinition - Return true if this identifier is #defined to some
@@ -202,7 +198,7 @@ public:
class IdentifierTable {
// Shark shows that using MallocAllocator is *much* slower than using this
// BumpPtrAllocator!
- typedef llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator> HashTableTy;
+ typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy;
HashTableTy HashTable;
IdentifierInfoLookup* ExternalLookup;
@@ -220,12 +216,29 @@ public:
/// get - Return the identifier token info for the specified named identifier.
///
IdentifierInfo &get(const char *NameStart, const char *NameEnd) {
- if (ExternalLookup) {
- IdentifierInfo* II = ExternalLookup->get(NameStart, NameEnd);
- if (II) return *II;
+ llvm::StringMapEntry<IdentifierInfo*> &Entry =
+ HashTable.GetOrCreateValue(NameStart, NameEnd, 0);
+
+ IdentifierInfo *II = Entry.getValue();
+
+ if (!II) {
+ while (1) {
+ if (ExternalLookup) {
+ II = ExternalLookup->get(NameStart, NameEnd);
+ if (II) break;
+ }
+
+ void *Mem = getAllocator().Allocate<IdentifierInfo>();
+ II = new (Mem) IdentifierInfo();
+ break;
+ }
+
+ Entry.setValue(II);
+ II->Entry = &Entry;
}
- return HashTable.GetOrCreateValue(NameStart, NameEnd).getValue();
+ assert(II->Entry != 0);
+ return *II;
}
IdentifierInfo &get(const char *Name) {
@@ -237,11 +250,13 @@ public:
return get(NameBytes, NameBytes+Name.size());
}
+private:
typedef HashTableTy::const_iterator iterator;
typedef HashTableTy::const_iterator const_iterator;
iterator begin() const { return HashTable.begin(); }
iterator end() const { return HashTable.end(); }
+public:
unsigned size() const { return HashTable.size(); }