aboutsummaryrefslogtreecommitdiff
path: root/lib/Lex/PTHLexer.cpp
diff options
context:
space:
mode:
authorTed Kremenek <kremenek@apple.com>2008-12-23 01:30:52 +0000
committerTed Kremenek <kremenek@apple.com>2008-12-23 01:30:52 +0000
commite5680f3cd678014cf0872d34726dc804b0cbbdd4 (patch)
tree5b9950de1461e2b7456082320fc5b304dab6b792 /lib/Lex/PTHLexer.cpp
parent2a7e58dc24b17b1cb900a1ee30ea328d665b1a64 (diff)
PTH:
- Embed 'eom' tokens in PTH file. - Use embedded 'eom' tokens to not lazily generate them in the PTHLexer. This means that PTHLexer can always advance to the next token after reading a token (instead of buffering tokens using a copy). - Moved logic of 'ReadToken' into Lex. GetToken & ReadToken no longer exist. - These changes result in a 3.3% speedup (-Eonly) on Cocoa.h. - The code is a little gross. Many cleanups are possible and should be done. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@61360 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/PTHLexer.cpp')
-rw-r--r--lib/Lex/PTHLexer.cpp203
1 file changed, 92 insertions, 111 deletions
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index 6a6290f933..2c19fcd0d9 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -28,38 +28,64 @@ using namespace clang;
#define DISK_TOKEN_SIZE (2+3*4)
+//===----------------------------------------------------------------------===//
+// Utility methods for reading from the mmap'ed PTH file.
+//===----------------------------------------------------------------------===//
+
+static inline uint8_t Read8(const char*& data) {
+ return (uint8_t) *(data++);
+}
+
+static inline uint32_t Read32(const char*& data) {
+ uint32_t V = (uint32_t) Read8(data);
+ V |= (((uint32_t) Read8(data)) << 8);
+ V |= (((uint32_t) Read8(data)) << 16);
+ V |= (((uint32_t) Read8(data)) << 24);
+ return V;
+}
+
+//===----------------------------------------------------------------------===//
+// PTHLexer methods.
+//===----------------------------------------------------------------------===//
+
PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
const char* ppcond, PTHManager& PM)
: PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
- PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM), NeedsFetching(true) {
- // Make sure the EofToken is completely clean.
- EofToken.startToken();
- }
+ PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {}
-Token PTHLexer::GetToken() {
- // Read the next token, or if we haven't advanced yet, get the last
- // token read.
- if (NeedsFetching) {
- NeedsFetching = false;
- ReadToken(LastFetched);
- }
+void PTHLexer::Lex(Token& Tok) {
+LexNextToken:
+
+ // Read the token.
+ // FIXME: Setting the flags directly should obviate this step.
+ Tok.startToken();
- Token Tok = LastFetched;
+ // Shadow CurPtr into an automatic variable so that Read8 doesn't load and
+ // store back into the instance variable.
+ const char *CurPtrShadow = CurPtr;
- // If we are in raw mode, zero out identifier pointers. This is
- // needed for 'pragma poison'. Note that this requires that the Preprocessor
- // can go back to the original source when it calls getSpelling().
- if (LexingRawMode && Tok.is(tok::identifier))
- Tok.setIdentifierInfo(0);
+ // Read the type of the token.
+ Tok.setKind((tok::TokenKind) Read8(CurPtrShadow));
+
+ // Set flags. This is gross, since we are really setting multiple flags.
+ Tok.setFlag((Token::TokenFlags) Read8(CurPtrShadow));
+
+ // Set the IdentifierInfo* (if any).
+ Tok.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow));
+
+ // Set the SourceLocation. Since all tokens are constructed using a
+ // raw lexer, they will all be offseted from the same FileID.
+ Tok.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow)));
+
+ // Finally, read and set the length of the token.
+ Tok.setLength(Read32(CurPtrShadow));
- return Tok;
-}
+ CurPtr = CurPtrShadow;
-void PTHLexer::Lex(Token& Tok) {
-LexNextToken:
- Tok = GetToken();
-
- if (AtLastToken()) {
+ if (Tok.is(tok::eof)) {
+ // Save the end-of-file token.
+ EofToken = Tok;
+
Preprocessor *PPCache = PP;
if (LexEndOfFile(Tok))
@@ -68,19 +94,28 @@ LexNextToken:
assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
return PPCache->Lex(Tok);
}
+
+ MIOpt.ReadToken();
- // Don't advance to the next token yet. Check if we are at the
- // start of a new line and we're processing a directive. If so, we
- // consume this token twice, once as an tok::eom.
- if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
+ if (Tok.is(tok::eom)) {
ParsingPreprocessorDirective = false;
- Tok.setKind(tok::eom);
- MIOpt.ReadToken();
return;
}
- // Advance to the next token.
- AdvanceToken();
+#if 0
+ SourceManager& SM = PP->getSourceManager();
+ SourceLocation L = Tok.getLocation();
+
+ static const char* last = 0;
+ const char* next = SM.getContentCacheForLoc(L)->Entry->getName();
+ if (next != last) {
+ last = next;
+ llvm::cerr << next << '\n';
+ }
+
+ llvm::cerr << "line " << SM.getLogicalLineNumber(L) << " col " <<
+ SM.getLogicalColumnNumber(L) << '\n';
+#endif
if (Tok.is(tok::hash)) {
if (Tok.isAtStartOfLine()) {
@@ -95,33 +130,31 @@ LexNextToken:
}
}
}
-
- MIOpt.ReadToken();
if (Tok.is(tok::identifier)) {
- if (LexingRawMode) return;
+ if (LexingRawMode) {
+ Tok.setIdentifierInfo(0);
+ return;
+ }
+
return PP->HandleIdentifier(Tok);
- }
+ }
+
+
+ assert(!Tok.is(tok::eom) || ParsingPreprocessorDirective);
}
+// FIXME: This method can just be inlined into Lex().
bool PTHLexer::LexEndOfFile(Token &Tok) {
-
- if (ParsingPreprocessorDirective) {
- ParsingPreprocessorDirective = false;
- Tok.setKind(tok::eom);
- MIOpt.ReadToken();
- return true; // Have a token.
- }
-
- if (LexingRawMode) {
- MIOpt.ReadToken();
- return true; // Have an eof token.
- }
+ assert(!ParsingPreprocessorDirective);
+ assert(!LexingRawMode);
// FIXME: Issue diagnostics similar to Lexer.
return PP->HandleEndOfFile(Tok, false);
}
+// FIXME: We can just grab the last token instead of storing a copy
+// into EofToken.
void PTHLexer::setEOF(Token& Tok) {
assert(!EofToken.is(tok::eof));
Tok = EofToken;
@@ -131,6 +164,10 @@ void PTHLexer::DiscardToEndOfLine() {
assert(ParsingPreprocessorDirective && ParsingFilename == false &&
"Must be in a preprocessing directive!");
+ // We assume that if the preprocessor wishes to discard to the end of
+ // the line that it also means to end the current preprocessor directive.
+ ParsingPreprocessorDirective = false;
+
// Skip tokens by only peeking at their token kind and the flags.
// We don't need to actually reconstruct full tokens from the token buffer.
// This saves some copies and it also reduces IdentifierInfo* lookup.
@@ -139,7 +176,7 @@ void PTHLexer::DiscardToEndOfLine() {
// Read the token kind. Are we at the end of the file?
tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
if (x == tok::eof) break;
-
+
// Read the token flags. Are we at the start of the next line?
Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
if (y & Token::StartOfLine) break;
@@ -151,22 +188,6 @@ void PTHLexer::DiscardToEndOfLine() {
CurPtr = p;
}
-//===----------------------------------------------------------------------===//
-// Utility methods for reading from the mmap'ed PTH file.
-//===----------------------------------------------------------------------===//
-
-static inline uint8_t Read8(const char*& data) {
- return (uint8_t) *(data++);
-}
-
-static inline uint32_t Read32(const char*& data) {
- uint32_t V = (uint32_t) Read8(data);
- V |= (((uint32_t) Read8(data)) << 8);
- V |= (((uint32_t) Read8(data)) << 16);
- V |= (((uint32_t) Read8(data)) << 24);
- return V;
-}
-
/// SkipBlock - Used by Preprocessor to skip the current conditional block.
bool PTHLexer::SkipBlock() {
assert(CurPPCondPtr && "No cached PP conditional information.");
@@ -225,7 +246,6 @@ bool PTHLexer::SkipBlock() {
// By construction NextIdx will be zero if this is a #endif. This is useful
// to know to obviate lexing another token.
bool isEndif = NextIdx == 0;
- NeedsFetching = true;
// This case can occur when we see something like this:
//
@@ -240,7 +260,7 @@ bool PTHLexer::SkipBlock() {
assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
// Did we reach a #endif? If so, go ahead and consume that token as well.
if (isEndif)
- CurPtr += DISK_TOKEN_SIZE;
+ CurPtr += DISK_TOKEN_SIZE*2;
else
LastHashTokPtr = HashEntryI;
@@ -253,20 +273,13 @@ bool PTHLexer::SkipBlock() {
// Update the location of the last observed '#'. This is useful if we
// are skipping multiple blocks.
LastHashTokPtr = CurPtr;
-
-#ifndef DEBUG
- // In a debug build we should verify that the token is really a '#' that
- // appears at the start of the line.
- Token Tok;
- ReadToken(Tok);
- assert(Tok.isAtStartOfLine() && Tok.is(tok::hash));
-#else
- // In a full release build we can just skip the token entirely.
- CurPtr += DISK_TOKEN_SIZE;
-#endif
+ // Skip the '#' token.
+ assert(((tok::TokenKind) (unsigned char) *CurPtr) == tok::hash);
+ CurPtr += DISK_TOKEN_SIZE;
+
// Did we reach a #endif? If so, go ahead and consume that token as well.
- if (isEndif) { CurPtr += DISK_TOKEN_SIZE; }
+ if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
return isEndif;
}
@@ -287,38 +300,6 @@ SourceLocation PTHLexer::getSourceLocation() {
}
//===----------------------------------------------------------------------===//
-// Token reconstruction from the PTH file.
-//===----------------------------------------------------------------------===//
-
-void PTHLexer::ReadToken(Token& T) {
- // Clear the token.
- // FIXME: Setting the flags directly should obviate this step.
- T.startToken();
-
- // Shadow CurPtr into an automatic variable so that Read8 doesn't load and
- // store back into the instance variable.
- const char *CurPtrShadow = CurPtr;
-
- // Read the type of the token.
- T.setKind((tok::TokenKind) Read8(CurPtrShadow));
-
- // Set flags. This is gross, since we are really setting multiple flags.
- T.setFlag((Token::TokenFlags) Read8(CurPtrShadow));
-
- // Set the IdentifierInfo* (if any).
- T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow));
-
- // Set the SourceLocation. Since all tokens are constructed using a
- // raw lexer, they will all be offseted from the same FileID.
- T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow)));
-
- // Finally, read and set the length of the token.
- T.setLength(Read32(CurPtrShadow));
-
- CurPtr = CurPtrShadow;
-}
-
-//===----------------------------------------------------------------------===//
// Internal Data Structures for PTH file lookup and resolving identifiers.
//===----------------------------------------------------------------------===//