diff options
author | Ted Kremenek <kremenek@apple.com> | 2008-12-23 01:30:52 +0000 |
---|---|---|
committer | Ted Kremenek <kremenek@apple.com> | 2008-12-23 01:30:52 +0000 |
commit | e5680f3cd678014cf0872d34726dc804b0cbbdd4 (patch) | |
tree | 5b9950de1461e2b7456082320fc5b304dab6b792 /lib/Lex/PTHLexer.cpp | |
parent | 2a7e58dc24b17b1cb900a1ee30ea328d665b1a64 (diff) |
PTH:
- Embed 'eom' tokens in PTH file.
- Use embedded 'eom' tokens to not lazily generate them in the PTHLexer.
This means that PTHLexer can always advance to the next token after
reading a token (instead of buffering tokens using a copy).
- Moved logic of 'ReadToken' into Lex. GetToken & ReadToken no longer exist.
- These changes result in a 3.3% speedup (-Eonly) on Cocoa.h.
- The code is a little gross. Many cleanups are possible and should be done.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@61360 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/PTHLexer.cpp')
-rw-r--r-- | lib/Lex/PTHLexer.cpp | 203 |
1 files changed, 92 insertions, 111 deletions
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index 6a6290f933..2c19fcd0d9 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -28,38 +28,64 @@ using namespace clang; #define DISK_TOKEN_SIZE (2+3*4) +//===----------------------------------------------------------------------===// +// Utility methods for reading from the mmap'ed PTH file. +//===----------------------------------------------------------------------===// + +static inline uint8_t Read8(const char*& data) { + return (uint8_t) *(data++); +} + +static inline uint32_t Read32(const char*& data) { + uint32_t V = (uint32_t) Read8(data); + V |= (((uint32_t) Read8(data)) << 8); + V |= (((uint32_t) Read8(data)) << 16); + V |= (((uint32_t) Read8(data)) << 24); + return V; +} + +//===----------------------------------------------------------------------===// +// PTHLexer methods. +//===----------------------------------------------------------------------===// + PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, const char* ppcond, PTHManager& PM) : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0), - PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM), NeedsFetching(true) { - // Make sure the EofToken is completely clean. - EofToken.startToken(); - } + PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {} -Token PTHLexer::GetToken() { - // Read the next token, or if we haven't advanced yet, get the last - // token read. - if (NeedsFetching) { - NeedsFetching = false; - ReadToken(LastFetched); - } +void PTHLexer::Lex(Token& Tok) { +LexNextToken: + + // Read the token. + // FIXME: Setting the flags directly should obviate this step. + Tok.startToken(); - Token Tok = LastFetched; + // Shadow CurPtr into an automatic variable so that Read8 doesn't load and + // store back into the instance variable. + const char *CurPtrShadow = CurPtr; - // If we are in raw mode, zero out identifier pointers. This is - // needed for 'pragma poison'. Note that this requires that the Preprocessor - // can go back to the original source when it calls getSpelling(). - if (LexingRawMode && Tok.is(tok::identifier)) - Tok.setIdentifierInfo(0); + // Read the type of the token. + Tok.setKind((tok::TokenKind) Read8(CurPtrShadow)); + + // Set flags. This is gross, since we are really setting multiple flags. + Tok.setFlag((Token::TokenFlags) Read8(CurPtrShadow)); + + // Set the IdentifierInfo* (if any). + Tok.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow)); + + // Set the SourceLocation. Since all tokens are constructed using a + // raw lexer, they will all be offseted from the same FileID. + Tok.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow))); + + // Finally, read and set the length of the token. + Tok.setLength(Read32(CurPtrShadow)); - return Tok; -} + CurPtr = CurPtrShadow; -void PTHLexer::Lex(Token& Tok) { -LexNextToken: - Tok = GetToken(); - - if (AtLastToken()) { + if (Tok.is(tok::eof)) { + // Save the end-of-file token. + EofToken = Tok; + Preprocessor *PPCache = PP; if (LexEndOfFile(Tok)) @@ -68,19 +94,28 @@ LexNextToken: assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); return PPCache->Lex(Tok); } + + MIOpt.ReadToken(); - // Don't advance to the next token yet. Check if we are at the - // start of a new line and we're processing a directive. If so, we - // consume this token twice, once as an tok::eom. - if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) { + if (Tok.is(tok::eom)) { ParsingPreprocessorDirective = false; - Tok.setKind(tok::eom); - MIOpt.ReadToken(); return; } - // Advance to the next token. - AdvanceToken(); +#if 0 + SourceManager& SM = PP->getSourceManager(); + SourceLocation L = Tok.getLocation(); + + static const char* last = 0; + const char* next = SM.getContentCacheForLoc(L)->Entry->getName(); + if (next != last) { + last = next; + llvm::cerr << next << '\n'; + } + + llvm::cerr << "line " << SM.getLogicalLineNumber(L) << " col " << + SM.getLogicalColumnNumber(L) << '\n'; +#endif if (Tok.is(tok::hash)) { if (Tok.isAtStartOfLine()) { @@ -95,33 +130,31 @@ LexNextToken: } } } - - MIOpt.ReadToken(); if (Tok.is(tok::identifier)) { - if (LexingRawMode) return; + if (LexingRawMode) { + Tok.setIdentifierInfo(0); + return; + } + return PP->HandleIdentifier(Tok); - } + } + + + assert(!Tok.is(tok::eom) || ParsingPreprocessorDirective); } +// FIXME: This method can just be inlined into Lex(). bool PTHLexer::LexEndOfFile(Token &Tok) { - - if (ParsingPreprocessorDirective) { - ParsingPreprocessorDirective = false; - Tok.setKind(tok::eom); - MIOpt.ReadToken(); - return true; // Have a token. - } - - if (LexingRawMode) { - MIOpt.ReadToken(); - return true; // Have an eof token. - } + assert(!ParsingPreprocessorDirective); + assert(!LexingRawMode); // FIXME: Issue diagnostics similar to Lexer. return PP->HandleEndOfFile(Tok, false); } +// FIXME: We can just grab the last token instead of storing a copy +// into EofToken. void PTHLexer::setEOF(Token& Tok) { assert(!EofToken.is(tok::eof)); Tok = EofToken; @@ -131,6 +164,10 @@ void PTHLexer::DiscardToEndOfLine() { assert(ParsingPreprocessorDirective && ParsingFilename == false && "Must be in a preprocessing directive!"); + // We assume that if the preprocessor wishes to discard to the end of + // the line that it also means to end the current preprocessor directive. + ParsingPreprocessorDirective = false; + // Skip tokens by only peeking at their token kind and the flags. // We don't need to actually reconstruct full tokens from the token buffer. // This saves some copies and it also reduces IdentifierInfo* lookup. @@ -139,7 +176,7 @@ void PTHLexer::DiscardToEndOfLine() { // Read the token kind. Are we at the end of the file? tok::TokenKind x = (tok::TokenKind) (uint8_t) *p; if (x == tok::eof) break; - + // Read the token flags. Are we at the start of the next line? Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1]; if (y & Token::StartOfLine) break; @@ -151,22 +188,6 @@ void PTHLexer::DiscardToEndOfLine() { CurPtr = p; } -//===----------------------------------------------------------------------===// -// Utility methods for reading from the mmap'ed PTH file. -//===----------------------------------------------------------------------===// - -static inline uint8_t Read8(const char*& data) { - return (uint8_t) *(data++); -} - -static inline uint32_t Read32(const char*& data) { - uint32_t V = (uint32_t) Read8(data); - V |= (((uint32_t) Read8(data)) << 8); - V |= (((uint32_t) Read8(data)) << 16); - V |= (((uint32_t) Read8(data)) << 24); - return V; -} - /// SkipBlock - Used by Preprocessor to skip the current conditional block. bool PTHLexer::SkipBlock() { assert(CurPPCondPtr && "No cached PP conditional information."); @@ -225,7 +246,6 @@ bool PTHLexer::SkipBlock() { // By construction NextIdx will be zero if this is a #endif. This is useful // to know to obviate lexing another token. bool isEndif = NextIdx == 0; - NeedsFetching = true; // This case can occur when we see something like this: // @@ -240,7 +260,7 @@ bool PTHLexer::SkipBlock() { assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE); // Did we reach a #endif? If so, go ahead and consume that token as well. if (isEndif) - CurPtr += DISK_TOKEN_SIZE; + CurPtr += DISK_TOKEN_SIZE*2; else LastHashTokPtr = HashEntryI; @@ -253,20 +273,13 @@ bool PTHLexer::SkipBlock() { // Update the location of the last observed '#'. This is useful if we // are skipping multiple blocks. LastHashTokPtr = CurPtr; - -#ifndef DEBUG - // In a debug build we should verify that the token is really a '#' that - // appears at the start of the line. - Token Tok; - ReadToken(Tok); - assert(Tok.isAtStartOfLine() && Tok.is(tok::hash)); -#else - // In a full release build we can just skip the token entirely. - CurPtr += DISK_TOKEN_SIZE; -#endif + // Skip the '#' token. + assert(((tok::TokenKind) (unsigned char) *CurPtr) == tok::hash); + CurPtr += DISK_TOKEN_SIZE; + // Did we reach a #endif? If so, go ahead and consume that token as well. - if (isEndif) { CurPtr += DISK_TOKEN_SIZE; } + if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; } return isEndif; } @@ -287,38 +300,6 @@ SourceLocation PTHLexer::getSourceLocation() { } //===----------------------------------------------------------------------===// -// Token reconstruction from the PTH file. -//===----------------------------------------------------------------------===// - -void PTHLexer::ReadToken(Token& T) { - // Clear the token. - // FIXME: Setting the flags directly should obviate this step. - T.startToken(); - - // Shadow CurPtr into an automatic variable so that Read8 doesn't load and - // store back into the instance variable. - const char *CurPtrShadow = CurPtr; - - // Read the type of the token. - T.setKind((tok::TokenKind) Read8(CurPtrShadow)); - - // Set flags. This is gross, since we are really setting multiple flags. - T.setFlag((Token::TokenFlags) Read8(CurPtrShadow)); - - // Set the IdentifierInfo* (if any). - T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow)); - - // Set the SourceLocation. Since all tokens are constructed using a - // raw lexer, they will all be offseted from the same FileID. - T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow))); - - // Finally, read and set the length of the token. - T.setLength(Read32(CurPtrShadow)); - - CurPtr = CurPtrShadow; -} - -//===----------------------------------------------------------------------===// // Internal Data Structures for PTH file lookup and resolving identifiers. //===----------------------------------------------------------------------===// |