diff options
author | Douglas Gregor <dgregor@apple.com> | 2010-07-20 20:18:03 +0000 |
---|---|---|
committer | Douglas Gregor <dgregor@apple.com> | 2010-07-20 20:18:03 +0000 |
commit | f033f1da4a34f8df6e95e9929dc04ff54bb8fb01 (patch) | |
tree | 6085cfe3a898ad4e4ae54d33b2ea9ab0cae8beb3 /lib/Lex/Lexer.cpp | |
parent | 4751a53c5e5fed4bf2271e29cae7411c93a77df7 (diff) |
Introduce a new lexer function to compute the "preamble" of a file,
which is the part of the file that contains all of the initial
comments, includes, and preprocessor directives that occur before any
of the actual code. Added a new -print-preamble cc1 action that is
only used for testing.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@108913 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r-- | lib/Lex/Lexer.cpp | 125 |
1 files changed, 125 insertions, 0 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 91b14f638d..2f11c37e65 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -28,6 +28,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Basic/SourceManager.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/MemoryBuffer.h" #include <cctype> @@ -247,6 +248,130 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, return TheTok.getLength(); } +namespace { + enum PreambleDirectiveKind { + PDK_Skipped, + PDK_StartIf, + PDK_EndIf, + PDK_Unknown + }; +} + +unsigned Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer) { + // Create a lexer starting at the beginning of the file. Note that we use a + // "fake" file source location at offset 1 so that the lexer will track our + // position within the file. + const unsigned StartOffset = 1; + SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset); + LangOptions LangOpts; + Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(), + Buffer->getBufferStart(), Buffer->getBufferEnd()); + + bool InPreprocessorDirective = false; + Token TheTok; + Token IfStartTok; + unsigned IfCount = 0; + do { + TheLexer.LexFromRawLexer(TheTok); + + if (InPreprocessorDirective) { + // If we've hit the end of the file, we're done. + if (TheTok.getKind() == tok::eof) { + InPreprocessorDirective = false; + break; + } + + // If we haven't hit the end of the preprocessor directive, skip this + // token. + if (!TheTok.isAtStartOfLine()) + continue; + + // We've passed the end of the preprocessor directive, and will look + // at this token again below. + InPreprocessorDirective = false; + } + + // Comments are okay; skip over them. + if (TheTok.getKind() == tok::comment) + continue; + + if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) { + // This is the start of a preprocessor directive. + Token HashTok = TheTok; + InPreprocessorDirective = true; + + // Figure out which direective this is. Since we're lexing raw tokens, + // we don't have an identifier table available. Instead, just look at + // the raw identifier to recognize and categorize preprocessor directives. + TheLexer.LexFromRawLexer(TheTok); + if (TheTok.getKind() == tok::identifier && !TheTok.needsCleaning()) { + const char *IdStart = Buffer->getBufferStart() + + TheTok.getLocation().getRawEncoding() - 1; + llvm::StringRef Keyword(IdStart, TheTok.getLength()); + PreambleDirectiveKind PDK + = llvm::StringSwitch<PreambleDirectiveKind>(Keyword) + .Case("include", PDK_Skipped) + .Case("__include_macros", PDK_Skipped) + .Case("define", PDK_Skipped) + .Case("undef", PDK_Skipped) + .Case("line", PDK_Skipped) + .Case("error", PDK_Skipped) + .Case("pragma", PDK_Skipped) + .Case("import", PDK_Skipped) + .Case("include_next", PDK_Skipped) + .Case("warning", PDK_Skipped) + .Case("ident", PDK_Skipped) + .Case("sccs", PDK_Skipped) + .Case("assert", PDK_Skipped) + .Case("unassert", PDK_Skipped) + .Case("if", PDK_StartIf) + .Case("ifdef", PDK_StartIf) + .Case("ifndef", PDK_StartIf) + .Case("elif", PDK_Skipped) + .Case("else", PDK_Skipped) + .Case("endif", PDK_EndIf) + .Default(PDK_Unknown); + + switch (PDK) { + case PDK_Skipped: + continue; + + case PDK_StartIf: + if (IfCount == 0) + IfStartTok = HashTok; + + ++IfCount; + continue; + + case PDK_EndIf: + // Mismatched #endif. The preamble ends here. + if (IfCount == 0) + break; + + --IfCount; + continue; + + case PDK_Unknown: + // We don't know what this directive is; stop at the '#'. + break; + } + } + + // We only end up here if we didn't recognize the preprocessor + // directive or it was one that can't occur in the preamble at this + // point. Roll back the current token to the location of the '#'. + InPreprocessorDirective = false; + TheTok = HashTok; + } + + // We hit a token + break; + } while (true); + + SourceLocation End = IfCount? IfStartTok.getLocation() : TheTok.getLocation(); + return End.getRawEncoding() - StartLoc.getRawEncoding(); +} + //===----------------------------------------------------------------------===// // Character information. //===----------------------------------------------------------------------===// |