diff options
author | Douglas Gregor <dgregor@apple.com> | 2010-03-18 00:42:48 +0000 |
---|---|---|
committer | Douglas Gregor <dgregor@apple.com> | 2010-03-18 00:42:48 +0000 |
commit | 9f1e3ff3b3095967e2b92b57a53524e2d6bb141c (patch) | |
tree | 28b9b4d3d754c3c581a84bb25180ed8149271a45 | |
parent | 5e454aa4c9249693ec69be05e16081c9b7a48dc7 (diff) |
Experimental stab at using re-lexing to identify preprocessor
directives while annotating tokens in CIndex. This functionality
should probably be factored out of this routine, but we're not there
yet.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@98786 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang-c/Index.h | 14 | ||||
-rw-r--r-- | test/Index/annotate-tokens-pp.c | 31 | ||||
-rw-r--r-- | tools/CIndex/CIndex.cpp | 124 | ||||
-rw-r--r-- | tools/CIndex/CIndex.exports | 1 | ||||
-rw-r--r-- | tools/CIndex/CXCursor.cpp | 18 | ||||
-rw-r--r-- | tools/CIndex/CXCursor.h | 6 |
6 files changed, 185 insertions, 9 deletions
diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h index 15afe8f503..2042edcdb5 100644 --- a/include/clang-c/Index.h +++ b/include/clang-c/Index.h @@ -796,7 +796,13 @@ enum CXCursorKind { CXCursor_IBActionAttr = 401, CXCursor_IBOutletAttr = 402, - CXCursor_LastAttr = CXCursor_IBOutletAttr + CXCursor_LastAttr = CXCursor_IBOutletAttr, + + /* Preprocessing */ + CXCursor_PreprocessingDirective = 500, + + CXCursor_FirstPreprocessing = CXCursor_PreprocessingDirective, + CXCursor_LastPreprocessing = CXCursor_PreprocessingDirective }; /** @@ -889,6 +895,12 @@ CINDEX_LINKAGE unsigned clang_isInvalid(enum CXCursorKind); CINDEX_LINKAGE unsigned clang_isTranslationUnit(enum CXCursorKind); /*** + * \brief Determine whether the given cursor represents a preprocessing + * element, such as a preprocessor directive or macro instantiation. + */ +CINDEX_LINKAGE unsigned clang_isPreprocessing(enum CXCursorKind); + +/*** * \brief Determine whether the given cursor represents a currently * unexposed piece of the AST (e.g., CXCursor_UnexposedStmt). 
*/ diff --git a/test/Index/annotate-tokens-pp.c b/test/Index/annotate-tokens-pp.c new file mode 100644 index 0000000000..32481fe0f0 --- /dev/null +++ b/test/Index/annotate-tokens-pp.c @@ -0,0 +1,31 @@ +#define BAR baz +#define WIBBLE(X, Y) +WIBBLE(int, float) +int BAR; +#include "foo.h" + +// RUN: c-index-test -test-annotate-tokens=%s:1:1:6:1 -I%S/Inputs %s | FileCheck %s +// CHECK: Punctuation: "#" [1:1 - 1:2] preprocessing directive= +// CHECK: Identifier: "define" [1:2 - 1:8] preprocessing directive= +// CHECK: Identifier: "BAR" [1:9 - 1:12] preprocessing directive= +// CHECK: Identifier: "baz" [1:13 - 1:16] preprocessing directive= +// CHECK: Punctuation: "#" [2:1 - 2:2] preprocessing directive= +// CHECK: Identifier: "define" [2:2 - 2:8] preprocessing directive= +// CHECK: Identifier: "WIBBLE" [2:9 - 2:15] preprocessing directive= +// CHECK: Punctuation: "(" [2:15 - 2:16] preprocessing directive= +// CHECK: Identifier: "X" [2:16 - 2:17] preprocessing directive= +// CHECK: Punctuation: "," [2:17 - 2:18] preprocessing directive= +// CHECK: Identifier: "Y" [2:19 - 2:20] preprocessing directive= +// CHECK: Punctuation: ")" [2:20 - 2:21] preprocessing directive= +// CHECK: Identifier: "WIBBLE" [3:1 - 3:7] +// CHECK: Punctuation: "(" [3:7 - 3:8] +// CHECK: Keyword: "int" [3:8 - 3:11] +// CHECK: Punctuation: "," [3:11 - 3:12] +// CHECK: Keyword: "float" [3:13 - 3:18] +// CHECK: Punctuation: ")" [3:18 - 3:19] +// CHECK: Keyword: "int" [4:1 - 4:4] +// CHECK: Identifier: "BAR" [4:5 - 4:8] +// CHECK: Punctuation: ";" [4:8 - 4:9] +// CHECK: Punctuation: "#" [5:1 - 5:2] preprocessing directive= +// CHECK: Identifier: "include" [5:2 - 5:9] preprocessing directive= +// CHECK: Literal: ""foo.h"" [5:10 - 5:17] preprocessing directive= diff --git a/tools/CIndex/CIndex.cpp b/tools/CIndex/CIndex.cpp index db27ffe68a..61bfdba590 100644 --- a/tools/CIndex/CIndex.cpp +++ b/tools/CIndex/CIndex.cpp @@ -1518,8 +1518,10 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { 
return createCXString("UnexposedAttr"); case CXCursor_IBActionAttr: return createCXString("attribute(ibaction)"); - case CXCursor_IBOutletAttr: - return createCXString("attribute(iboutlet)"); + case CXCursor_IBOutletAttr: + return createCXString("attribute(iboutlet)"); + case CXCursor_PreprocessingDirective: + return createCXString("preprocessing directive"); } llvm_unreachable("Unhandled CXCursorKind"); @@ -1590,6 +1592,10 @@ unsigned clang_isTranslationUnit(enum CXCursorKind K) { return K == CXCursor_TranslationUnit; } +unsigned clang_isPreprocessing(enum CXCursorKind K) { + return K >= CXCursor_FirstPreprocessing && K <= CXCursor_LastPreprocessing; +} + unsigned clang_isUnexposed(enum CXCursorKind K) { switch (K) { case CXCursor_UnexposedDecl: @@ -1642,6 +1648,11 @@ CXSourceLocation clang_getCursorLocation(CXCursor C) { return cxloc::translateSourceLocation(getCursorContext(C), getLocationFromExpr(getCursorExpr(C))); + if (C.kind == CXCursor_PreprocessingDirective) { + SourceLocation L = cxcursor::getCursorPreprocessingDirective(C).getBegin(); + return cxloc::translateSourceLocation(getCursorContext(C), L); + } + if (!getCursorDecl(C)) return clang_getNullLocation(); @@ -1693,6 +1704,11 @@ CXSourceRange clang_getCursorExtent(CXCursor C) { return cxloc::translateSourceRange(getCursorContext(C), getCursorStmt(C)->getSourceRange()); + if (C.kind == CXCursor_PreprocessingDirective) { + SourceRange R = cxcursor::getCursorPreprocessingDirective(C); + return cxloc::translateSourceRange(getCursorContext(C), R); + } + if (!getCursorDecl(C)) return clang_getNullRange(); @@ -2216,7 +2232,8 @@ void clang_annotateTokens(CXTranslationUnit TU, ASTUnit::ConcurrencyCheck Check(*CXXUnit); - // Annotate all of the source locations in the region of interest that map + // Annotate all of the source locations in the region of interest that map to + // a specific cursor. 
SourceRange RegionOfInterest; RegionOfInterest.setBegin( cxloc::translateSourceLocation(clang_getTokenLocation(TU, Tokens[0]))); @@ -2224,23 +2241,114 @@ void clang_annotateTokens(CXTranslationUnit TU, = cxloc::translateSourceLocation(clang_getTokenLocation(TU, Tokens[NumTokens - 1])); RegionOfInterest.setEnd(CXXUnit->getPreprocessor().getLocForEndOfToken(End)); - // FIXME: Would be great to have a "hint" cursor, then walk from that - // hint cursor upward until we find a cursor whose source range encloses - // the region of interest, rather than starting from the translation unit. + AnnotateTokensData Annotated; CXCursor Parent = clang_getTranslationUnitCursor(CXXUnit); CursorVisitor AnnotateVis(CXXUnit, AnnotateTokensVisitor, &Annotated, Decl::MaxPCHLevel, RegionOfInterest); AnnotateVis.VisitChildren(Parent); + // Look for macro instantiations and preprocessing directives in the + // source range containing the annotated tokens. We do this by re-lexing the + // tokens in the source range. + SourceManager &SourceMgr = CXXUnit->getSourceManager(); + std::pair<FileID, unsigned> BeginLocInfo + = SourceMgr.getDecomposedLoc(RegionOfInterest.getBegin()); + std::pair<FileID, unsigned> EndLocInfo + = SourceMgr.getDecomposedLoc(RegionOfInterest.getEnd()); + + bool RelexOkay = true; + + // Cannot re-tokenize across files. + if (BeginLocInfo.first != EndLocInfo.first) + RelexOkay = false; + + llvm::StringRef Buffer; + if (RelexOkay) { + // Create a lexer + bool Invalid = false; + Buffer = SourceMgr.getBufferData(BeginLocInfo.first, &Invalid); + if (Invalid) + RelexOkay = false; + } + + if (RelexOkay) { + Lexer Lex(SourceMgr.getLocForStartOfFile(BeginLocInfo.first), + CXXUnit->getASTContext().getLangOptions(), + Buffer.begin(), Buffer.data() + BeginLocInfo.second, Buffer.end()); + Lex.SetCommentRetentionState(true); + + // Lex tokens in raw mode until we hit the end of the range, to avoid + // entering #includes or expanding macros. 
+ std::vector<Token> TokenStream; + const char *EffectiveBufferEnd = Buffer.data() + EndLocInfo.second; + Preprocessor &PP = CXXUnit->getPreprocessor(); + while (Lex.getBufferLocation() <= EffectiveBufferEnd) { + Token Tok; + Lex.LexFromRawLexer(Tok); + + reprocess: + if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) { + // We have found a preprocessing directive. Gobble it up so that we + // don't see it while preprocessing these tokens later, but keep track of + // all of the token locations inside this preprocessing directive so that + // we can annotate them appropriately. + // + // FIXME: Some simple tests here could identify macro definitions and + // #undefs, to provide specific cursor kinds for those. + std::vector<SourceLocation> Locations; + do { + Locations.push_back(Tok.getLocation()); + Lex.LexFromRawLexer(Tok); + } while (!Tok.isAtStartOfLine() && !Tok.is(tok::eof)); + + using namespace cxcursor; + CXCursor Cursor + = MakePreprocessingDirectiveCursor(SourceRange(Locations.front(), + Locations.back()), + CXXUnit); + for (unsigned I = 0, N = Locations.size(); I != N; ++I) { + Annotated[Locations[I].getRawEncoding()] = Cursor; + } + + if (Tok.is(tok::eof)) + break; + + if (Tok.isAtStartOfLine()) + goto reprocess; + + continue; + } + + // If this is a ## token, change its kind to unknown so that repreprocessing + // it will not produce an error. + if (Tok.is(tok::hashhash)) + Tok.setKind(tok::unknown); + + // If this raw token is an identifier, the raw lexer won't have looked up + // the corresponding identifier info for it. Do this now so that it will be + // macro expanded when we re-preprocess it. + if (Tok.is(tok::identifier)) { + // Change the kind of this identifier to the appropriate token kind, e.g. + // turning "for" into a keyword. 
+ Tok.setKind(PP.LookUpIdentifierInfo(Tok)->getTokenID()); + } + + TokenStream.push_back(Tok); + + if (Tok.is(tok::eof)) + break; + } + } + for (unsigned I = 0; I != NumTokens; ++I) { // Determine whether we saw a cursor at this token's location. AnnotateTokensData::iterator Pos = Annotated.find(Tokens[I].int_data[1]); if (Pos == Annotated.end()) continue; - + Cursors[I] = Pos->second; - } + } } void clang_disposeTokens(CXTranslationUnit TU, diff --git a/tools/CIndex/CIndex.exports b/tools/CIndex/CIndex.exports index fe0396d2b1..d036e5cfd8 100644 --- a/tools/CIndex/CIndex.exports +++ b/tools/CIndex/CIndex.exports @@ -70,6 +70,7 @@ _clang_isCursorDefinition _clang_isDeclaration _clang_isExpression _clang_isInvalid +_clang_isPreprocessing _clang_isReference _clang_isStatement _clang_isTranslationUnit diff --git a/tools/CIndex/CXCursor.cpp b/tools/CIndex/CXCursor.cpp index 0fa73a513d..f2294b0996 100644 --- a/tools/CIndex/CXCursor.cpp +++ b/tools/CIndex/CXCursor.cpp @@ -296,6 +296,24 @@ cxcursor::getCursorTypeRef(CXCursor C) { reinterpret_cast<uintptr_t>(C.data[1]))); } +CXCursor cxcursor::MakePreprocessingDirectiveCursor(SourceRange Range, + ASTUnit *TU) { + CXCursor C = { CXCursor_PreprocessingDirective, + { reinterpret_cast<void *>(Range.getBegin().getRawEncoding()), + reinterpret_cast<void *>(Range.getEnd().getRawEncoding()), + TU } + }; + return C; +} + +SourceRange cxcursor::getCursorPreprocessingDirective(CXCursor C) { + assert(C.kind == CXCursor_PreprocessingDirective); + return SourceRange(SourceLocation::getFromRawEncoding( + reinterpret_cast<uintptr_t> (C.data[0])), + SourceLocation::getFromRawEncoding( + reinterpret_cast<uintptr_t> (C.data[1]))); +} + Decl *cxcursor::getCursorDecl(CXCursor Cursor) { return (Decl *)Cursor.data[0]; } diff --git a/tools/CIndex/CXCursor.h b/tools/CIndex/CXCursor.h index 934d5e2aeb..aa5d4f3a39 100644 --- a/tools/CIndex/CXCursor.h +++ b/tools/CIndex/CXCursor.h @@ -73,6 +73,12 @@ CXCursor MakeCursorTypeRef(TypeDecl *Type, 
SourceLocation Loc, ASTUnit *TU); /// and optionally the location where the reference occurred. std::pair<TypeDecl *, SourceLocation> getCursorTypeRef(CXCursor C); +/// \brief Create a preprocessing directive cursor. +CXCursor MakePreprocessingDirectiveCursor(SourceRange Range, ASTUnit *TU); + +/// \brief Unpack a given preprocessing directive to retrieve its source range. +SourceRange getCursorPreprocessingDirective(CXCursor C); + Decl *getCursorDecl(CXCursor Cursor); Expr *getCursorExpr(CXCursor Cursor); Stmt *getCursorStmt(CXCursor Cursor); |