aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Douglas Gregor <dgregor@apple.com> 2010-03-18 00:42:48 +0000
committer: Douglas Gregor <dgregor@apple.com> 2010-03-18 00:42:48 +0000
commit: 9f1e3ff3b3095967e2b92b57a53524e2d6bb141c (patch)
tree: 28b9b4d3d754c3c581a84bb25180ed8149271a45
parent: 5e454aa4c9249693ec69be05e16081c9b7a48dc7 (diff)
Experimental stab at using re-lexing to identify preprocessor
directives while annotating tokens in CIndex. This functionality should probably be factored out of this routine, but we're not there yet.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@98786 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/clang-c/Index.h 14
-rw-r--r-- test/Index/annotate-tokens-pp.c 31
-rw-r--r-- tools/CIndex/CIndex.cpp 124
-rw-r--r-- tools/CIndex/CIndex.exports 1
-rw-r--r-- tools/CIndex/CXCursor.cpp 18
-rw-r--r-- tools/CIndex/CXCursor.h 6
6 files changed, 185 insertions, 9 deletions
diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h
index 15afe8f503..2042edcdb5 100644
--- a/include/clang-c/Index.h
+++ b/include/clang-c/Index.h
@@ -796,7 +796,13 @@ enum CXCursorKind {
CXCursor_IBActionAttr = 401,
CXCursor_IBOutletAttr = 402,
- CXCursor_LastAttr = CXCursor_IBOutletAttr
+ CXCursor_LastAttr = CXCursor_IBOutletAttr,
+
+ /* Preprocessing */
+ CXCursor_PreprocessingDirective = 500,
+
+ CXCursor_FirstPreprocessing = CXCursor_PreprocessingDirective,
+ CXCursor_LastPreprocessing = CXCursor_PreprocessingDirective
};
/**
@@ -889,6 +895,12 @@ CINDEX_LINKAGE unsigned clang_isInvalid(enum CXCursorKind);
CINDEX_LINKAGE unsigned clang_isTranslationUnit(enum CXCursorKind);
/***
+ * \brief Determine whether the given cursor kind represents a preprocessing
+ * element, such as a preprocessor directive or macro instantiation.
+ */
+CINDEX_LINKAGE unsigned clang_isPreprocessing(enum CXCursorKind);
+
+/***
* \brief Determine whether the given cursor represents a currently
* unexposed piece of the AST (e.g., CXCursor_UnexposedStmt).
*/
diff --git a/test/Index/annotate-tokens-pp.c b/test/Index/annotate-tokens-pp.c
new file mode 100644
index 0000000000..32481fe0f0
--- /dev/null
+++ b/test/Index/annotate-tokens-pp.c
@@ -0,0 +1,31 @@
+#define BAR baz
+#define WIBBLE(X, Y)
+WIBBLE(int, float)
+int BAR;
+#include "foo.h"
+
+// RUN: c-index-test -test-annotate-tokens=%s:1:1:6:1 -I%S/Inputs %s | FileCheck %s
+// CHECK: Punctuation: "#" [1:1 - 1:2] preprocessing directive=
+// CHECK: Identifier: "define" [1:2 - 1:8] preprocessing directive=
+// CHECK: Identifier: "BAR" [1:9 - 1:12] preprocessing directive=
+// CHECK: Identifier: "baz" [1:13 - 1:16] preprocessing directive=
+// CHECK: Punctuation: "#" [2:1 - 2:2] preprocessing directive=
+// CHECK: Identifier: "define" [2:2 - 2:8] preprocessing directive=
+// CHECK: Identifier: "WIBBLE" [2:9 - 2:15] preprocessing directive=
+// CHECK: Punctuation: "(" [2:15 - 2:16] preprocessing directive=
+// CHECK: Identifier: "X" [2:16 - 2:17] preprocessing directive=
+// CHECK: Punctuation: "," [2:17 - 2:18] preprocessing directive=
+// CHECK: Identifier: "Y" [2:19 - 2:20] preprocessing directive=
+// CHECK: Punctuation: ")" [2:20 - 2:21] preprocessing directive=
+// CHECK: Identifier: "WIBBLE" [3:1 - 3:7]
+// CHECK: Punctuation: "(" [3:7 - 3:8]
+// CHECK: Keyword: "int" [3:8 - 3:11]
+// CHECK: Punctuation: "," [3:11 - 3:12]
+// CHECK: Keyword: "float" [3:13 - 3:18]
+// CHECK: Punctuation: ")" [3:18 - 3:19]
+// CHECK: Keyword: "int" [4:1 - 4:4]
+// CHECK: Identifier: "BAR" [4:5 - 4:8]
+// CHECK: Punctuation: ";" [4:8 - 4:9]
+// CHECK: Punctuation: "#" [5:1 - 5:2] preprocessing directive=
+// CHECK: Identifier: "include" [5:2 - 5:9] preprocessing directive=
+// CHECK: Literal: ""foo.h"" [5:10 - 5:17] preprocessing directive=
diff --git a/tools/CIndex/CIndex.cpp b/tools/CIndex/CIndex.cpp
index db27ffe68a..61bfdba590 100644
--- a/tools/CIndex/CIndex.cpp
+++ b/tools/CIndex/CIndex.cpp
@@ -1518,8 +1518,10 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
return createCXString("UnexposedAttr");
case CXCursor_IBActionAttr:
return createCXString("attribute(ibaction)");
- case CXCursor_IBOutletAttr:
- return createCXString("attribute(iboutlet)");
+ case CXCursor_IBOutletAttr:
+ return createCXString("attribute(iboutlet)");
+ case CXCursor_PreprocessingDirective:
+ return createCXString("preprocessing directive");
}
llvm_unreachable("Unhandled CXCursorKind");
@@ -1590,6 +1592,10 @@ unsigned clang_isTranslationUnit(enum CXCursorKind K) {
return K == CXCursor_TranslationUnit;
}
+unsigned clang_isPreprocessing(enum CXCursorKind K) {
+ return K >= CXCursor_FirstPreprocessing && K <= CXCursor_LastPreprocessing;
+}
+
unsigned clang_isUnexposed(enum CXCursorKind K) {
switch (K) {
case CXCursor_UnexposedDecl:
@@ -1642,6 +1648,11 @@ CXSourceLocation clang_getCursorLocation(CXCursor C) {
return cxloc::translateSourceLocation(getCursorContext(C),
getLocationFromExpr(getCursorExpr(C)));
+ if (C.kind == CXCursor_PreprocessingDirective) {
+ SourceLocation L = cxcursor::getCursorPreprocessingDirective(C).getBegin();
+ return cxloc::translateSourceLocation(getCursorContext(C), L);
+ }
+
if (!getCursorDecl(C))
return clang_getNullLocation();
@@ -1693,6 +1704,11 @@ CXSourceRange clang_getCursorExtent(CXCursor C) {
return cxloc::translateSourceRange(getCursorContext(C),
getCursorStmt(C)->getSourceRange());
+ if (C.kind == CXCursor_PreprocessingDirective) {
+ SourceRange R = cxcursor::getCursorPreprocessingDirective(C);
+ return cxloc::translateSourceRange(getCursorContext(C), R);
+ }
+
if (!getCursorDecl(C))
return clang_getNullRange();
@@ -2216,7 +2232,8 @@ void clang_annotateTokens(CXTranslationUnit TU,
ASTUnit::ConcurrencyCheck Check(*CXXUnit);
- // Annotate all of the source locations in the region of interest that map
+ // Annotate all of the source locations in the region of interest that map to
+ // a specific cursor.
SourceRange RegionOfInterest;
RegionOfInterest.setBegin(
cxloc::translateSourceLocation(clang_getTokenLocation(TU, Tokens[0])));
@@ -2224,23 +2241,114 @@ void clang_annotateTokens(CXTranslationUnit TU,
= cxloc::translateSourceLocation(clang_getTokenLocation(TU,
Tokens[NumTokens - 1]));
RegionOfInterest.setEnd(CXXUnit->getPreprocessor().getLocForEndOfToken(End));
- // FIXME: Would be great to have a "hint" cursor, then walk from that
- // hint cursor upward until we find a cursor whose source range encloses
- // the region of interest, rather than starting from the translation unit.
+
AnnotateTokensData Annotated;
CXCursor Parent = clang_getTranslationUnitCursor(CXXUnit);
CursorVisitor AnnotateVis(CXXUnit, AnnotateTokensVisitor, &Annotated,
Decl::MaxPCHLevel, RegionOfInterest);
AnnotateVis.VisitChildren(Parent);
+ // Look for macro instantiations and preprocessing directives in the
+ // source range containing the annotated tokens. We do this by re-lexing the
+ // tokens in the source range.
+ SourceManager &SourceMgr = CXXUnit->getSourceManager();
+ std::pair<FileID, unsigned> BeginLocInfo
+ = SourceMgr.getDecomposedLoc(RegionOfInterest.getBegin());
+ std::pair<FileID, unsigned> EndLocInfo
+ = SourceMgr.getDecomposedLoc(RegionOfInterest.getEnd());
+
+ bool RelexOkay = true;
+
+ // Cannot re-tokenize across files.
+ if (BeginLocInfo.first != EndLocInfo.first)
+ RelexOkay = false;
+
+ llvm::StringRef Buffer;
+ if (RelexOkay) {
+ // Fetch the contents of the file buffer that we are going to re-lex.
+ bool Invalid = false;
+ Buffer = SourceMgr.getBufferData(BeginLocInfo.first, &Invalid);
+ if (Invalid)
+ RelexOkay = false;
+ }
+
+ if (RelexOkay) {
+ Lexer Lex(SourceMgr.getLocForStartOfFile(BeginLocInfo.first),
+ CXXUnit->getASTContext().getLangOptions(),
+ Buffer.begin(), Buffer.data() + BeginLocInfo.second, Buffer.end());
+ Lex.SetCommentRetentionState(true);
+
+ // Lex tokens in raw mode until we hit the end of the range, to avoid
+ // entering #includes or expanding macros.
+ std::vector<Token> TokenStream;
+ const char *EffectiveBufferEnd = Buffer.data() + EndLocInfo.second;
+ Preprocessor &PP = CXXUnit->getPreprocessor();
+ while (Lex.getBufferLocation() <= EffectiveBufferEnd) {
+ Token Tok;
+ Lex.LexFromRawLexer(Tok);
+
+ reprocess:
+ if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
+ // We have found a preprocessing directive. Gobble it up so that we
+ // don't see it while preprocessing these tokens later, but keep track of
+ // all of the token locations inside this preprocessing directive so that
+ // we can annotate them appropriately.
+ //
+ // FIXME: Some simple tests here could identify macro definitions and
+ // #undefs, to provide specific cursor kinds for those.
+ std::vector<SourceLocation> Locations;
+ do {
+ Locations.push_back(Tok.getLocation());
+ Lex.LexFromRawLexer(Tok);
+ } while (!Tok.isAtStartOfLine() && !Tok.is(tok::eof));
+
+ using namespace cxcursor;
+ CXCursor Cursor
+ = MakePreprocessingDirectiveCursor(SourceRange(Locations.front(),
+ Locations.back()),
+ CXXUnit);
+ for (unsigned I = 0, N = Locations.size(); I != N; ++I) {
+ Annotated[Locations[I].getRawEncoding()] = Cursor;
+ }
+
+ if (Tok.is(tok::eof))
+ break;
+
+ if (Tok.isAtStartOfLine())
+ goto reprocess;
+
+ continue;
+ }
+
+ // If this is a ## token, change its kind to unknown so that re-preprocessing
+ // it will not produce an error.
+ if (Tok.is(tok::hashhash))
+ Tok.setKind(tok::unknown);
+
+ // If this raw token is an identifier, the raw lexer won't have looked up
+ // the corresponding identifier info for it. Do this now so that it will be
+ // macro expanded when we re-preprocess it.
+ if (Tok.is(tok::identifier)) {
+ // Change the kind of this identifier to the appropriate token kind, e.g.
+ // turning "for" into a keyword.
+ Tok.setKind(PP.LookUpIdentifierInfo(Tok)->getTokenID());
+ }
+
+ TokenStream.push_back(Tok);
+
+ if (Tok.is(tok::eof))
+ break;
+ }
+ }
+
for (unsigned I = 0; I != NumTokens; ++I) {
// Determine whether we saw a cursor at this token's location.
AnnotateTokensData::iterator Pos = Annotated.find(Tokens[I].int_data[1]);
if (Pos == Annotated.end())
continue;
-
+
Cursors[I] = Pos->second;
- }
+ }
}
void clang_disposeTokens(CXTranslationUnit TU,
diff --git a/tools/CIndex/CIndex.exports b/tools/CIndex/CIndex.exports
index fe0396d2b1..d036e5cfd8 100644
--- a/tools/CIndex/CIndex.exports
+++ b/tools/CIndex/CIndex.exports
@@ -70,6 +70,7 @@ _clang_isCursorDefinition
_clang_isDeclaration
_clang_isExpression
_clang_isInvalid
+_clang_isPreprocessing
_clang_isReference
_clang_isStatement
_clang_isTranslationUnit
diff --git a/tools/CIndex/CXCursor.cpp b/tools/CIndex/CXCursor.cpp
index 0fa73a513d..f2294b0996 100644
--- a/tools/CIndex/CXCursor.cpp
+++ b/tools/CIndex/CXCursor.cpp
@@ -296,6 +296,24 @@ cxcursor::getCursorTypeRef(CXCursor C) {
reinterpret_cast<uintptr_t>(C.data[1])));
}
+CXCursor cxcursor::MakePreprocessingDirectiveCursor(SourceRange Range,
+ ASTUnit *TU) {
+ CXCursor C = { CXCursor_PreprocessingDirective,
+ { reinterpret_cast<void *>(Range.getBegin().getRawEncoding()),
+ reinterpret_cast<void *>(Range.getEnd().getRawEncoding()),
+ TU }
+ };
+ return C;
+}
+
+SourceRange cxcursor::getCursorPreprocessingDirective(CXCursor C) {
+ assert(C.kind == CXCursor_PreprocessingDirective);
+ return SourceRange(SourceLocation::getFromRawEncoding(
+ reinterpret_cast<uintptr_t> (C.data[0])),
+ SourceLocation::getFromRawEncoding(
+ reinterpret_cast<uintptr_t> (C.data[1])));
+}
+
Decl *cxcursor::getCursorDecl(CXCursor Cursor) {
return (Decl *)Cursor.data[0];
}
diff --git a/tools/CIndex/CXCursor.h b/tools/CIndex/CXCursor.h
index 934d5e2aeb..aa5d4f3a39 100644
--- a/tools/CIndex/CXCursor.h
+++ b/tools/CIndex/CXCursor.h
@@ -73,6 +73,12 @@ CXCursor MakeCursorTypeRef(TypeDecl *Type, SourceLocation Loc, ASTUnit *TU);
/// and optionally the location where the reference occurred.
std::pair<TypeDecl *, SourceLocation> getCursorTypeRef(CXCursor C);
+/// \brief Create a preprocessing directive cursor.
+CXCursor MakePreprocessingDirectiveCursor(SourceRange Range, ASTUnit *TU);
+
+/// \brief Unpack a preprocessing directive cursor to retrieve its source range.
+SourceRange getCursorPreprocessingDirective(CXCursor C);
+
Decl *getCursorDecl(CXCursor Cursor);
Expr *getCursorExpr(CXCursor Cursor);
Stmt *getCursorStmt(CXCursor Cursor);