diff options
author | Douglas Gregor <dgregor@apple.com> | 2010-01-26 17:06:03 +0000 |
---|---|---|
committer | Douglas Gregor <dgregor@apple.com> | 2010-01-26 17:06:03 +0000 |
commit | fc8ea23eb6cbaaa5046f2abb4c033e24c8659efd (patch) | |
tree | 33d33eb29395938f4ecc90667086bf6766db122e | |
parent | b896f625d1225450c0b30c4b82cb4d9af5642b9f (diff) |
Introduce a CIndex API for lexing the raw tokens within a given source
range. The token-annotation function does nothing, yet.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@94551 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang-c/Index.h | 119 | ||||
-rw-r--r-- | include/clang/Lex/Lexer.h | 3 | ||||
-rw-r--r-- | test/Index/annotate-tokens.c | 63 | ||||
-rw-r--r-- | tools/CIndex/CIndex.cpp | 192 | ||||
-rw-r--r-- | tools/CIndex/CIndex.exports | 7 | ||||
-rw-r--r-- | tools/CIndex/CIndexer.h | 3 | ||||
-rw-r--r-- | tools/CIndex/CXSourceLocation.h | 4 | ||||
-rw-r--r-- | tools/c-index-test/c-index-test.c | 181 |
8 files changed, 540 insertions, 32 deletions
diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h index ab7e55bcb6..ff0a0e1f09 100644 --- a/include/clang-c/Index.h +++ b/include/clang-c/Index.h @@ -861,6 +861,125 @@ CINDEX_LINKAGE unsigned clang_isCursorDefinition(CXCursor); */ /** + * \defgroup CINDEX_LEX Lexing and syntactic analysis + * + * @{ + */ + +/** + * \brief Describes a kind of token. + */ +typedef enum CXTokenKind { + /** + * \brief A token that contains some kind of punctuation. + */ + CXToken_Punctuation, + + /** + * \brief A language keyword. + */ + CXToken_Keyword, + + /** + * \brief An identifier (that is not a keyword). + */ + CXToken_Identifier, + + /** + * \brief A numeric, string, or character literal. + */ + CXToken_Literal, + + /** + * \brief A comment. + */ + CXToken_Comment +} CXTokenKind; + +/** + * \brief Describes a single preprocessing token. + */ +typedef struct { + unsigned int_data[4]; + void *ptr_data; +} CXToken; + +/** + * \brief Determine the kind of the given token. + */ +CINDEX_LINKAGE CXTokenKind clang_getTokenKind(CXToken); + +/** + * \brief Determine the spelling of the given token. + * + * The spelling of a token is the textual representation of that token, e.g., + * the text of an identifier or keyword. + */ +CINDEX_LINKAGE CXString clang_getTokenSpelling(CXTranslationUnit, CXToken); + +/** + * \brief Retrieve the source location of the given token. + */ +CINDEX_LINKAGE CXSourceLocation clang_getTokenLocation(CXTranslationUnit, + CXToken); + +/** + * \brief Retrieve a source range that covers the given token. + */ +CINDEX_LINKAGE CXSourceRange clang_getTokenExtent(CXTranslationUnit, CXToken); + +/** + * \brief Tokenize the source code described by the given range into raw + * lexical tokens. + * + * \param TU the translation unit whose text is being tokenized. + * + * \param Range the source range in which text should be tokenized. 
All of the + * tokens produced by tokenization will fall within this source range. + * + * \param Tokens this pointer will be set to point to the array of tokens + * that occur within the given source range. The returned pointer must be + * freed with clang_disposeTokens() before the translation unit is destroyed. + * + * \param NumTokens will be set to the number of tokens in the \c *Tokens + * array. + * + */ +CINDEX_LINKAGE void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, + CXToken **Tokens, unsigned *NumTokens); + +/** + * \brief Annotate the given set of tokens by providing cursors for each token + * that can be mapped to a specific entity within the abstract syntax tree. + * + * This token-annotation routine is equivalent to invoking clang_getCursor() + * for the source locations of each of the tokens, then accepting only those + * cursors that refer to a specific token. + * + * \param TU the translation unit that owns the given tokens. + * + * \param Tokens the set of tokens to annotate. + * + * \param NumTokens the number of tokens in \p Tokens. + * + * \param Cursors an array of \p NumTokens cursors, whose contents will be + * replaced with the cursors corresponding to each token. + */ +CINDEX_LINKAGE void clang_annotateTokens(CXTranslationUnit TU, + CXToken *Tokens, unsigned NumTokens, + CXCursor *Cursors); + +/** + * \brief Free the given set of tokens. + */ +CINDEX_LINKAGE void clang_disposeTokens(CXTranslationUnit TU, + CXToken *Tokens, unsigned NumTokens); + +/** + * @} + */ + +/** * \defgroup CINDEX_DEBUG Debugging facilities * * These routines are used for testing and debugging, only, and should not diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 0f36df43e2..6a6e319463 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -199,6 +199,9 @@ public: /// the current file. 
SourceLocation getSourceLocation() { return getSourceLocation(BufferPtr); } + /// \brief Return the current location in the buffer. + const char *getBufferLocation() const { return BufferPtr; } + /// Stringify - Convert the specified string into a C string by escaping '\' /// and " characters. This does not add surrounding ""'s to the string. /// If Charify is true, this escapes the ' character instead of ". diff --git a/test/Index/annotate-tokens.c b/test/Index/annotate-tokens.c new file mode 100644 index 0000000000..6d2b4d24f0 --- /dev/null +++ b/test/Index/annotate-tokens.c @@ -0,0 +1,63 @@ +typedef int T; +struct X { int a, b; }; +void f(void *ptr) { + T* t_ptr = (T *)ptr; + (void)sizeof(T); + /* A comment */ + struct X x = (struct X){1, 2}; + void *xx = ptr ? : &x; + const char * hello = "Hello"; +} + +// RUN: c-index-test -test-annotate-tokens=%s:4:1:9:32 %s | FileCheck %s +// CHECK: Identifier: "T" [4:3 - 4:3] +// CHECK: Punctuation: "*" [4:4 - 4:4] +// CHECK: Identifier: "t_ptr" [4:6 - 4:10] +// CHECK: Punctuation: "=" [4:12 - 4:12] +// CHECK: Punctuation: "(" [4:14 - 4:14] +// CHECK: Identifier: "T" [4:15 - 4:15] +// CHECK: Punctuation: "*" [4:17 - 4:17] +// CHECK: Punctuation: ")" [4:18 - 4:18] +// CHECK: Identifier: "ptr" [4:19 - 4:21] +// CHECK: Punctuation: ";" [4:22 - 4:22] +// CHECK: Punctuation: "(" [5:3 - 5:3] +// CHECK: Keyword: "void" [5:4 - 5:7] +// CHECK: Punctuation: ")" [5:8 - 5:8] +// CHECK: Keyword: "sizeof" [5:9 - 5:14] +// CHECK: Punctuation: "(" [5:15 - 5:15] +// CHECK: Identifier: "T" [5:16 - 5:16] +// CHECK: Punctuation: ")" [5:17 - 5:17] +// CHECK: Punctuation: ";" [5:18 - 5:18] +// CHECK: Comment: "/* A comment */" [6:3 - 6:17] +// CHECK: Keyword: "struct" [7:3 - 7:8] +// CHECK: Identifier: "X" [7:10 - 7:10] +// CHECK: Identifier: "x" [7:12 - 7:12] +// CHECK: Punctuation: "=" [7:14 - 7:14] +// CHECK: Punctuation: "(" [7:16 - 7:16] +// CHECK: Keyword: "struct" [7:17 - 7:22] +// CHECK: Identifier: "X" [7:24 - 7:24] +// CHECK: 
Punctuation: ")" [7:25 - 7:25] +// CHECK: Punctuation: "{" [7:26 - 7:26] +// CHECK: Literal: "1" [7:27 - 7:27] +// CHECK: Punctuation: "," [7:28 - 7:28] +// CHECK: Literal: "2" [7:30 - 7:30] +// CHECK: Punctuation: "}" [7:31 - 7:31] +// CHECK: Punctuation: ";" [7:32 - 7:32] +// CHECK: Keyword: "void" [8:3 - 8:6] +// CHECK: Punctuation: "*" [8:8 - 8:8] +// CHECK: Identifier: "xx" [8:9 - 8:10] +// CHECK: Punctuation: "=" [8:12 - 8:12] +// CHECK: Identifier: "ptr" [8:14 - 8:16] +// CHECK: Punctuation: "?" [8:18 - 8:18] +// CHECK: Punctuation: ":" [8:20 - 8:20] +// CHECK: Punctuation: "&" [8:22 - 8:22] +// CHECK: Identifier: "x" [8:23 - 8:23] +// CHECK: Punctuation: ";" [8:24 - 8:24] +// CHECK: Keyword: "const" [9:3 - 9:7] +// CHECK: Keyword: "char" [9:9 - 9:12] +// CHECK: Punctuation: "*" [9:14 - 9:14] +// CHECK: Identifier: "hello" [9:16 - 9:20] +// CHECK: Punctuation: "=" [9:22 - 9:22] +// CHECK: Literal: ""Hello"" [9:24 - 9:30] +// CHECK: Punctuation: ";" [9:31 - 9:31] +// CHECK: Punctuation: "}" [10:1 - 10:1] diff --git a/tools/CIndex/CIndex.cpp b/tools/CIndex/CIndex.cpp index 03519adc0a..55061cba71 100644 --- a/tools/CIndex/CIndex.cpp +++ b/tools/CIndex/CIndex.cpp @@ -876,6 +876,21 @@ CXString CIndexer::createCXString(const char *String, bool DupString){ return Str; } +CXString CIndexer::createCXString(llvm::StringRef String, bool DupString) { + CXString Result; + if (DupString || (!String.empty() && String.data()[String.size()] != 0)) { + char *Spelling = (char *)malloc(String.size() + 1); + memmove(Spelling, String.data(), String.size()); + Spelling[String.size()] = 0; + Result.Spelling = Spelling; + Result.MustFreeString = 1; + } else { + Result.Spelling = String.data(); + Result.MustFreeString = 0; + } + return Result; +} + extern "C" { CXIndex clang_createIndex(int excludeDeclarationsFromPCH, int displayDiagnostics) { @@ -1882,6 +1897,183 @@ void clang_getDefinitionSpellingAndExtent(CXCursor C, } // end: extern "C" 
//===----------------------------------------------------------------------===// +// Token-based Operations. +//===----------------------------------------------------------------------===// + +/* CXToken layout: + * int_data[0]: a CXTokenKind + * int_data[1]: starting token location + * int_data[2]: token length + * int_data[3]: reserved + * ptr_data: for identifiers and keywords, an IdentifierInfo*. + * otherwise unused. + */ +extern "C" { + +CXTokenKind clang_getTokenKind(CXToken CXTok) { + return static_cast<CXTokenKind>(CXTok.int_data[0]); +} + +CXString clang_getTokenSpelling(CXTranslationUnit TU, CXToken CXTok) { + switch (clang_getTokenKind(CXTok)) { + case CXToken_Identifier: + case CXToken_Keyword: + // We know we have an IdentifierInfo*, so use that. + return CIndexer::createCXString( + static_cast<IdentifierInfo *>(CXTok.ptr_data)->getNameStart()); + + case CXToken_Literal: { + // We have stashed the starting pointer in the ptr_data field. Use it. + const char *Text = static_cast<const char *>(CXTok.ptr_data); + return CIndexer::createCXString(llvm::StringRef(Text, CXTok.int_data[2]), + true); + } + + case CXToken_Punctuation: + case CXToken_Comment: + break; + } + + // We have to find the starting buffer pointer the hard way, by + // deconstructing the source location. 
+ ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU); + if (!CXXUnit) + return CIndexer::createCXString(""); + + SourceLocation Loc = SourceLocation::getFromRawEncoding(CXTok.int_data[1]); + std::pair<FileID, unsigned> LocInfo + = CXXUnit->getSourceManager().getDecomposedLoc(Loc); + std::pair<const char *,const char *> Buffer + = CXXUnit->getSourceManager().getBufferData(LocInfo.first); + + return CIndexer::createCXString(llvm::StringRef(Buffer.first+LocInfo.second, + CXTok.int_data[2]), + true); +} + +CXSourceLocation clang_getTokenLocation(CXTranslationUnit TU, CXToken CXTok) { + ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU); + if (!CXXUnit) + return clang_getNullLocation(); + + return cxloc::translateSourceLocation(CXXUnit->getASTContext(), + SourceLocation::getFromRawEncoding(CXTok.int_data[1])); +} + +CXSourceRange clang_getTokenExtent(CXTranslationUnit TU, CXToken CXTok) { + ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU); + if (!CXXUnit) { + CXSourceRange Result = { 0, 0, 0 }; + return Result; + } + + return cxloc::translateSourceRange(CXXUnit->getASTContext(), + SourceLocation::getFromRawEncoding(CXTok.int_data[1])); +} + +void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, + CXToken **Tokens, unsigned *NumTokens) { + if (Tokens) + *Tokens = 0; + if (NumTokens) + *NumTokens = 0; + + ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU); + if (!CXXUnit || !Tokens || !NumTokens) + return; + + SourceRange R = cxloc::translateSourceRange(Range); + if (R.isInvalid()) + return; + + SourceManager &SourceMgr = CXXUnit->getSourceManager(); + std::pair<FileID, unsigned> BeginLocInfo + = SourceMgr.getDecomposedLoc(R.getBegin()); + std::pair<FileID, unsigned> EndLocInfo + = SourceMgr.getDecomposedLoc(R.getEnd()); + + // Cannot tokenize across files. 
+ if (BeginLocInfo.first != EndLocInfo.first) + return; + + // Create a lexer + std::pair<const char *,const char *> Buffer + = SourceMgr.getBufferData(BeginLocInfo.first); + Lexer Lex(SourceMgr.getLocForStartOfFile(BeginLocInfo.first), + CXXUnit->getASTContext().getLangOptions(), + Buffer.first, Buffer.first + BeginLocInfo.second, Buffer.second); + Lex.SetCommentRetentionState(true); + + // Lex tokens until we hit the end of the range. + const char *EffectiveBufferEnd = Buffer.first + EndLocInfo.second; + llvm::SmallVector<CXToken, 32> CXTokens; + Token Tok; + do { + // Lex the next token + Lex.LexFromRawLexer(Tok); + if (Tok.is(tok::eof)) + break; + + // Initialize the CXToken. + CXToken CXTok; + + // - Common fields + CXTok.int_data[1] = Tok.getLocation().getRawEncoding(); + CXTok.int_data[2] = Tok.getLength(); + CXTok.int_data[3] = 0; + + // - Kind-specific fields + if (Tok.isLiteral()) { + CXTok.int_data[0] = CXToken_Literal; + CXTok.ptr_data = (void *)Tok.getLiteralData(); + } else if (Tok.is(tok::identifier)) { + // Look up the identifier to determine whether we have a keyword. + std::pair<FileID, unsigned> LocInfo + = SourceMgr.getDecomposedLoc(Tok.getLocation()); + const char *StartPos + = CXXUnit->getSourceManager().getBufferData(LocInfo.first).first + + LocInfo.second; + IdentifierInfo *II + = CXXUnit->getPreprocessor().LookUpIdentifierInfo(Tok, StartPos); + CXTok.int_data[0] = II->getTokenID() == tok::identifier? 
+ CXToken_Identifier + : CXToken_Keyword; + CXTok.ptr_data = II; + } else if (Tok.is(tok::comment)) { + CXTok.int_data[0] = CXToken_Comment; + CXTok.ptr_data = 0; + } else { + CXTok.int_data[0] = CXToken_Punctuation; + CXTok.ptr_data = 0; + } + CXTokens.push_back(CXTok); + } while (Lex.getBufferLocation() <= EffectiveBufferEnd); + + if (CXTokens.empty()) + return; + + *Tokens = (CXToken *)malloc(sizeof(CXToken) * CXTokens.size()); + memmove(*Tokens, CXTokens.data(), sizeof(CXToken) * CXTokens.size()); + *NumTokens = CXTokens.size(); +} + +void clang_annotateTokens(CXTranslationUnit TU, + CXToken *Tokens, unsigned NumTokens, + CXCursor *Cursors) { + // FIXME: Actually perform some meaningful lookup here. + for (unsigned I = 0; I != NumTokens; ++I) + Cursors[I] = clang_getNullCursor(); +} + +void clang_disposeTokens(CXTranslationUnit TU, + CXToken *Tokens, unsigned NumTokens) { + if (Tokens) + free(Tokens); +} + +} // end: extern "C" + +//===----------------------------------------------------------------------===// // CXString Operations. 
//===----------------------------------------------------------------------===// diff --git a/tools/CIndex/CIndex.exports b/tools/CIndex/CIndex.exports index b2ec58e5b9..fa141fc41c 100644 --- a/tools/CIndex/CIndex.exports +++ b/tools/CIndex/CIndex.exports @@ -1,3 +1,4 @@ +_clang_annotateTokens _clang_codeComplete _clang_createIndex _clang_createTranslationUnit @@ -5,6 +6,7 @@ _clang_createTranslationUnitFromSourceFile _clang_disposeCodeCompleteResults _clang_disposeIndex _clang_disposeString +_clang_disposeTokens _clang_disposeTranslationUnit _clang_equalCursors _clang_equalLocations @@ -35,6 +37,10 @@ _clang_getNumCompletionChunks _clang_getRange _clang_getRangeEnd _clang_getRangeStart +_clang_getTokenExtent +_clang_getTokenKind +_clang_getTokenLocation +_clang_getTokenSpelling _clang_getTranslationUnitCursor _clang_getTranslationUnitSpelling _clang_isCursorDefinition @@ -45,4 +51,5 @@ _clang_isReference _clang_isStatement _clang_isTranslationUnit _clang_setUseExternalASTGeneration +_clang_tokenize _clang_visitChildren diff --git a/tools/CIndex/CIndexer.h b/tools/CIndex/CIndexer.h index d01454f9dc..aa63ec0238 100644 --- a/tools/CIndex/CIndexer.h +++ b/tools/CIndex/CIndexer.h @@ -18,6 +18,7 @@ #include "clang-c/Index.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/ASTUnit.h" +#include "llvm/ADT/StringRef.h" #include "llvm/System/Path.h" #include <vector> @@ -76,6 +77,8 @@ public: std::string getClangResourcesPath(); static CXString createCXString(const char *String, bool DupString = false); + static CXString createCXString(llvm::StringRef String, + bool DupString = false); }; namespace clang { diff --git a/tools/CIndex/CXSourceLocation.h b/tools/CIndex/CXSourceLocation.h index 0eab273c35..1f15f0832c 100644 --- a/tools/CIndex/CXSourceLocation.h +++ b/tools/CIndex/CXSourceLocation.h @@ -38,8 +38,8 @@ static inline CXSourceLocation translateSourceLocation(ASTContext &Context, static inline CXSourceRange translateSourceRange(ASTContext 
&Context, SourceRange R) { CXSourceRange Result = { &Context, - R.getBegin().getRawEncoding(), - R.getEnd().getRawEncoding() }; + R.getBegin().getRawEncoding(), + R.getEnd().getRawEncoding() }; return Result; } diff --git a/tools/c-index-test/c-index-test.c b/tools/c-index-test/c-index-test.c index 4ef3904139..222ffbaa63 100644 --- a/tools/c-index-test/c-index-test.c +++ b/tools/c-index-test/c-index-test.c @@ -481,42 +481,62 @@ static int perform_file_scan(const char *ast_file, const char *source_file, on failure. If successful, the pointer *filename will contain newly-allocated memory (that will be owned by the caller) to store the file name. */ int parse_file_line_column(const char *input, char **filename, unsigned *line, - unsigned *column) { + unsigned *column, unsigned *second_line, + unsigned *second_column) { /* Find the second colon. */ - const char *second_colon = strrchr(input, ':'), *first_colon; + const char *last_colon = strrchr(input, ':'); + unsigned values[4], i; + unsigned num_values = (second_line && second_column)? 4 : 2; + char *endptr = 0; - if (!second_colon || second_colon == input) { - fprintf(stderr, "could not parse filename:line:column in '%s'\n", input); + if (!last_colon || last_colon == input) { + if (num_values == 4) + fprintf(stderr, "could not parse filename:line:column:line:column in " + "'%s'\n", input); + else + fprintf(stderr, "could not parse filename:line:column in '%s'\n", input); return 1; } - /* Parse the column number. */ - *column = strtol(second_colon + 1, &endptr, 10); - if (*endptr != 0) { - fprintf(stderr, "could not parse column in '%s'\n", input); - return 1; - } + for (i = 0; i != num_values; ++i) { + const char *prev_colon; - /* Find the first colon. */ - first_colon = second_colon - 1; - while (first_colon != input && *first_colon != ':') - --first_colon; - if (first_colon == input) { - fprintf(stderr, "could not parse line in '%s'\n", input); - return 1; - } + /* Parse the next line or column. 
*/ + values[num_values - i - 1] = strtol(last_colon + 1, &endptr, 10); + if (*endptr != 0 && *endptr != ':') { + fprintf(stderr, "could not parse %s in '%s'\n", + (i % 2 ? "column" : "line"), input); + return 1; + } + + if (i + 1 == num_values) + break; - /* Parse the line number. */ - *line = strtol(first_colon + 1, &endptr, 10); - if (*endptr != ':') { - fprintf(stderr, "could not parse line in '%s'\n", input); - return 1; + /* Find the previous colon. */ + prev_colon = last_colon - 1; + while (prev_colon != input && *prev_colon != ':') + --prev_colon; + if (prev_colon == input) { + fprintf(stderr, "could not parse %s in '%s'\n", + (i % 2 == 0? "column" : "line"), input); + return 1; + } + + last_colon = prev_colon; } + + *line = values[0]; + *column = values[1]; + if (second_line && second_column) { + *second_line = values[2]; + *second_column = values[3]; + } + /* Copy the file name. */ - *filename = (char*)malloc(first_colon - input + 1); - memcpy(*filename, input, first_colon - input); - (*filename)[first_colon - input] = 0; + *filename = (char*)malloc(last_colon - input + 1); + memcpy(*filename, input, last_colon - input); + (*filename)[last_colon - input] = 0; return 0; } @@ -595,7 +615,8 @@ int perform_code_completion(int argc, const char **argv) { CXCodeCompleteResults *results = 0; input += strlen("-code-completion-at="); - if ((errorCode = parse_file_line_column(input, &filename, &line, &column))) + if ((errorCode = parse_file_line_column(input, &filename, &line, &column, + 0, 0))) return errorCode; if (parse_remapped_files(argc, argv, 2, &unsaved_files, &num_unsaved_files)) @@ -650,7 +671,7 @@ int inspect_cursor_at(int argc, const char **argv) { const char *input = argv[Loc + 1] + strlen("-cursor-at="); if ((errorCode = parse_file_line_column(input, &Locations[Loc].filename, &Locations[Loc].line, - &Locations[Loc].column))) + &Locations[Loc].column, 0, 0))) return errorCode; } @@ -689,6 +710,104 @@ int inspect_cursor_at(int argc, const char **argv) { 
return 0; } +int perform_token_annotation(int argc, const char **argv) { + const char *input = argv[1]; + char *filename = 0; + unsigned line, second_line; + unsigned column, second_column; + CXIndex CIdx; + CXTranslationUnit TU = 0; + int errorCode; + struct CXUnsavedFile *unsaved_files = 0; + int num_unsaved_files = 0; + CXToken *tokens; + unsigned num_tokens; + CXSourceRange range; + CXSourceLocation startLoc, endLoc; + CXFile file = 0; + CXCursor *cursors = 0; + unsigned i; + + input += strlen("-test-annotate-tokens="); + if ((errorCode = parse_file_line_column(input, &filename, &line, &column, + &second_line, &second_column))) + return errorCode; + + if (parse_remapped_files(argc, argv, 2, &unsaved_files, &num_unsaved_files)) + return -1; + + CIdx = clang_createIndex(0, 0); + TU = clang_createTranslationUnitFromSourceFile(CIdx, argv[argc - 1], + argc - num_unsaved_files - 3, + argv + num_unsaved_files + 2, + num_unsaved_files, + unsaved_files); + if (!TU) { + fprintf(stderr, "unable to parse input\n"); + clang_disposeIndex(CIdx); + free(filename); + free_remapped_files(unsaved_files, num_unsaved_files); + return -1; + } + errorCode = 0; + + file = clang_getFile(TU, filename); + if (!file) { + fprintf(stderr, "file %s is not in this translation unit\n", filename); + errorCode = -1; + goto teardown; + } + + startLoc = clang_getLocation(TU, file, line, column); + if (clang_equalLocations(clang_getNullLocation(), startLoc)) { + fprintf(stderr, "invalid source location %s:%d:%d\n", filename, line, + column); + errorCode = -1; + goto teardown; + } + + endLoc = clang_getLocation(TU, file, second_line, second_column); + if (clang_equalLocations(clang_getNullLocation(), endLoc)) { + fprintf(stderr, "invalid source location %s:%d:%d\n", filename, + second_line, second_column); + errorCode = -1; + goto teardown; + } + + range = clang_getRange(startLoc, endLoc); + clang_tokenize(TU, range, &tokens, &num_tokens); + cursors = (CXCursor *)malloc(num_tokens * 
sizeof(CXCursor)); + clang_annotateTokens(TU, tokens, num_tokens, cursors); + for (i = 0; i != num_tokens; ++i) { + const char *kind = "<unknown>"; + CXString spelling = clang_getTokenSpelling(TU, tokens[i]); + CXSourceRange extent = clang_getTokenExtent(TU, tokens[i]); + unsigned start_line, start_column, end_line, end_column; + + switch (clang_getTokenKind(tokens[i])) { + case CXToken_Punctuation: kind = "Punctuation"; break; + case CXToken_Keyword: kind = "Keyword"; break; + case CXToken_Identifier: kind = "Identifier"; break; + case CXToken_Literal: kind = "Literal"; break; + case CXToken_Comment: kind = "Comment"; break; + } + clang_getInstantiationLocation(clang_getRangeStart(extent), + 0, &start_line, &start_column); + clang_getInstantiationLocation(clang_getRangeEnd(extent), + 0, &end_line, &end_column); + printf("%s: \"%s\" [%d:%d - %d:%d]\n", kind, clang_getCString(spelling), + start_line, start_column, end_line, end_column); + } + free(cursors); + + teardown: + clang_disposeTranslationUnit(TU); + clang_disposeIndex(CIdx); + free(filename); + free_remapped_files(unsaved_files, num_unsaved_files); + return errorCode; +} + /******************************************************************************/ /* Command line processing. 
*/ /******************************************************************************/ @@ -712,8 +831,9 @@ static void print_usage(void) { " c-index-test -test-load-tu-usrs <AST file> <symbol filter> " "[FileCheck prefix]\n" " c-index-test -test-load-source <symbol filter> {<args>}*\n" - " c-index-test -test-load-source-usrs <symbol filter> {<args>}*\n\n"); + " c-index-test -test-load-source-usrs <symbol filter> {<args>}*\n"); fprintf(stderr, + " c-index-test -test-annotate-tokens=<range> {<args>}* \n\n" " <symbol filter> values:\n%s", " all - load all symbols, including those from PCH\n" " local - load all symbols except those in PCH\n" @@ -743,7 +863,8 @@ int main(int argc, const char **argv) { else if (argc >= 4 && strcmp(argv[1], "-test-file-scan") == 0) return perform_file_scan(argv[2], argv[3], argc >= 5 ? argv[4] : 0); - + else if (argc > 2 && strstr(argv[1], "-test-annotate-tokens=") == argv[1]) + return perform_token_annotation(argc, argv); print_usage(); return 1; } |