diff options
author | Douglas Gregor <dgregor@apple.com> | 2010-01-26 17:06:03 +0000 |
---|---|---|
committer | Douglas Gregor <dgregor@apple.com> | 2010-01-26 17:06:03 +0000 |
commit | fc8ea23eb6cbaaa5046f2abb4c033e24c8659efd (patch) | |
tree | 33d33eb29395938f4ecc90667086bf6766db122e /tools | |
parent | b896f625d1225450c0b30c4b82cb4d9af5642b9f (diff) |
Introduce a CIndex API for lexing the raw tokens within a given source
range. The token-annotation function does nothing, yet.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@94551 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools')
-rw-r--r-- | tools/CIndex/CIndex.cpp | 192 | ||||
-rw-r--r-- | tools/CIndex/CIndex.exports | 7 | ||||
-rw-r--r-- | tools/CIndex/CIndexer.h | 3 | ||||
-rw-r--r-- | tools/CIndex/CXSourceLocation.h | 4 | ||||
-rw-r--r-- | tools/c-index-test/c-index-test.c | 181 |
5 files changed, 355 insertions, 32 deletions
diff --git a/tools/CIndex/CIndex.cpp b/tools/CIndex/CIndex.cpp index 03519adc0a..55061cba71 100644 --- a/tools/CIndex/CIndex.cpp +++ b/tools/CIndex/CIndex.cpp @@ -876,6 +876,21 @@ CXString CIndexer::createCXString(const char *String, bool DupString){ return Str; } +CXString CIndexer::createCXString(llvm::StringRef String, bool DupString) { + CXString Result; + if (DupString || (!String.empty() && String.data()[String.size()] != 0)) { + char *Spelling = (char *)malloc(String.size() + 1); + memmove(Spelling, String.data(), String.size()); + Spelling[String.size()] = 0; + Result.Spelling = Spelling; + Result.MustFreeString = 1; + } else { + Result.Spelling = String.data(); + Result.MustFreeString = 0; + } + return Result; +} + extern "C" { CXIndex clang_createIndex(int excludeDeclarationsFromPCH, int displayDiagnostics) { @@ -1882,6 +1897,183 @@ void clang_getDefinitionSpellingAndExtent(CXCursor C, } // end: extern "C" //===----------------------------------------------------------------------===// +// Token-based Operations. +//===----------------------------------------------------------------------===// + +/* CXToken layout: + * int_data[0]: a CXTokenKind + * int_data[1]: starting token location + * int_data[2]: token length + * int_data[3]: reserved + * ptr_data: for identifiers and keywords, an IdentifierInfo*. + * otherwise unused. + */ +extern "C" { + +CXTokenKind clang_getTokenKind(CXToken CXTok) { + return static_cast<CXTokenKind>(CXTok.int_data[0]); +} + +CXString clang_getTokenSpelling(CXTranslationUnit TU, CXToken CXTok) { + switch (clang_getTokenKind(CXTok)) { + case CXToken_Identifier: + case CXToken_Keyword: + // We know we have an IdentifierInfo*, so use that. + return CIndexer::createCXString( + static_cast<IdentifierInfo *>(CXTok.ptr_data)->getNameStart()); + + case CXToken_Literal: { + // We have stashed the starting pointer in the ptr_data field. Use it. + const char *Text = static_cast<const char *>(CXTok.ptr_data); + return CIndexer::createCXString(llvm::StringRef(Text, CXTok.int_data[2]), + true); + } + + case CXToken_Punctuation: + case CXToken_Comment: + break; + } + + // We have to find the starting buffer pointer the hard way, by + // deconstructing the source location. + ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU); + if (!CXXUnit) + return CIndexer::createCXString(""); + + SourceLocation Loc = SourceLocation::getFromRawEncoding(CXTok.int_data[1]); + std::pair<FileID, unsigned> LocInfo + = CXXUnit->getSourceManager().getDecomposedLoc(Loc); + std::pair<const char *,const char *> Buffer + = CXXUnit->getSourceManager().getBufferData(LocInfo.first); + + return CIndexer::createCXString(llvm::StringRef(Buffer.first+LocInfo.second, + CXTok.int_data[2]), + true); +} + +CXSourceLocation clang_getTokenLocation(CXTranslationUnit TU, CXToken CXTok) { + ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU); + if (!CXXUnit) + return clang_getNullLocation(); + + return cxloc::translateSourceLocation(CXXUnit->getASTContext(), + SourceLocation::getFromRawEncoding(CXTok.int_data[1])); +} + +CXSourceRange clang_getTokenExtent(CXTranslationUnit TU, CXToken CXTok) { + ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU); + if (!CXXUnit) { + CXSourceRange Result = { 0, 0, 0 }; + return Result; + } + + return cxloc::translateSourceRange(CXXUnit->getASTContext(), + SourceLocation::getFromRawEncoding(CXTok.int_data[1])); +} + +void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, + CXToken **Tokens, unsigned *NumTokens) { + if (Tokens) + *Tokens = 0; + if (NumTokens) + *NumTokens = 0; + + ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU); + if (!CXXUnit || !Tokens || !NumTokens) + return; + + SourceRange R = cxloc::translateSourceRange(Range); + if (R.isInvalid()) + return; + + SourceManager &SourceMgr = CXXUnit->getSourceManager(); + std::pair<FileID, unsigned> BeginLocInfo + = SourceMgr.getDecomposedLoc(R.getBegin()); + std::pair<FileID, unsigned> EndLocInfo + = SourceMgr.getDecomposedLoc(R.getEnd()); + + // Cannot tokenize across files. + if (BeginLocInfo.first != EndLocInfo.first) + return; + + // Create a lexer + std::pair<const char *,const char *> Buffer + = SourceMgr.getBufferData(BeginLocInfo.first); + Lexer Lex(SourceMgr.getLocForStartOfFile(BeginLocInfo.first), + CXXUnit->getASTContext().getLangOptions(), + Buffer.first, Buffer.first + BeginLocInfo.second, Buffer.second); + Lex.SetCommentRetentionState(true); + + // Lex tokens until we hit the end of the range. + const char *EffectiveBufferEnd = Buffer.first + EndLocInfo.second; + llvm::SmallVector<CXToken, 32> CXTokens; + Token Tok; + do { + // Lex the next token + Lex.LexFromRawLexer(Tok); + if (Tok.is(tok::eof)) + break; + + // Initialize the CXToken. + CXToken CXTok; + + // - Common fields + CXTok.int_data[1] = Tok.getLocation().getRawEncoding(); + CXTok.int_data[2] = Tok.getLength(); + CXTok.int_data[3] = 0; + + // - Kind-specific fields + if (Tok.isLiteral()) { + CXTok.int_data[0] = CXToken_Literal; + CXTok.ptr_data = (void *)Tok.getLiteralData(); + } else if (Tok.is(tok::identifier)) { + // Lookup the identifier to determine whether we have a + std::pair<FileID, unsigned> LocInfo + = SourceMgr.getDecomposedLoc(Tok.getLocation()); + const char *StartPos + = CXXUnit->getSourceManager().getBufferData(LocInfo.first).first + + LocInfo.second; + IdentifierInfo *II + = CXXUnit->getPreprocessor().LookUpIdentifierInfo(Tok, StartPos); + CXTok.int_data[0] = II->getTokenID() == tok::identifier? + CXToken_Identifier + : CXToken_Keyword; + CXTok.ptr_data = II; + } else if (Tok.is(tok::comment)) { + CXTok.int_data[0] = CXToken_Comment; + CXTok.ptr_data = 0; + } else { + CXTok.int_data[0] = CXToken_Punctuation; + CXTok.ptr_data = 0; + } + CXTokens.push_back(CXTok); + } while (Lex.getBufferLocation() <= EffectiveBufferEnd); + + if (CXTokens.empty()) + return; + + *Tokens = (CXToken *)malloc(sizeof(CXToken) * CXTokens.size()); + memmove(*Tokens, CXTokens.data(), sizeof(CXToken) * CXTokens.size()); + *NumTokens = CXTokens.size(); +} + +void clang_annotateTokens(CXTranslationUnit TU, + CXToken *Tokens, unsigned NumTokens, + CXCursor *Cursors) { + // FIXME: Actually perform some meaningful lookup here. + for (unsigned I = 0; I != NumTokens; ++I) + Cursors[I] = clang_getNullCursor(); +} + +void clang_disposeTokens(CXTranslationUnit TU, + CXToken *Tokens, unsigned NumTokens) { + if (Tokens) + free(Tokens); +} + +} // end: extern "C" + +//===----------------------------------------------------------------------===// // CXString Operations. //===----------------------------------------------------------------------===// diff --git a/tools/CIndex/CIndex.exports b/tools/CIndex/CIndex.exports index b2ec58e5b9..fa141fc41c 100644 --- a/tools/CIndex/CIndex.exports +++ b/tools/CIndex/CIndex.exports @@ -1,3 +1,4 @@ +_clang_annotateTokens _clang_codeComplete _clang_createIndex _clang_createTranslationUnit @@ -5,6 +6,7 @@ _clang_createTranslationUnitFromSourceFile _clang_disposeCodeCompleteResults _clang_disposeIndex _clang_disposeString +_clang_disposeTokens _clang_disposeTranslationUnit _clang_equalCursors _clang_equalLocations @@ -35,6 +37,10 @@ _clang_getNumCompletionChunks _clang_getRange _clang_getRangeEnd _clang_getRangeStart +_clang_getTokenExtent +_clang_getTokenKind +_clang_getTokenLocation +_clang_getTokenSpelling _clang_getTranslationUnitCursor _clang_getTranslationUnitSpelling _clang_isCursorDefinition @@ -45,4 +51,5 @@ _clang_isReference _clang_isStatement _clang_isTranslationUnit _clang_setUseExternalASTGeneration +_clang_tokenize _clang_visitChildren diff --git a/tools/CIndex/CIndexer.h b/tools/CIndex/CIndexer.h index d01454f9dc..aa63ec0238 100644 --- a/tools/CIndex/CIndexer.h +++ b/tools/CIndex/CIndexer.h @@ -18,6 +18,7 @@ #include "clang-c/Index.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/ASTUnit.h" +#include "llvm/ADT/StringRef.h" #include "llvm/System/Path.h" #include <vector> @@ -76,6 +77,8 @@ public: std::string getClangResourcesPath(); static CXString createCXString(const char *String, bool DupString = false); + static CXString createCXString(llvm::StringRef String, + bool DupString = false); }; namespace clang { diff --git a/tools/CIndex/CXSourceLocation.h b/tools/CIndex/CXSourceLocation.h index 0eab273c35..1f15f0832c 100644 --- a/tools/CIndex/CXSourceLocation.h +++ b/tools/CIndex/CXSourceLocation.h @@ -38,8 +38,8 @@ static inline CXSourceLocation translateSourceLocation(ASTContext &Context, static inline CXSourceRange translateSourceRange(ASTContext &Context, SourceRange R) { CXSourceRange Result = { &Context, - R.getBegin().getRawEncoding(), - R.getEnd().getRawEncoding() }; + R.getBegin().getRawEncoding(), + R.getEnd().getRawEncoding() }; return Result; } diff --git a/tools/c-index-test/c-index-test.c b/tools/c-index-test/c-index-test.c index 4ef3904139..222ffbaa63 100644 --- a/tools/c-index-test/c-index-test.c +++ b/tools/c-index-test/c-index-test.c @@ -481,42 +481,62 @@ static int perform_file_scan(const char *ast_file, const char *source_file, on failure. If successful, the pointer *filename will contain newly-allocated memory (that will be owned by the caller) to store the file name. */ int parse_file_line_column(const char *input, char **filename, unsigned *line, - unsigned *column) { + unsigned *column, unsigned *second_line, + unsigned *second_column) { /* Find the second colon. */ - const char *second_colon = strrchr(input, ':'), *first_colon; + const char *last_colon = strrchr(input, ':'); + unsigned values[4], i; + unsigned num_values = (second_line && second_column)? 4 : 2; + char *endptr = 0; - if (!second_colon || second_colon == input) { - fprintf(stderr, "could not parse filename:line:column in '%s'\n", input); + if (!last_colon || last_colon == input) { + if (num_values == 4) + fprintf(stderr, "could not parse filename:line:column:line:column in " + "'%s'\n", input); + else + fprintf(stderr, "could not parse filename:line:column in '%s'\n", input); return 1; } - /* Parse the column number. */ - *column = strtol(second_colon + 1, &endptr, 10); - if (*endptr != 0) { - fprintf(stderr, "could not parse column in '%s'\n", input); - return 1; - } + for (i = 0; i != num_values; ++i) { + const char *prev_colon; - /* Find the first colon. */ - first_colon = second_colon - 1; - while (first_colon != input && *first_colon != ':') - --first_colon; - if (first_colon == input) { - fprintf(stderr, "could not parse line in '%s'\n", input); - return 1; - } + /* Parse the next line or column. */ + values[num_values - i - 1] = strtol(last_colon + 1, &endptr, 10); + if (*endptr != 0 && *endptr != ':') { + fprintf(stderr, "could not parse %s in '%s'\n", + (i % 2 ? "column" : "line"), input); + return 1; + } + + if (i + 1 == num_values) + break; - /* Parse the line number. */ - *line = strtol(first_colon + 1, &endptr, 10); - if (*endptr != ':') { - fprintf(stderr, "could not parse line in '%s'\n", input); - return 1; + /* Find the previous colon. */ + prev_colon = last_colon - 1; + while (prev_colon != input && *prev_colon != ':') + --prev_colon; + if (prev_colon == input) { + fprintf(stderr, "could not parse %s in '%s'\n", + (i % 2 == 0? "column" : "line"), input); + return 1; + } + + last_colon = prev_colon; } + + *line = values[0]; + *column = values[1]; + if (second_line && second_column) { + *second_line = values[2]; + *second_column = values[3]; + } + /* Copy the file name. */ - *filename = (char*)malloc(first_colon - input + 1); - memcpy(*filename, input, first_colon - input); - (*filename)[first_colon - input] = 0; + *filename = (char*)malloc(last_colon - input + 1); + memcpy(*filename, input, last_colon - input); + (*filename)[last_colon - input] = 0; return 0; } @@ -595,7 +615,8 @@ int perform_code_completion(int argc, const char **argv) { CXCodeCompleteResults *results = 0; input += strlen("-code-completion-at="); - if ((errorCode = parse_file_line_column(input, &filename, &line, &column))) + if ((errorCode = parse_file_line_column(input, &filename, &line, &column, + 0, 0))) return errorCode; if (parse_remapped_files(argc, argv, 2, &unsaved_files, &num_unsaved_files)) @@ -650,7 +671,7 @@ int inspect_cursor_at(int argc, const char **argv) { const char *input = argv[Loc + 1] + strlen("-cursor-at="); if ((errorCode = parse_file_line_column(input, &Locations[Loc].filename, &Locations[Loc].line, - &Locations[Loc].column))) + &Locations[Loc].column, 0, 0))) return errorCode; } @@ -689,6 +710,104 @@ int inspect_cursor_at(int argc, const char **argv) { return 0; } +int perform_token_annotation(int argc, const char **argv) { + const char *input = argv[1]; + char *filename = 0; + unsigned line, second_line; + unsigned column, second_column; + CXIndex CIdx; + CXTranslationUnit TU = 0; + int errorCode; + struct CXUnsavedFile *unsaved_files = 0; + int num_unsaved_files = 0; + CXToken *tokens; + unsigned num_tokens; + CXSourceRange range; + CXSourceLocation startLoc, endLoc; + CXFile file = 0; + CXCursor *cursors = 0; + unsigned i; + + input += strlen("-test-annotate-tokens="); + if ((errorCode = parse_file_line_column(input, &filename, &line, &column, + &second_line, &second_column))) + return errorCode; + + if (parse_remapped_files(argc, argv, 2, &unsaved_files, &num_unsaved_files)) + return -1; + + CIdx = clang_createIndex(0, 0); + TU = clang_createTranslationUnitFromSourceFile(CIdx, argv[argc - 1], + argc - num_unsaved_files - 3, + argv + num_unsaved_files + 2, + num_unsaved_files, + unsaved_files); + if (!TU) { + fprintf(stderr, "unable to parse input\n"); + clang_disposeIndex(CIdx); + free(filename); + free_remapped_files(unsaved_files, num_unsaved_files); + return -1; + } + errorCode = 0; + + file = clang_getFile(TU, filename); + if (!file) { + fprintf(stderr, "file %s is not in this translation unit\n", filename); + errorCode = -1; + goto teardown; + } + + startLoc = clang_getLocation(TU, file, line, column); + if (clang_equalLocations(clang_getNullLocation(), startLoc)) { + fprintf(stderr, "invalid source location %s:%d:%d\n", filename, line, + column); + errorCode = -1; + goto teardown; + } + + endLoc = clang_getLocation(TU, file, second_line, second_column); + if (clang_equalLocations(clang_getNullLocation(), endLoc)) { + fprintf(stderr, "invalid source location %s:%d:%d\n", filename, + second_line, second_column); + errorCode = -1; + goto teardown; + } + + range = clang_getRange(startLoc, endLoc); + clang_tokenize(TU, range, &tokens, &num_tokens); + cursors = (CXCursor *)malloc(num_tokens * sizeof(CXCursor)); + clang_annotateTokens(TU, tokens, num_tokens, cursors); + for (i = 0; i != num_tokens; ++i) { + const char *kind = "<unknown>"; + CXString spelling = clang_getTokenSpelling(TU, tokens[i]); + CXSourceRange extent = clang_getTokenExtent(TU, tokens[i]); + unsigned start_line, start_column, end_line, end_column; + + switch (clang_getTokenKind(tokens[i])) { + case CXToken_Punctuation: kind = "Punctuation"; break; + case CXToken_Keyword: kind = "Keyword"; break; + case CXToken_Identifier: kind = "Identifier"; break; + case CXToken_Literal: kind = "Literal"; break; + case CXToken_Comment: kind = "Comment"; break; + } + clang_getInstantiationLocation(clang_getRangeStart(extent), + 0, &start_line, &start_column); + clang_getInstantiationLocation(clang_getRangeEnd(extent), + 0, &end_line, &end_column); + printf("%s: \"%s\" [%d:%d - %d:%d]\n", kind, clang_getCString(spelling), + start_line, start_column, end_line, end_column); + } + free(cursors); + + teardown: + clang_disposeTranslationUnit(TU); + clang_disposeIndex(CIdx); + free(filename); + free_remapped_files(unsaved_files, num_unsaved_files); + return errorCode; +} + /******************************************************************************/ /* Command line processing. */ /******************************************************************************/ @@ -712,8 +831,9 @@ static void print_usage(void) { " c-index-test -test-load-tu-usrs <AST file> <symbol filter> " "[FileCheck prefix]\n" " c-index-test -test-load-source <symbol filter> {<args>}*\n" - " c-index-test -test-load-source-usrs <symbol filter> {<args>}*\n\n"); + " c-index-test -test-load-source-usrs <symbol filter> {<args>}*\n"); fprintf(stderr, + " c-index-test -test-annotate-tokens=<range> {<args>}* \n\n" " <symbol filter> values:\n%s", " all - load all symbols, including those from PCH\n" " local - load all symbols except those in PCH\n" @@ -743,7 +863,8 @@ int main(int argc, const char **argv) { else if (argc >= 4 && strcmp(argv[1], "-test-file-scan") == 0) return perform_file_scan(argv[2], argv[3], argc >= 5 ? argv[4] : 0); - + else if (argc > 2 && strstr(argv[1], "-test-annotate-tokens=") == argv[1]) + return perform_token_annotation(argc, argv); print_usage(); return 1; } |