Introduce a CIndex API for lexing the raw tokens within a given source range.

The token-annotation function does nothing, yet.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@94551 91177308-0d34-0410-b5e6-96231b3b80d8
author: Douglas Gregor <dgregor@apple.com> 2010-01-26 17:06:03 +0000
committer: Douglas Gregor <dgregor@apple.com> 2010-01-26 17:06:03 +0000
commit: fc8ea23eb6cbaaa5046f2abb4c033e24c8659efd (patch)
tree: 33d33eb29395938f4ecc90667086bf6766db122e
parent: b896f625d1225450c0b30c4b82cb4d9af5642b9f (diff)
8 files changed, 540 insertions, 32 deletions
diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h
index ab7e55bcb6..ff0a0e1f09 100644
--- a/include/clang-c/Index.h
+++ b/include/clang-c/Index.h
@@ -861,6 +861,125 @@ CINDEX_LINKAGE unsigned clang_isCursorDefinition(CXCursor);
  */
 
 /**
+ * \defgroup CINDEX_LEX Lexing and syntactic analysis
+ *
+ * @{
+ */
+
+/**
+ * \brief Describes a kind of token.
+ */
+typedef enum CXTokenKind {
+  /**
+   * \brief A token that contains some kind of punctuation.
+   */
+  CXToken_Punctuation,
+  
+  /**
+   * \brief A language keyword.
+   */
+  CXToken_Keyword,
+  
+  /**
+   * \brief An identifier (that is not a keyword).
+   */
+  CXToken_Identifier,
+  
+  /**
+   * \brief A numeric, string, or character literal.
+   */
+  CXToken_Literal,
+  
+  /**
+   * \brief A comment.
+   */
+  CXToken_Comment
+} CXTokenKind;
+
+/**
+ * \brief Describes a single preprocessing token.
+ */
+typedef struct {
+  unsigned int_data[4];
+  void *ptr_data;
+} CXToken;
+
+/**
+ * \brief Determine the kind of the given token.
+ */
+CINDEX_LINKAGE CXTokenKind clang_getTokenKind(CXToken);
+  
+/**
+ * \brief Determine the spelling of the given token.
+ *
+ * The spelling of a token is the textual representation of that token, e.g.,
+ * the text of an identifier or keyword.
+ */
+CINDEX_LINKAGE CXString clang_getTokenSpelling(CXTranslationUnit, CXToken);
+  
+/**
+ * \brief Retrieve the source location of the given token.
+ */
+CINDEX_LINKAGE CXSourceLocation clang_getTokenLocation(CXTranslationUnit, 
+                                                       CXToken);
+  
+/**
+ * \brief Retrieve a source range that covers the given token.
+ */
+CINDEX_LINKAGE CXSourceRange clang_getTokenExtent(CXTranslationUnit, CXToken);
+
+/**
+ * \brief Tokenize the source code described by the given range into raw
+ * lexical tokens.
+ *
+ * \param TU the translation unit whose text is being tokenized.
+ *
+ * \param Range the source range in which text should be tokenized. All of the
+ * tokens produced by tokenization will fall within this source range.
+ *
+ * \param Tokens this pointer will be set to point to the array of tokens
+ * that occur within the given source range. The returned pointer must be
+ * freed with clang_disposeTokens() before the translation unit is destroyed.
+ *
+ * \param NumTokens will be set to the number of tokens in the \c *Tokens
+ * array.
+ *
+ */
+CINDEX_LINKAGE void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range,
+                                   CXToken **Tokens, unsigned *NumTokens);
+  
+/**
+ * \brief Annotate the given set of tokens by providing cursors for each token
+ * that can be mapped to a specific entity within the abstract syntax tree.
+ *
+ * This token-annotation routine is equivalent to invoking clang_getCursor() 
+ * for the source locations of each of the tokens, then accepting only those
+ * cursors that refer to a specific token.
+ *
+ * \param TU the translation unit that owns the given tokens.
+ *
+ * \param Tokens the set of tokens to annotate.
+ *
+ * \param NumTokens the number of tokens in \p Tokens.
+ *
+ * \param Cursors an array of \p NumTokens cursors, whose contents will be
+ * replaced with the cursors corresponding to each token.
+ */
+CINDEX_LINKAGE void clang_annotateTokens(CXTranslationUnit TU,
+                                         CXToken *Tokens, unsigned NumTokens,
+                                         CXCursor *Cursors);
+  
+/**
+ * \brief Free the given set of tokens.
+ */
+CINDEX_LINKAGE void clang_disposeTokens(CXTranslationUnit TU, 
+                                        CXToken *Tokens, unsigned NumTokens);
+  
+/**
+ * @}
+ */
+  
+/**
  * \defgroup CINDEX_DEBUG Debugging facilities
  *
  * These routines are used for testing and debugging, only, and should not
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 0f36df43e2..6a6e319463 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -199,6 +199,9 @@ public:
   /// the current file.
   SourceLocation getSourceLocation() { return getSourceLocation(BufferPtr); }
 
+  /// \brief Return the current location in the buffer.
+  const char *getBufferLocation() const { return BufferPtr; }
+  
   /// Stringify - Convert the specified string into a C string by escaping '\'
   /// and " characters.  This does not add surrounding ""'s to the string.
   /// If Charify is true, this escapes the ' character instead of ".
diff --git a/test/Index/annotate-tokens.c b/test/Index/annotate-tokens.c
new file mode 100644
index 0000000000..6d2b4d24f0
--- /dev/null
+++ b/test/Index/annotate-tokens.c
@@ -0,0 +1,63 @@
+typedef int T;
+struct X { int a, b; };
+void f(void *ptr) {
+  T* t_ptr = (T *)ptr;
+  (void)sizeof(T);
+  /* A comment */
+  struct X x = (struct X){1, 2};
+  void *xx = ptr ? : &x;
+  const char * hello = "Hello";
+}
+
+// RUN: c-index-test -test-annotate-tokens=%s:4:1:9:32 %s | FileCheck %s
+// CHECK: Identifier: "T" [4:3 - 4:3]
+// CHECK: Punctuation: "*" [4:4 - 4:4]
+// CHECK: Identifier: "t_ptr" [4:6 - 4:10]
+// CHECK: Punctuation: "=" [4:12 - 4:12]
+// CHECK: Punctuation: "(" [4:14 - 4:14]
+// CHECK: Identifier: "T" [4:15 - 4:15]
+// CHECK: Punctuation: "*" [4:17 - 4:17]
+// CHECK: Punctuation: ")" [4:18 - 4:18]
+// CHECK: Identifier: "ptr" [4:19 - 4:21]
+// CHECK: Punctuation: ";" [4:22 - 4:22]
+// CHECK: Punctuation: "(" [5:3 - 5:3]
+// CHECK: Keyword: "void" [5:4 - 5:7]
+// CHECK: Punctuation: ")" [5:8 - 5:8]
+// CHECK: Keyword: "sizeof" [5:9 - 5:14]
+// CHECK: Punctuation: "(" [5:15 - 5:15]
+// CHECK: Identifier: "T" [5:16 - 5:16]
+// CHECK: Punctuation: ")" [5:17 - 5:17]
+// CHECK: Punctuation: ";" [5:18 - 5:18]
+// CHECK: Comment: "/* A comment */" [6:3 - 6:17]
+// CHECK: Keyword: "struct" [7:3 - 7:8]
+// CHECK: Identifier: "X" [7:10 - 7:10]
+// CHECK: Identifier: "x" [7:12 - 7:12]
+// CHECK: Punctuation: "=" [7:14 - 7:14]
+// CHECK: Punctuation: "(" [7:16 - 7:16]
+// CHECK: Keyword: "struct" [7:17 - 7:22]
+// CHECK: Identifier: "X" [7:24 - 7:24]
+// CHECK: Punctuation: ")" [7:25 - 7:25]
+// CHECK: Punctuation: "{" [7:26 - 7:26]
+// CHECK: Literal: "1" [7:27 - 7:27]
+// CHECK: Punctuation: "," [7:28 - 7:28]
+// CHECK: Literal: "2" [7:30 - 7:30]
+// CHECK: Punctuation: "}" [7:31 - 7:31]
+// CHECK: Punctuation: ";" [7:32 - 7:32]
+// CHECK: Keyword: "void" [8:3 - 8:6]
+// CHECK: Punctuation: "*" [8:8 - 8:8]
+// CHECK: Identifier: "xx" [8:9 - 8:10]
+// CHECK: Punctuation: "=" [8:12 - 8:12]
+// CHECK: Identifier: "ptr" [8:14 - 8:16]
+// CHECK: Punctuation: "?" [8:18 - 8:18]
+// CHECK: Punctuation: ":" [8:20 - 8:20]
+// CHECK: Punctuation: "&" [8:22 - 8:22]
+// CHECK: Identifier: "x" [8:23 - 8:23]
+// CHECK: Punctuation: ";" [8:24 - 8:24]
+// CHECK: Keyword: "const" [9:3 - 9:7]
+// CHECK: Keyword: "char" [9:9 - 9:12]
+// CHECK: Punctuation: "*" [9:14 - 9:14]
+// CHECK: Identifier: "hello" [9:16 - 9:20]
+// CHECK: Punctuation: "=" [9:22 - 9:22]
+// CHECK: Literal: ""Hello"" [9:24 - 9:30]
+// CHECK: Punctuation: ";" [9:31 - 9:31]
+// CHECK: Punctuation: "}" [10:1 - 10:1]
diff --git a/tools/CIndex/CIndex.cpp b/tools/CIndex/CIndex.cpp
index 03519adc0a..55061cba71 100644
--- a/tools/CIndex/CIndex.cpp
+++ b/tools/CIndex/CIndex.cpp
@@ -876,6 +876,21 @@ CXString CIndexer::createCXString(const char *String, bool DupString){
   return Str;
 }
 
+CXString CIndexer::createCXString(llvm::StringRef String, bool DupString) {
+  CXString Result;
+  if (DupString || (!String.empty() && String.data()[String.size()] != 0)) {
+    char *Spelling = (char *)malloc(String.size() + 1);
+    memmove(Spelling, String.data(), String.size());
+    Spelling[String.size()] = 0;
+    Result.Spelling = Spelling;
+    Result.MustFreeString = 1;
+  } else {
+    Result.Spelling = String.data();
+    Result.MustFreeString = 0;
+  }
+  return Result;
+}
+
 extern "C" {
 CXIndex clang_createIndex(int excludeDeclarationsFromPCH,
                           int displayDiagnostics) {
@@ -1882,6 +1897,183 @@ void clang_getDefinitionSpellingAndExtent(CXCursor C,
 } // end: extern "C"
 
 //===----------------------------------------------------------------------===//
+// Token-based Operations.
+//===----------------------------------------------------------------------===//
+
+/* CXToken layout:
+ *   int_data[0]: a CXTokenKind
+ *   int_data[1]: starting token location
+ *   int_data[2]: token length
+ *   int_data[3]: reserved
+ *   ptr_data: for identifiers and keywords, an IdentifierInfo*; for literals,
+ *   a pointer to the start of the literal's text; otherwise unused.
+ */
+extern "C" {
+
+CXTokenKind clang_getTokenKind(CXToken CXTok) {
+  return static_cast<CXTokenKind>(CXTok.int_data[0]);
+}
+
+CXString clang_getTokenSpelling(CXTranslationUnit TU, CXToken CXTok) {
+  switch (clang_getTokenKind(CXTok)) {
+  case CXToken_Identifier:
+  case CXToken_Keyword:
+    // We know we have an IdentifierInfo*, so use that.
+    return CIndexer::createCXString(
+              static_cast<IdentifierInfo *>(CXTok.ptr_data)->getNameStart());
+
+  case CXToken_Literal: {
+    // We have stashed the starting pointer in the ptr_data field. Use it.
+    const char *Text = static_cast<const char *>(CXTok.ptr_data);
+    return CIndexer::createCXString(llvm::StringRef(Text, CXTok.int_data[2]), 
+                                    true);
+  }
+      
+  case CXToken_Punctuation:
+  case CXToken_Comment:
+    break;
+  }
+  
+  // We have to find the starting buffer pointer the hard way, by 
+  // deconstructing the source location.
+  ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU);
+  if (!CXXUnit)
+    return CIndexer::createCXString("");
+  
+  SourceLocation Loc = SourceLocation::getFromRawEncoding(CXTok.int_data[1]);
+  std::pair<FileID, unsigned> LocInfo
+    = CXXUnit->getSourceManager().getDecomposedLoc(Loc);
+  std::pair<const char *,const char *> Buffer
+    = CXXUnit->getSourceManager().getBufferData(LocInfo.first);
+
+  return CIndexer::createCXString(llvm::StringRef(Buffer.first+LocInfo.second,
+                                                  CXTok.int_data[2]), 
+                                  true);
+}
+ 
+CXSourceLocation clang_getTokenLocation(CXTranslationUnit TU, CXToken CXTok) {
+  ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU);
+  if (!CXXUnit)
+    return clang_getNullLocation();
+  
+  return cxloc::translateSourceLocation(CXXUnit->getASTContext(),
+                        SourceLocation::getFromRawEncoding(CXTok.int_data[1]));
+}
+
+CXSourceRange clang_getTokenExtent(CXTranslationUnit TU, CXToken CXTok) {
+  ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU);
+  if (!CXXUnit) {
+    CXSourceRange Result = { 0, 0, 0 };
+    return Result;
+  }
+  
+  return cxloc::translateSourceRange(CXXUnit->getASTContext(), 
+                        SourceLocation::getFromRawEncoding(CXTok.int_data[1]));
+}
+  
+void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range,
+                    CXToken **Tokens, unsigned *NumTokens) {
+  if (Tokens)
+    *Tokens = 0;
+  if (NumTokens)
+    *NumTokens = 0;
+  
+  ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU);
+  if (!CXXUnit || !Tokens || !NumTokens)
+    return;
+  
+  SourceRange R = cxloc::translateSourceRange(Range);
+  if (R.isInvalid())
+    return;
+  
+  SourceManager &SourceMgr = CXXUnit->getSourceManager();
+  std::pair<FileID, unsigned> BeginLocInfo
+    = SourceMgr.getDecomposedLoc(R.getBegin());
+  std::pair<FileID, unsigned> EndLocInfo
+    = SourceMgr.getDecomposedLoc(R.getEnd());
+  
+  // Cannot tokenize across files.
+  if (BeginLocInfo.first != EndLocInfo.first)
+    return;
+  
+  // Create a lexer 
+  std::pair<const char *,const char *> Buffer
+    = SourceMgr.getBufferData(BeginLocInfo.first);
+  Lexer Lex(SourceMgr.getLocForStartOfFile(BeginLocInfo.first),
+            CXXUnit->getASTContext().getLangOptions(),
+            Buffer.first, Buffer.first + BeginLocInfo.second, Buffer.second);
+  Lex.SetCommentRetentionState(true);
+  
+  // Lex tokens until we hit the end of the range.
+  const char *EffectiveBufferEnd = Buffer.first + EndLocInfo.second;
+  llvm::SmallVector<CXToken, 32> CXTokens;
+  Token Tok;
+  do {
+    // Lex the next token
+    Lex.LexFromRawLexer(Tok);
+    if (Tok.is(tok::eof))
+      break;
+    
+    // Initialize the CXToken.
+    CXToken CXTok;
+    
+    //   - Common fields
+    CXTok.int_data[1] = Tok.getLocation().getRawEncoding();
+    CXTok.int_data[2] = Tok.getLength();
+    CXTok.int_data[3] = 0;
+    
+    //   - Kind-specific fields
+    if (Tok.isLiteral()) {
+      CXTok.int_data[0] = CXToken_Literal;
+      CXTok.ptr_data = (void *)Tok.getLiteralData();
+    } else if (Tok.is(tok::identifier)) {
+      // Look up the identifier to determine whether we have a keyword.
+      std::pair<FileID, unsigned> LocInfo
+        = SourceMgr.getDecomposedLoc(Tok.getLocation());
+      const char *StartPos 
+        = CXXUnit->getSourceManager().getBufferData(LocInfo.first).first + 
+          LocInfo.second;
+      IdentifierInfo *II
+        = CXXUnit->getPreprocessor().LookUpIdentifierInfo(Tok, StartPos);
+      CXTok.int_data[0] = II->getTokenID() == tok::identifier?
+                               CXToken_Identifier
+                             : CXToken_Keyword;
+      CXTok.ptr_data = II;
+    } else if (Tok.is(tok::comment)) {
+      CXTok.int_data[0] = CXToken_Comment;
+      CXTok.ptr_data = 0;
+    } else {
+      CXTok.int_data[0] = CXToken_Punctuation;
+      CXTok.ptr_data = 0;
+    }
+    CXTokens.push_back(CXTok);
+  } while (Lex.getBufferLocation() <= EffectiveBufferEnd);
+  
+  if (CXTokens.empty())
+    return;
+  
+  *Tokens = (CXToken *)malloc(sizeof(CXToken) * CXTokens.size());
+  memmove(*Tokens, CXTokens.data(), sizeof(CXToken) * CXTokens.size());
+  *NumTokens = CXTokens.size();
+}
+  
+void clang_annotateTokens(CXTranslationUnit TU,
+                          CXToken *Tokens, unsigned NumTokens,
+                          CXCursor *Cursors) {
+  // FIXME: Actually perform some meaningful lookup here.
+  for (unsigned I = 0; I != NumTokens; ++I)
+    Cursors[I] = clang_getNullCursor();
+}
+
+void clang_disposeTokens(CXTranslationUnit TU, 
+                         CXToken *Tokens, unsigned NumTokens) {
+  if (Tokens)
+    free(Tokens);
+}
+  
+} // end: extern "C"
+
+//===----------------------------------------------------------------------===//
 // CXString Operations.
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/CIndex/CIndex.exports b/tools/CIndex/CIndex.exports
index b2ec58e5b9..fa141fc41c 100644
--- a/tools/CIndex/CIndex.exports
+++ b/tools/CIndex/CIndex.exports
@@ -1,3 +1,4 @@
+_clang_annotateTokens
 _clang_codeComplete
 _clang_createIndex
 _clang_createTranslationUnit
@@ -5,6 +6,7 @@ _clang_createTranslationUnitFromSourceFile
 _clang_disposeCodeCompleteResults
 _clang_disposeIndex
 _clang_disposeString
+_clang_disposeTokens
 _clang_disposeTranslationUnit
 _clang_equalCursors
 _clang_equalLocations
@@ -35,6 +37,10 @@ _clang_getNumCompletionChunks
 _clang_getRange
 _clang_getRangeEnd
 _clang_getRangeStart
+_clang_getTokenExtent
+_clang_getTokenKind
+_clang_getTokenLocation
+_clang_getTokenSpelling
 _clang_getTranslationUnitCursor
 _clang_getTranslationUnitSpelling
 _clang_isCursorDefinition
@@ -45,4 +51,5 @@ _clang_isReference
 _clang_isStatement
 _clang_isTranslationUnit
 _clang_setUseExternalASTGeneration
+_clang_tokenize
 _clang_visitChildren
diff --git a/tools/CIndex/CIndexer.h b/tools/CIndex/CIndexer.h
index d01454f9dc..aa63ec0238 100644
--- a/tools/CIndex/CIndexer.h
+++ b/tools/CIndex/CIndexer.h
@@ -18,6 +18,7 @@
 #include "clang-c/Index.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/ASTUnit.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/System/Path.h"
 #include <vector>
 
@@ -76,6 +77,8 @@ public:
   std::string getClangResourcesPath();
 
   static CXString createCXString(const char *String, bool DupString = false);
+  static CXString createCXString(llvm::StringRef String, 
+                                 bool DupString = false);
 };
 
 namespace clang {
diff --git a/tools/CIndex/CXSourceLocation.h b/tools/CIndex/CXSourceLocation.h
index 0eab273c35..1f15f0832c 100644
--- a/tools/CIndex/CXSourceLocation.h
+++ b/tools/CIndex/CXSourceLocation.h
@@ -38,8 +38,8 @@ static inline CXSourceLocation translateSourceLocation(ASTContext &Context,
 static inline CXSourceRange translateSourceRange(ASTContext &Context,
                                                  SourceRange R) {
   CXSourceRange Result = { &Context, 
-    R.getBegin().getRawEncoding(),
-    R.getEnd().getRawEncoding() };
+                           R.getBegin().getRawEncoding(),
+                           R.getEnd().getRawEncoding() };
   return Result;
 }
 
diff --git a/tools/c-index-test/c-index-test.c b/tools/c-index-test/c-index-test.c
index 4ef3904139..222ffbaa63 100644
--- a/tools/c-index-test/c-index-test.c
+++ b/tools/c-index-test/c-index-test.c
@@ -481,42 +481,62 @@ static int perform_file_scan(const char *ast_file, const char *source_file,
    on failure. If successful, the pointer *filename will contain newly-allocated
    memory (that will be owned by the caller) to store the file name. */
 int parse_file_line_column(const char *input, char **filename, unsigned *line, 
-                           unsigned *column) {
+                           unsigned *column, unsigned *second_line,
+                           unsigned *second_column) {
   /* Find the second colon. */
-  const char *second_colon = strrchr(input, ':'), *first_colon;
+  const char *last_colon = strrchr(input, ':');
+  unsigned values[4], i;
+  unsigned num_values = (second_line && second_column)? 4 : 2;
+
   char *endptr = 0;
-  if (!second_colon || second_colon == input) {
-    fprintf(stderr, "could not parse filename:line:column in '%s'\n", input);
+  if (!last_colon || last_colon == input) {
+    if (num_values == 4)
+      fprintf(stderr, "could not parse filename:line:column:line:column in "
+              "'%s'\n", input);
+    else
+      fprintf(stderr, "could not parse filename:line:column in '%s'\n", input);
     return 1;
   }
 
-  /* Parse the column number. */
-  *column = strtol(second_colon + 1, &endptr, 10);
-  if (*endptr != 0) {
-    fprintf(stderr, "could not parse column in '%s'\n", input);
-    return 1;
-  }
+  for (i = 0; i != num_values; ++i) {
+    const char *prev_colon;
 
-  /* Find the first colon. */
-  first_colon = second_colon - 1;
-  while (first_colon != input && *first_colon != ':')
-    --first_colon;
-  if (first_colon == input) {
-    fprintf(stderr, "could not parse line in '%s'\n", input);
-    return 1;    
-  }
+    /* Parse the next line or column. */
+    values[num_values - i - 1] = strtol(last_colon + 1, &endptr, 10);
+    if (*endptr != 0 && *endptr != ':') {
+      fprintf(stderr, "could not parse %s in '%s'\n", 
+              (i % 2 ? "column" : "line"), input);
+      return 1;
+    }
+    
+    if (i + 1 == num_values)
+      break;
 
-  /* Parse the line number. */
-  *line = strtol(first_colon + 1, &endptr, 10);
-  if (*endptr != ':') {
-    fprintf(stderr, "could not parse line in '%s'\n", input);
-    return 1;
+    /* Find the previous colon. */
+    prev_colon = last_colon - 1;
+    while (prev_colon != input && *prev_colon != ':')
+      --prev_colon;
+    if (prev_colon == input) {
+      fprintf(stderr, "could not parse %s in '%s'\n", 
+              (i % 2 == 0? "column" : "line"), input);
+      return 1;    
+    }
+
+    last_colon = prev_colon;
   }
+
+  *line = values[0];
+  *column = values[1];
   
+  if (second_line && second_column) {
+    *second_line = values[2];
+    *second_column = values[3];
+  }
+
   /* Copy the file name. */
-  *filename = (char*)malloc(first_colon - input + 1);
-  memcpy(*filename, input, first_colon - input);
-  (*filename)[first_colon - input] = 0;
+  *filename = (char*)malloc(last_colon - input + 1);
+  memcpy(*filename, input, last_colon - input);
+  (*filename)[last_colon - input] = 0;
   return 0;
 }
 
@@ -595,7 +615,8 @@ int perform_code_completion(int argc, const char **argv) {
   CXCodeCompleteResults *results = 0;
 
   input += strlen("-code-completion-at=");
-  if ((errorCode = parse_file_line_column(input, &filename, &line, &column)))
+  if ((errorCode = parse_file_line_column(input, &filename, &line, &column, 
+                                          0, 0)))
     return errorCode;
 
   if (parse_remapped_files(argc, argv, 2, &unsaved_files, &num_unsaved_files))
@@ -650,7 +671,7 @@ int inspect_cursor_at(int argc, const char **argv) {
     const char *input = argv[Loc + 1] + strlen("-cursor-at=");
     if ((errorCode = parse_file_line_column(input, &Locations[Loc].filename, 
                                             &Locations[Loc].line, 
-                                            &Locations[Loc].column)))
+                                            &Locations[Loc].column, 0, 0)))
       return errorCode;
   }
   
@@ -689,6 +710,104 @@ int inspect_cursor_at(int argc, const char **argv) {
   return 0;
 }
 
+int perform_token_annotation(int argc, const char **argv) {
+  const char *input = argv[1];
+  char *filename = 0;
+  unsigned line, second_line;
+  unsigned column, second_column;
+  CXIndex CIdx;
+  CXTranslationUnit TU = 0;
+  int errorCode;
+  struct CXUnsavedFile *unsaved_files = 0;
+  int num_unsaved_files = 0;
+  CXToken *tokens;
+  unsigned num_tokens;
+  CXSourceRange range;
+  CXSourceLocation startLoc, endLoc;
+  CXFile file = 0;
+  CXCursor *cursors = 0;
+  unsigned i;
+
+  input += strlen("-test-annotate-tokens=");
+  if ((errorCode = parse_file_line_column(input, &filename, &line, &column,
+                                          &second_line, &second_column)))
+    return errorCode;
+
+  if (parse_remapped_files(argc, argv, 2, &unsaved_files, &num_unsaved_files))
+    return -1;
+
+  CIdx = clang_createIndex(0, 0);
+  TU = clang_createTranslationUnitFromSourceFile(CIdx, argv[argc - 1],
+                                                 argc - num_unsaved_files - 3,
+                                                 argv + num_unsaved_files + 2,
+                                                 num_unsaved_files,
+                                                 unsaved_files);
+  if (!TU) {
+    fprintf(stderr, "unable to parse input\n");
+    clang_disposeIndex(CIdx);
+    free(filename);
+    free_remapped_files(unsaved_files, num_unsaved_files);
+    return -1;
+  }  
+  errorCode = 0;
+
+  file = clang_getFile(TU, filename);
+  if (!file) {
+    fprintf(stderr, "file %s is not in this translation unit\n", filename);
+    errorCode = -1;
+    goto teardown;
+  }
+
+  startLoc = clang_getLocation(TU, file, line, column);
+  if (clang_equalLocations(clang_getNullLocation(), startLoc)) {
+    fprintf(stderr, "invalid source location %s:%d:%d\n", filename, line, 
+            column);
+    errorCode = -1;
+    goto teardown;    
+  }
+
+  endLoc = clang_getLocation(TU, file, second_line, second_column);
+  if (clang_equalLocations(clang_getNullLocation(), endLoc)) {
+    fprintf(stderr, "invalid source location %s:%d:%d\n", filename, 
+            second_line, second_column);
+    errorCode = -1;
+    goto teardown;    
+  }
+
+  range = clang_getRange(startLoc, endLoc);
+  clang_tokenize(TU, range, &tokens, &num_tokens);
+  cursors = (CXCursor *)malloc(num_tokens * sizeof(CXCursor));
+  clang_annotateTokens(TU, tokens, num_tokens, cursors);
+  for (i = 0; i != num_tokens; ++i) {
+    const char *kind = "<unknown>";
+    CXString spelling = clang_getTokenSpelling(TU, tokens[i]);
+    CXSourceRange extent = clang_getTokenExtent(TU, tokens[i]);
+    unsigned start_line, start_column, end_line, end_column;
+
+    switch (clang_getTokenKind(tokens[i])) {
+    case CXToken_Punctuation: kind = "Punctuation"; break;
+    case CXToken_Keyword: kind = "Keyword"; break;
+    case CXToken_Identifier: kind = "Identifier"; break;
+    case CXToken_Literal: kind = "Literal"; break;
+    case CXToken_Comment: kind = "Comment"; break;
+    }
+    clang_getInstantiationLocation(clang_getRangeStart(extent), 
+                                   0, &start_line, &start_column);
+    clang_getInstantiationLocation(clang_getRangeEnd(extent),
+                                   0, &end_line, &end_column);
+    printf("%s: \"%s\" [%d:%d - %d:%d]\n", kind, clang_getCString(spelling),
+           start_line, start_column, end_line, end_column);
+  }
+  free(cursors);
+
+ teardown:
+  clang_disposeTranslationUnit(TU);
+  clang_disposeIndex(CIdx);
+  free(filename);
+  free_remapped_files(unsaved_files, num_unsaved_files);
+  return errorCode;
+}
+
 /******************************************************************************/
 /* Command line processing.                                                   */
 /******************************************************************************/
@@ -712,8 +831,9 @@ static void print_usage(void) {
     "       c-index-test -test-load-tu-usrs <AST file> <symbol filter> "
            "[FileCheck prefix]\n"
     "       c-index-test -test-load-source <symbol filter> {<args>}*\n"
-    "       c-index-test -test-load-source-usrs <symbol filter> {<args>}*\n\n");
+    "       c-index-test -test-load-source-usrs <symbol filter> {<args>}*\n");
   fprintf(stderr,
+    "       c-index-test -test-annotate-tokens=<range> {<args>}* \n\n"
     " <symbol filter> values:\n%s",
     "   all - load all symbols, including those from PCH\n"
     "   local - load all symbols except those in PCH\n"
@@ -743,7 +863,8 @@ int main(int argc, const char **argv) {
   else if (argc >= 4 && strcmp(argv[1], "-test-file-scan") == 0)
     return perform_file_scan(argv[2], argv[3],
                              argc >= 5 ? argv[4] : 0);
-
+  else if (argc > 2 && strstr(argv[1], "-test-annotate-tokens=") == argv[1])
+    return perform_token_annotation(argc, argv);
   print_usage();
   return 1;
 }
author	Douglas Gregor <dgregor@apple.com>	2010-01-26 17:06:03 +0000
committer	Douglas Gregor <dgregor@apple.com>	2010-01-26 17:06:03 +0000
commit	fc8ea23eb6cbaaa5046f2abb4c033e24c8659efd (patch)
tree	33d33eb29395938f4ecc90667086bf6766db122e
parent	b896f625d1225450c0b30c4b82cb4d9af5642b9f (diff)