Comment lexing: fix lexing to actually work in non-error cases.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159963 91177308-0d34-0410-b5e6-96231b3b80d8
author: Dmitri Gribenko <gribozavr@gmail.com> 2012-07-09 21:32:40 +0000
committer: Dmitri Gribenko <gribozavr@gmail.com> 2012-07-09 21:32:40 +0000
commit: a99ec107ba6b5abaf27c6cc9318e65689163f2a1 (patch)
tree: aa25e23366c546a85e3bb12e2c006f6805e44140 /lib/AST/CommentLexer.cpp
parent: 34f60a4a7fb87e9f4dfd08f8751ce76db9981215 (diff)
1 file changed, 18 insertions, 13 deletions
diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp
index 77d2a9b72d..55cd409a9c 100644
--- a/lib/AST/CommentLexer.cpp
+++ b/lib/AST/CommentLexer.cpp
@@ -147,6 +147,11 @@ const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
   return BufferPtr;
 }
 
+bool isHTMLIdentifierStartingCharacter(char C) {
+  return (C >= 'a' && C <= 'z') ||
+         (C >= 'A' && C <= 'Z');
+}
+
 bool isHTMLIdentifierCharacter(char C) {
   return (C >= 'a' && C <= 'z') ||
          (C >= 'A' && C <= 'Z') ||
@@ -357,7 +362,7 @@ void Lexer::lexCommentText(Token &T) {
           return;
         }
         const char C = *TokenPtr;
-        if (isHTMLIdentifierCharacter(C))
+        if (isHTMLIdentifierStartingCharacter(C))
           setupAndLexHTMLOpenTag(T);
         else if (C == '/')
           setupAndLexHTMLCloseTag(T);
@@ -383,7 +388,7 @@ void Lexer::lexCommentText(Token &T) {
           TokenPtr++;
           if (TokenPtr == CommentEnd)
             break;
-          char C = *TokenPtr;
+          const char C = *TokenPtr;
           if(C == '\n' || C == '\r' ||
              C == '\\' || C == '@' || C == '<')
             break;
@@ -492,7 +497,8 @@ void Lexer::lexVerbatimLineText(Token &T) {
 }
 
 void Lexer::setupAndLexHTMLOpenTag(Token &T) {
-  assert(BufferPtr[0] == '<' && isHTMLIdentifierCharacter(BufferPtr[1]));
+  assert(BufferPtr[0] == '<' &&
+         isHTMLIdentifierStartingCharacter(BufferPtr[1]));
   const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
 
   StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
@@ -501,12 +507,9 @@ void Lexer::setupAndLexHTMLOpenTag(Token &T) {
 
   BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
 
-  if (BufferPtr != CommentEnd && *BufferPtr == '>') {
-    BufferPtr++;
-    return;
-  }
-
-  if (BufferPtr != CommentEnd && isHTMLIdentifierCharacter(*BufferPtr))
+  const char C = *BufferPtr;
+  if (BufferPtr != CommentEnd &&
+      (C == '>' || isHTMLIdentifierStartingCharacter(C)))
     State = LS_HTMLOpenTag;
 }
 
@@ -541,7 +544,8 @@ void Lexer::lexHTMLOpenTag(Token &T) {
     case '>':
       TokenPtr++;
       formTokenWithChars(T, TokenPtr, tok::html_greater);
-      break;
+      State = LS_Normal;
+      return;
     }
   }
 
@@ -554,7 +558,7 @@ void Lexer::lexHTMLOpenTag(Token &T) {
   }
 
   C = *BufferPtr;
-  if (!isHTMLIdentifierCharacter(C) &&
+  if (!isHTMLIdentifierStartingCharacter(C) &&
       C != '=' && C != '\"' && C != '\'' && C != '>') {
     State = LS_Normal;
     return;
@@ -656,8 +660,9 @@ again:
       EndWhitespace++;
 
     // Turn any whitespace between comments (and there is only whitespace
-    // between them) into a newline.  We have two newlines between C comments
-    // in total (first one was synthesized after a comment).
+    // between them -- guaranteed by comment extraction) into a newline.  We
+    // have two newlines between C comments in total (first one was synthesized
+    // after a comment).
     formTokenWithChars(T, EndWhitespace, tok::newline);
 
     CommentState = LCS_BeforeComment;
author	Dmitri Gribenko <gribozavr@gmail.com>	2012-07-09 21:32:40 +0000
committer	Dmitri Gribenko <gribozavr@gmail.com>	2012-07-09 21:32:40 +0000
commit	a99ec107ba6b5abaf27c6cc9318e65689163f2a1 (patch)
tree	aa25e23366c546a85e3bb12e2c006f6805e44140 /lib/AST/CommentLexer.cpp
parent	34f60a4a7fb87e9f4dfd08f8751ce76db9981215 (diff)