#include "clang/AST/CommentLexer.h"
#include "clang/AST/CommentCommandTraits.h"
#include "clang/Basic/ConvertUTF.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
namespace clang {
namespace comments {
/// Debugging aid: prints this token to llvm::errs() as its kind, its source
/// location (as rendered by Loc.dump), its length, and its quoted spelling as
/// returned by \p L, followed by a newline.
void Token::dump(const Lexer &L, const SourceManager &SM) const {
  // Header and token kind.
  llvm::errs() << "comments::Token Kind=" << Kind;
  llvm::errs() << " ";

  // The location prints itself.
  Loc.dump(SM);

  // Length, then the spelling in double quotes.
  llvm::errs() << " " << Length;
  llvm::errs() << " \"" << L.getSpelling(*this, SM) << "\"\n";
}
namespace {
/// Returns true if \p C is an ASCII letter ('a'-'z' or 'A'-'Z'), i.e. a
/// character accepted in the name of a named character reference.
bool isHTMLNamedCharacterReferenceCharacter(char C) {
  if (C >= 'A' && C <= 'Z')
    return true;
  return C >= 'a' && C <= 'z';
}
/// Returns true if \p C is an ASCII decimal digit ('0'-'9').
bool isHTMLDecimalCharacterReferenceCharacter(char C) {
  const bool OutOfRange = C < '0' || C > '9';
  return !OutOfRange;
}
/// Returns true if \p C is an ASCII hexadecimal digit: '0'-'9', 'a'-'f' or
/// 'A'-'F'.
bool isHTMLHexCharacterReferenceCharacter(char C) {
  if (C >= '0' && C <= '9')
    return true;
  if (C >= 'a' && C <= 'f')
    return true;
  return C >= 'A' && C <= 'F';
}
#include "clang/AST/CommentHTMLTags.inc"
} // unnamed namespace
/// Maps the name of a named character reference (without the surrounding
/// '&' and ';') to the text it stands for.
///
/// Only "amp", "lt", "gt", "quot" and "apos" are recognized; any other name
/// yields an empty string.
StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
  if (Name == "amp")
    return "&";
  if (Name == "lt")
    return "<";
  if (Name == "gt")
    return ">";
  if (Name == "quot")
    return "\"";
  if (Name == "apos")
    return "\'";
  // Unknown reference name.
  return "";
}
/// Converts the digits of a decimal character reference to UTF-8.
///
/// \param Name the digit sequence only; every character is asserted to be a
/// decimal digit.
///
/// \returns the UTF-8 encoding of the code point, stored in memory obtained
/// from Allocator, or an empty StringRef if the value is not a code point
/// that can be encoded (including values above U+10FFFF).
StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
  // Largest Unicode code point.  Any value at or below this bound can be
  // multiplied by 10 and have a digit added without overflowing 'unsigned'.
  const unsigned MaxCodePoint = 0x10FFFF;

  unsigned CodePoint = 0;
  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
    assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
    // Once the value is out of range, stop accumulating.  Otherwise a long
    // digit string would wrap around and could resolve to an unrelated,
    // valid-looking character.
    if (CodePoint > MaxCodePoint)
      continue;
    CodePoint *= 10;
    CodePoint += Name[i] - '0';
  }

  // Out-of-range references cannot be encoded; reject them explicitly.
  if (CodePoint > MaxCodePoint)
    return StringRef();

  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
  char *ResolvedPtr = Resolved;
  // On success ResolvedPtr has been advanced past the bytes written.
  if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
    return StringRef(Resolved, ResolvedPtr - Resolved);
  else
    return StringRef();
}
/// Converts the digits of a hexadecimal character reference to UTF-8.
///
/// \param Name the hex digit sequence only; every character is asserted to
/// be a hexadecimal digit (either letter case).
///
/// \returns the UTF-8 encoding of the code point, stored in memory obtained
/// from Allocator, or an empty StringRef if the value is not a code point
/// that can be encoded (including values above U+10FFFF).
StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
  // Largest Unicode code point.  Any value at or below this bound can be
  // multiplied by 16 and have a digit added without overflowing 'unsigned'.
  const unsigned MaxCodePoint = 0x10FFFF;

  unsigned CodePoint = 0;
  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
    const char C = Name[i];
    assert(isHTMLHexCharacterReferenceCharacter(C));
    // Once the value is out of range, stop accumulating.  Otherwise nine or
    // more hex digits would wrap around and could resolve to an unrelated,
    // valid-looking character.
    if (CodePoint > MaxCodePoint)
      continue;
    CodePoint *= 16;
    if (C >= '0' && C <= '9')
      CodePoint += C - '0';
    else if (C >= 'a' && C <= 'f')
      CodePoint += C - 'a' + 10;
    else
      CodePoint += C - 'A' + 10;
  }

  // Out-of-range references cannot be encoded; reject them explicitly.
  if (CodePoint > MaxCodePoint)
    return StringRef();

  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
  char *ResolvedPtr = Resolved;
  // On success ResolvedPtr has been advanced past the bytes written.
  if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
    return StringRef(Resolved, ResolvedPtr - Resolved);
  else
    return StringRef();
}
/// Skips a decorative '*' at the current position, optionally preceded by
/// horizontal whitespace (space, tab, form feed, vertical tab).
///
/// BufferPtr is advanced to just past the '*' when one is found.  If the
/// whitespace run is followed by anything else, or runs up to CommentEnd,
/// BufferPtr is left unchanged.
void Lexer::skipLineStartingDecorations() {
  // This function should be called only for C comments
  assert(CommentState == LCS_InsideCComment);

  if (BufferPtr == CommentEnd)
    return;

  const char First = *BufferPtr;

  // A '*' right at the current position is consumed directly.
  if (First == '*') {
    ++BufferPtr;
    return;
  }

  // Anything other than horizontal whitespace is not a decoration.
  if (First != ' ' && First != '\t' && First != '\f' && First != '\v')
    return;

  // Scan over the remaining whitespace without committing to it yet.
  const char *Cursor = BufferPtr + 1;
  for (; Cursor != CommentEnd; ++Cursor) {
    const char Ch = *Cursor;
    if (Ch != ' ' && Ch != '\t' && Ch != '\f' && Ch != '\v')
      break;
  }

  // Only whitespace that leads to a '*' is skipped, together with the '*'.
  if (Cursor != CommentEnd && *Cursor == '*')
    BufferPtr = Cursor + 1;
}
namespace {
/// Scans [BufferPtr, BufferEnd) for the first '\n' or '\r'.
///
/// \returns a pointer to that character, or \p BufferEnd if the range
/// contains neither.
const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
  const char *Cursor = BufferPtr;
  while (Cursor != BufferEnd && *Cursor != '\n' && *Cursor != '\r')
    ++Cursor;
  return Cursor;
}
const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
if (BufferPtr == BufferEnd)