//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines lexer for structured comments and supporting token class.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_COMMENT_LEXER_H
#define LLVM_CLANG_AST_COMMENT_LEXER_H
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/raw_ostream.h"
namespace clang {
namespace comments {
class Lexer;
class TextTokenRetokenizer;
struct CommandInfo;
class CommandTraits;
namespace tok {
enum TokenKind {
eof,
newline,
text,
unknown_command, // Command that does not have an ID.
backslash_command, // Command with an ID, that used backslash marker.
at_command, // Command with an ID, that used 'at' marker.
verbatim_block_begin,
verbatim_block_line,
verbatim_block_end,
verbatim_line_name,
verbatim_line_text,
html_start_tag, //
html_slash_greater, // />
html_end_tag // '.
LS_HTMLEndTag
};
/// Current lexing mode.
LexerState State;
/// If State is LS_VerbatimBlock, contains the name of verbatim end
/// command, including command marker.
SmallString<16> VerbatimBlockEndCommandName;
/// Given a character reference name (e.g., "lt"), return the character that
/// it stands for (e.g., "<").
StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
/// Given a Unicode codepoint as base-10 integer, return the character.
StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
/// Given a Unicode codepoint as base-16 integer, return the character.
StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
void formTokenWithChars(Token &Result, const char *TokEnd,
tok::TokenKind Kind) {
const unsigned TokLen = TokEnd - BufferPtr;
Result.setLocation(getSourceLocation(BufferPtr));
Result.setKind(Kind);
Result.setLength(TokLen);
#ifndef NDEBUG
Result.TextPtr = "";
Result.IntVal = 7;
#endif
BufferPtr = TokEnd;
}
void formTextToken(Token &Result, const char *TokEnd) {
StringRef Text(BufferPtr, TokEnd - BufferPtr);
formTokenWithChars(Result, TokEnd, tok::text);
Result.setText(Text);
}
SourceLocation getSourceLocation(const char *Loc) const {
assert(Loc >= BufferStart && Loc <= BufferEnd &&
"Location out of range for this buffer!");
const unsigned CharNo = Loc - BufferStart;
return FileLoc.getLocWithOffset(CharNo);
}
/// Eat string matching regexp \code \s*\* \endcode.
void skipLineStartingDecorations();
/// Lex stuff inside comments. CommentEnd should be set correctly.
void lexCommentText(Token &T);
void setupAndLexVerbatimBlock(Token &T,
const char *TextBegin,
char Marker, const CommandInfo *Info);
void lexVerbatimBlockFirstLine(Token &T);
void lexVerbatimBlockBody(Token &T);
void setupAndLexVerbatimLine(Token &T, const char *TextBegin,
const CommandInfo *Info);
void lexVerbatimLineText(Token &T);
void lexHTMLCharacterReference(Token &T);
void setupAndLexHTMLStartTag(Token &T);
void lexHTMLStartTag(Token &T);
void setupAndLexHTMLEndTag(Token &T);
void lexHTMLEndTag(Token &T);
public:
Lexer(llvm::BumpPtrAllocator &Allocator, const CommandTraits &Traits,
SourceLocation FileLoc,
const char *BufferStart, const char *BufferEnd);
void lex(Token &T);
StringRef getSpelling(const Token &Tok,
const SourceManager &SourceMgr,
bool *Invalid = NULL) const;
};
} // end namespace comments
} // end namespace clang
#endif