diff options
author | Dmitri Gribenko <gribozavr@gmail.com> | 2012-07-06 00:28:32 +0000 |
---|---|---|
committer | Dmitri Gribenko <gribozavr@gmail.com> | 2012-07-06 00:28:32 +0000 |
commit | 8d3ba23f2d9e6c87794d059412a0808c9cbacb25 (patch) | |
tree | c72c618faeffa1c098c4df33857bd12a72c62fb1 /lib/AST/CommentParser.cpp | |
parent | 1838703fea568b394407b83d1055b4c7f52fb105 (diff) |
Implement AST classes for comments, a real parser for Doxygen comments and a
very simple semantic analysis that just builds the AST; minor changes for lexer
to pick up source locations I didn't think about before.
Comments AST is modelled along the ideas of HTML AST: block and inline content.
* Block content is a paragraph, a command that has a paragraph as an argument,
or a verbatim command.
* Inline content is placed within some block. Inline content includes plain
text, inline commands and HTML as tag soup.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159790 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/AST/CommentParser.cpp')
-rw-r--r-- | lib/AST/CommentParser.cpp | 414 |
1 files changed, 414 insertions, 0 deletions
diff --git a/lib/AST/CommentParser.cpp b/lib/AST/CommentParser.cpp new file mode 100644 index 0000000000..701b6fa1e1 --- /dev/null +++ b/lib/AST/CommentParser.cpp @@ -0,0 +1,414 @@ +//===--- CommentParser.cpp - Doxygen comment parser -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CommentParser.h" +#include "clang/AST/CommentSema.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang { +namespace comments { + +Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator): + L(L), S(S), Allocator(Allocator) { + consumeToken(); +} + +ParamCommandComment *Parser::parseParamCommandArgs( + ParamCommandComment *PC, + TextTokenRetokenizer &Retokenizer) { + Token Arg; + // Check if argument looks like direction specification: [dir] + // e.g., [in], [out], [in,out] + if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) + PC = S.actOnParamCommandArg(PC, + Arg.getLocation(), + Arg.getEndLocation(), + Arg.getText(), + /* IsDirection = */ true); + + if (Retokenizer.lexWord(Arg)) + StringRef ArgText = Arg.getText(); + PC = S.actOnParamCommandArg(PC, + Arg.getLocation(), + Arg.getEndLocation(), + Arg.getText(), + /* IsDirection = */ false); + + return PC; +} + +BlockCommandComment *Parser::parseBlockCommandArgs( + BlockCommandComment *BC, + TextTokenRetokenizer &Retokenizer, + unsigned NumArgs) { + typedef BlockCommandComment::Argument Argument; + Argument *Args = new (Allocator) Argument[NumArgs]; + unsigned ParsedArgs = 0; + Token Arg; + while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { + Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(), + Arg.getEndLocation()), + Arg.getText()); + ParsedArgs++; + } + + return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs)); +} + +BlockCommandComment 
*Parser::parseBlockCommand() { + assert(Tok.is(tok::command)); + + ParamCommandComment *PC; + BlockCommandComment *BC; + bool IsParam = false; + unsigned NumArgs = 0; + if (S.isParamCommand(Tok.getCommandName())) { + IsParam = true; + PC = S.actOnParamCommandStart(Tok.getLocation(), + Tok.getEndLocation(), + Tok.getCommandName()); + } else { + NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName()); + BC = S.actOnBlockCommandStart(Tok.getLocation(), + Tok.getEndLocation(), + Tok.getCommandName()); + } + consumeToken(); + + if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) { + // Block command ahead. We can't nest block commands, so pretend that this + // command has an empty argument. + // TODO: Diag() Warn empty arg to block command + ParagraphComment *PC = S.actOnParagraphComment( + ArrayRef<InlineContentComment *>()); + return S.actOnBlockCommandFinish(BC, PC); + } + + if (IsParam || NumArgs > 0) { + // In order to parse command arguments we need to retokenize a few + // following text tokens. + TextTokenRetokenizer Retokenizer(Allocator); + while (Tok.is(tok::text)) { + if (Retokenizer.addToken(Tok)) + consumeToken(); + } + + if (IsParam) + PC = parseParamCommandArgs(PC, Retokenizer); + else + BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); + + // Put back tokens we didn't use. + Token Text; + while (Retokenizer.lexText(Text)) + putBack(Text); + } + + BlockContentComment *Block = parseParagraphOrBlockCommand(); + // Since we have checked for a block command, we should have parsed a + // paragraph. 
+ if (IsParam) + return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block)); + else + return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block)); +} + +InlineCommandComment *Parser::parseInlineCommand() { + assert(Tok.is(tok::command)); + + const Token CommandTok = Tok; + consumeToken(); + + TextTokenRetokenizer Retokenizer(Allocator); + while (Tok.is(tok::text)) { + if (Retokenizer.addToken(Tok)) + consumeToken(); + } + + Token ArgTok; + bool ArgTokValid = Retokenizer.lexWord(ArgTok); + + InlineCommandComment *IC; + if (ArgTokValid) { + IC = S.actOnInlineCommand(CommandTok.getLocation(), + CommandTok.getEndLocation(), + CommandTok.getCommandName(), + ArgTok.getLocation(), + ArgTok.getEndLocation(), + ArgTok.getText()); + } else { + IC = S.actOnInlineCommand(CommandTok.getLocation(), + CommandTok.getEndLocation(), + CommandTok.getCommandName()); + } + + Token Text; + while (Retokenizer.lexText(Text)) + putBack(Text); + + return IC; +} + +HTMLOpenTagComment *Parser::parseHTMLOpenTag() { + assert(Tok.is(tok::html_tag_open)); + HTMLOpenTagComment *HOT = + S.actOnHTMLOpenTagStart(Tok.getLocation(), + Tok.getHTMLTagOpenName()); + consumeToken(); + + SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs; + while (true) { + if (Tok.is(tok::html_ident)) { + Token Ident = Tok; + consumeToken(); + if (Tok.isNot(tok::html_equals)) { + Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(), + Ident.getHTMLIdent())); + continue; + } + Token Equals = Tok; + consumeToken(); + if (Tok.isNot(tok::html_quoted_string)) { + // TODO: Diag() expected quoted string + Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(), + Ident.getHTMLIdent())); + continue; + } + Attrs.push_back(HTMLOpenTagComment::Attribute( + Ident.getLocation(), + Ident.getHTMLIdent(), + Equals.getLocation(), + SourceRange(Tok.getLocation(), + Tok.getEndLocation()), + Tok.getHTMLQuotedString())); + consumeToken(); + continue; + } else if (Tok.is(tok::html_greater)) { + HOT = 
S.actOnHTMLOpenTagFinish(HOT, + copyArray(llvm::makeArrayRef(Attrs)), + Tok.getLocation()); + consumeToken(); + return HOT; + } else if (Tok.is(tok::html_equals) || + Tok.is(tok::html_quoted_string)) { + // TODO: Diag() Err expected ident + while (Tok.is(tok::html_equals) || + Tok.is(tok::html_quoted_string)) + consumeToken(); + } else { + // Not a token from HTML open tag. Thus HTML tag prematurely ended. + // TODO: Diag() Err HTML tag prematurely ended + return S.actOnHTMLOpenTagFinish(HOT, + copyArray(llvm::makeArrayRef(Attrs)), + SourceLocation()); + } + } +} + +HTMLCloseTagComment *Parser::parseHTMLCloseTag() { + assert(Tok.is(tok::html_tag_close)); + Token TokTagOpen = Tok; + consumeToken(); + SourceLocation Loc; + if (Tok.is(tok::html_greater)) { + Loc = Tok.getLocation(); + consumeToken(); + } + + return S.actOnHTMLCloseTag(TokTagOpen.getLocation(), + Loc, + TokTagOpen.getHTMLTagCloseName()); +} + +BlockContentComment *Parser::parseParagraphOrBlockCommand() { + SmallVector<InlineContentComment *, 8> Content; + + while (true) { + switch (Tok.getKind()) { + case tok::verbatim_block_begin: + case tok::verbatim_line_name: + case tok::eof: + assert(Content.size() != 0); + break; // Block content or EOF ahead, finish this parapgaph. + + case tok::command: + if (S.isBlockCommand(Tok.getCommandName())) { + if (Content.size() == 0) + return parseBlockCommand(); + break; // Block command ahead, finish this parapgaph. + } + if (S.isInlineCommand(Tok.getCommandName())) { + Content.push_back(parseInlineCommand()); + continue; + } + + // Not a block command, not an inline command ==> an unknown command. + Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), + Tok.getEndLocation(), + Tok.getCommandName())); + consumeToken(); + continue; + + case tok::newline: { + consumeToken(); + if (Tok.is(tok::newline) || Tok.is(tok::eof)) { + consumeToken(); + break; // Two newlines -- end of paragraph. 
+ } + if (Content.size() > 0) + Content.back()->addTrailingNewline(); + continue; + } + + // Don't deal with HTML tag soup now. + case tok::html_tag_open: + Content.push_back(parseHTMLOpenTag()); + continue; + + case tok::html_tag_close: + Content.push_back(parseHTMLCloseTag()); + continue; + + case tok::text: + Content.push_back(S.actOnText(Tok.getLocation(), + Tok.getEndLocation(), + Tok.getText())); + consumeToken(); + continue; + + case tok::verbatim_block_line: + case tok::verbatim_block_end: + case tok::verbatim_line_text: + case tok::html_ident: + case tok::html_equals: + case tok::html_quoted_string: + case tok::html_greater: + llvm_unreachable("should not see this token"); + } + break; + } + + return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content))); +} + +VerbatimBlockComment *Parser::parseVerbatimBlock() { + assert(Tok.is(tok::verbatim_block_begin)); + + VerbatimBlockComment *VB = + S.actOnVerbatimBlockStart(Tok.getLocation(), + Tok.getVerbatimBlockName()); + consumeToken(); + + // Don't create an empty line if verbatim opening command is followed + // by a newline. + if (Tok.is(tok::newline)) + consumeToken(); + + SmallVector<VerbatimBlockLineComment *, 8> Lines; + while (Tok.is(tok::verbatim_block_line) || + Tok.is(tok::newline)) { + VerbatimBlockLineComment *Line; + if (Tok.is(tok::verbatim_block_line)) { + Line = S.actOnVerbatimBlockLine(Tok.getLocation(), + Tok.getVerbatimBlockText()); + consumeToken(); + if (Tok.is(tok::newline)) { + consumeToken(); + } + } else { + // Empty line, just a tok::newline. 
+ Line = S.actOnVerbatimBlockLine(Tok.getLocation(), + ""); + consumeToken(); + } + Lines.push_back(Line); + } + + assert(Tok.is(tok::verbatim_block_end)); + VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), + Tok.getVerbatimBlockName(), + copyArray(llvm::makeArrayRef(Lines))); + consumeToken(); + + return VB; +} + +VerbatimLineComment *Parser::parseVerbatimLine() { + assert(Tok.is(tok::verbatim_line_name)); + + Token NameTok = Tok; + consumeToken(); + + SourceLocation TextBegin; + StringRef Text; + // Next token might not be a tok::verbatim_line_text if verbatim line + // starting command comes just before a newline or comment end. + if (Tok.is(tok::verbatim_line_text)) { + TextBegin = Tok.getLocation(); + Text = Tok.getVerbatimLineText(); + } else { + TextBegin = NameTok.getEndLocation(); + Text = ""; + } + + VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), + NameTok.getVerbatimLineName(), + TextBegin, + Text); + consumeToken(); + return VL; +} + +BlockContentComment *Parser::parseBlockContent() { + switch (Tok.getKind()) { + case tok::text: + case tok::command: + case tok::html_tag_open: + case tok::html_tag_close: + return parseParagraphOrBlockCommand(); + + case tok::verbatim_block_begin: + return parseVerbatimBlock(); + + case tok::verbatim_line_name: + return parseVerbatimLine(); + + case tok::eof: + case tok::newline: + case tok::verbatim_block_line: + case tok::verbatim_block_end: + case tok::verbatim_line_text: + case tok::html_ident: + case tok::html_equals: + case tok::html_quoted_string: + case tok::html_greater: + llvm_unreachable("should not see this token"); + } +} + +FullComment *Parser::parseFullComment() { + // Skip newlines at the beginning of the comment. + while (Tok.is(tok::newline)) + consumeToken(); + + SmallVector<BlockContentComment *, 8> Blocks; + while (Tok.isNot(tok::eof)) { + Blocks.push_back(parseBlockContent()); + + // Skip extra newlines after paragraph end. 
+ while (Tok.is(tok::newline)) + consumeToken(); + } + return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks))); +} + +} // end namespace comments +} // end namespace clang + + |