diff options
author | Dmitri Gribenko <gribozavr@gmail.com> | 2012-07-24 17:52:18 +0000 |
---|---|---|
committer | Dmitri Gribenko <gribozavr@gmail.com> | 2012-07-24 17:52:18 +0000 |
commit | db13f04dd0579874932bdb958647d0903f632dd4 (patch) | |
tree | 6fd7057263845ecfe8a8c5e7467000ddd00281fa /lib/AST/CommentParser.cpp | |
parent | c4b0f9b851ca59e61b802d58792ea3600fd9a9d4 (diff) |
Comment parsing: couple TextTokenRetokenizer and comment parser together to
remove one of the two variable-length lookahead buffers. Now retokenizer will
ask for more tokens when it needs them.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@160680 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/AST/CommentParser.cpp')
-rw-r--r-- | lib/AST/CommentParser.cpp | 92 |
1 files changed, 42 insertions, 50 deletions
diff --git a/lib/AST/CommentParser.cpp b/lib/AST/CommentParser.cpp index e781152d3b..3393349eaf 100644 --- a/lib/AST/CommentParser.cpp +++ b/lib/AST/CommentParser.cpp @@ -19,8 +19,8 @@ namespace comments { /// Re-lexes a sequence of tok::text tokens. class TextTokenRetokenizer { llvm::BumpPtrAllocator &Allocator; - static const unsigned MaxTokens = 16; - SmallVector<Token, MaxTokens> Toks; + Parser &P; + SmallVector<Token, 16> Toks; struct Position { unsigned CurToken; @@ -39,7 +39,7 @@ class TextTokenRetokenizer { /// Sets up the buffer pointers to point to current token. void setupBuffer() { - assert(Pos.CurToken < Toks.size()); + assert(!isEnd()); const Token &Tok = Toks[Pos.CurToken]; Pos.BufferStart = Tok.getText().begin(); @@ -65,11 +65,27 @@ class TextTokenRetokenizer { Pos.BufferPtr++; if (Pos.BufferPtr == Pos.BufferEnd) { Pos.CurToken++; - if (Pos.CurToken < Toks.size()) + if (isEnd() && addToken()) { + assert(!isEnd()); setupBuffer(); + } } } + /// Add a token. + /// Returns true on success, false if there are no interesting tokens to + /// fetch from lexer. + bool addToken() { + if (P.Tok.isNot(tok::text)) + return false; + + Toks.push_back(P.Tok); + P.consumeToken(); + if (Toks.size() == 1) + setupBuffer(); + return true; + } + static bool isWhitespace(char C) { return C == ' ' || C == '\n' || C == '\r' || C == '\t' || C == '\f' || C == '\v'; @@ -100,22 +116,10 @@ class TextTokenRetokenizer { } public: - TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator): - Allocator(Allocator) { + TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P): + Allocator(Allocator), P(P) { Pos.CurToken = 0; - } - - /// Add a token. - /// Returns true on success, false if it seems like we have enough tokens. - bool addToken(const Token &Tok) { - assert(Tok.is(tok::text)); - if (Toks.size() >= MaxTokens) - return false; - - Toks.push_back(Tok); - if (Toks.size() == 1) - setupBuffer(); - return true; + addToken(); } /// Extract a word -- sequence of non-whitespace characters. @@ -199,23 +203,27 @@ public: return true; } - /// Return a text token. Useful to take tokens back. - bool lexText(Token &Tok) { + /// Put back tokens that we didn't consume. + void putBackLeftoverTokens() { if (isEnd()) - return false; + return; - if (Pos.BufferPtr != Pos.BufferStart) - formTokenWithChars(Tok, getSourceLocation(), + bool HavePartialTok = false; + Token PartialTok; + if (Pos.BufferPtr != Pos.BufferStart) { + formTokenWithChars(PartialTok, getSourceLocation(), Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr, StringRef(Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr)); - else - Tok = Toks[Pos.CurToken]; + HavePartialTok = true; + Pos.CurToken++; + } - Pos.CurToken++; - if (Pos.CurToken < Toks.size()) - setupBuffer(); - return true; + P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end())); + Pos.CurToken = Toks.size(); + + if (HavePartialTok) + P.putBack(PartialTok); } }; @@ -296,24 +304,14 @@ BlockCommandComment *Parser::parseBlockCommand() { if (IsParam || NumArgs > 0) { // In order to parse command arguments we need to retokenize a few // following text tokens. - TextTokenRetokenizer Retokenizer(Allocator); - while (Tok.is(tok::text)) { - if (Retokenizer.addToken(Tok)) - consumeToken(); - } + TextTokenRetokenizer Retokenizer(Allocator, *this); if (IsParam) PC = parseParamCommandArgs(PC, Retokenizer); else BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); - // Put back tokens we didn't use. - SmallVector<Token, 16> TextToks; - Token Text; - while (Retokenizer.lexText(Text)) { - TextToks.push_back(Text); - } - putBack(TextToks); + Retokenizer.putBackLeftoverTokens(); } BlockContentComment *Block = parseParagraphOrBlockCommand(); @@ -331,11 +329,7 @@ InlineCommandComment *Parser::parseInlineCommand() { const Token CommandTok = Tok; consumeToken(); - TextTokenRetokenizer Retokenizer(Allocator); - while (Tok.is(tok::text)) { - if (Retokenizer.addToken(Tok)) - consumeToken(); - } + TextTokenRetokenizer Retokenizer(Allocator, *this); Token ArgTok; bool ArgTokValid = Retokenizer.lexWord(ArgTok); @@ -354,9 +348,7 @@ InlineCommandComment *Parser::parseInlineCommand() { CommandTok.getCommandName()); } - Token Text; - while (Retokenizer.lexText(Text)) - putBack(Text); + Retokenizer.putBackLeftoverTokens(); return IC; } |