diff options
-rw-r--r-- | lib/Format/BreakableToken.cpp | 161 | ||||
-rw-r--r-- | lib/Format/BreakableToken.h | 226 | ||||
-rw-r--r-- | lib/Format/CMakeLists.txt | 4 | ||||
-rw-r--r-- | lib/Format/Format.cpp | 537 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.cpp | 223 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.h | 120 | ||||
-rw-r--r-- | unittests/Format/FormatTest.cpp | 21 |
7 files changed, 811 insertions, 481 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp new file mode 100644 index 0000000000..4ec3de9608 --- /dev/null +++ b/lib/Format/BreakableToken.cpp @@ -0,0 +1,161 @@ +//===--- BreakableToken.cpp - Format C++ code -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Contains implementation of BreakableToken class and classes derived +/// from it. +/// +//===----------------------------------------------------------------------===// + +#include "BreakableToken.h" +#include <algorithm> + +namespace clang { +namespace format { + +BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr, + const AnnotatedToken &Token, + unsigned StartColumn) + : Tok(Token.FormatTok), StartColumn(StartColumn) { + + SourceLocation TokenLoc = Tok.Tok.getLocation(); + TokenText = StringRef(SourceMgr.getCharacterData(TokenLoc), Tok.TokenLength); + assert(TokenText.startswith("/*") && TokenText.endswith("*/")); + + OriginalStartColumn = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1; + + TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); + + NeedsStar = true; + CommonPrefixLength = UINT_MAX; + if (Lines.size() == 1) { + if (Token.Parent == 0) { + // Standalone block comments will be aligned and prefixed with *s. + CommonPrefixLength = OriginalStartColumn + 1; + } else { + // Trailing comments can start on arbitrary column, and available + // horizontal space can be too small to align consecutive lines with + // the first one. We could, probably, align them to current + // indentation level, but now we just wrap them without indentation + // and stars. + CommonPrefixLength = 0; + NeedsStar = false; + } + } else { + for (size_t i = 1; i < Lines.size(); ++i) { + size_t FirstNonWhitespace = Lines[i].find_first_not_of(" "); + if (FirstNonWhitespace != StringRef::npos) { + NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*'); + CommonPrefixLength = + std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace); + } + } + } + if (CommonPrefixLength == UINT_MAX) + CommonPrefixLength = 0; + + IndentAtLineBreak = + std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0); +} + +void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) { + SourceLocation TokenLoc = Tok.Tok.getLocation(); + int IndentDelta = StartColumn - OriginalStartColumn; + if (IndentDelta > 0) { + std::string WhiteSpace(IndentDelta, ' '); + for (size_t i = 1; i < Lines.size(); ++i) { + Whitespaces.addReplacement( + TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0, + WhiteSpace); + } + } else if (IndentDelta < 0) { + std::string WhiteSpace(-IndentDelta, ' '); + // Check that the line is indented enough. + for (size_t i = 1; i < Lines.size(); ++i) { + if (!Lines[i].startswith(WhiteSpace)) + return; + } + for (size_t i = 1; i < Lines.size(); ++i) { + Whitespaces.addReplacement( + TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), + -IndentDelta, ""); + } + } + + for (unsigned i = 1; i < Lines.size(); ++i) + Lines[i] = Lines[i].substr(CommonPrefixLength + (NeedsStar ? 2 : 0)); +} + +BreakableToken::Split BreakableBlockComment::getSplit(unsigned LineIndex, + unsigned TailOffset, + unsigned ColumnLimit) { + StringRef Text = getLine(LineIndex).substr(TailOffset); + unsigned DecorationLength = + (TailOffset == 0 && LineIndex == 0) ? StartColumn + 2 : getPrefixLength(); + if (ColumnLimit <= DecorationLength + 1) + return Split(StringRef::npos, 0); + + unsigned MaxSplit = ColumnLimit - DecorationLength + 1; + StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); + if (SpaceOffset == StringRef::npos || + Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) { + SpaceOffset = Text.find(' ', MaxSplit); + } + if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { + StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(); + StringRef AfterCut = Text.substr(SpaceOffset).ltrim(); + return BreakableToken::Split(BeforeCut.size(), + AfterCut.begin() - BeforeCut.end()); + } + return BreakableToken::Split(StringRef::npos, 0); +} + +void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, + Split Split, bool InPPDirective, + WhitespaceManager &Whitespaces) { + StringRef Text = getLine(LineIndex).substr(TailOffset); + StringRef AdditionalPrefix = NeedsStar ? "* " : ""; + if (Text.size() == Split.first + Split.second) { + // For all but the last line handle trailing space separately. + if (LineIndex < Lines.size() - 1) + return; + // For the last line we need to break before "*/", but not to add "* ". + AdditionalPrefix = ""; + } + + unsigned WhitespaceStartColumn = + Split.first + + (LineIndex == 0 && TailOffset == 0 ? StartColumn + 2 : getPrefixLength()); + unsigned BreakOffset = Text.data() - TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; + Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix, + InPPDirective, IndentAtLineBreak, + WhitespaceStartColumn); +} + +void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset, + unsigned InPPDirective, + WhitespaceManager &Whitespaces) { + if (LineIndex == Lines.size() - 1) + return; + StringRef Text = Lines[LineIndex].substr(TailOffset); + if (!Text.endswith(" ") && !InPPDirective) + return; + + StringRef TrimmedLine = Text.rtrim(); + unsigned WhitespaceStartColumn = + getLineLengthAfterSplit(LineIndex, TailOffset); + unsigned BreakOffset = TrimmedLine.end() - TokenText.data(); + unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1; + Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective, + 0, WhitespaceStartColumn); +} + +} // namespace format +} // namespace clang diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h new file mode 100644 index 0000000000..0609104a6f --- /dev/null +++ b/lib/Format/BreakableToken.h @@ -0,0 +1,226 @@ +//===--- BreakableToken.h - Format C++ code -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Declares BreakableToken, BreakableStringLiteral, and +/// BreakableBlockComment classes, that contain token type-specific logic to +/// break long lines in tokens. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H +#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H + +#include "TokenAnnotator.h" +#include "WhitespaceManager.h" +#include <utility> + +namespace clang { +namespace format { + +class BreakableToken { +public: + virtual ~BreakableToken() {} + virtual unsigned getLineCount() const = 0; + virtual unsigned getLineSize(unsigned Index) = 0; + virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) = 0; + virtual unsigned getPrefixLength() = 0; + virtual unsigned getSuffixLength(unsigned LineIndex) = 0; + + // Contains starting character index and length of split. + typedef std::pair<StringRef::size_type, unsigned> Split; + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) = 0; + virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + bool InPPDirective, + WhitespaceManager &Whitespaces) = 0; + virtual void trimLine(unsigned LineIndex, unsigned TailOffset, + unsigned InPPDirective, + WhitespaceManager &Whitespaces) = 0; +}; + +class BreakableStringLiteral : public BreakableToken { +public: + BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn) + : Tok(Tok), StartColumn(StartColumn) {} + + virtual unsigned getLineCount() const { return 1; } + + virtual unsigned getLineSize(unsigned Index) { + return Tok.TokenLength - 2; // Should be in sync with getLine + } + + virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) { + return getPrefixLength() + getLine(LineIndex).size() - TailOffset + + getSuffixLength(LineIndex); + } + + virtual unsigned getPrefixLength() { return StartColumn + 1; } + + virtual unsigned getSuffixLength(unsigned LineIndex) { return 1; } + + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) { + StringRef Text = getLine(LineIndex).substr(TailOffset); + unsigned DecorationLength = getPrefixLength() + getSuffixLength(0); + if (ColumnLimit <= DecorationLength) + return Split(StringRef::npos, 0); + unsigned MaxSplit = ColumnLimit - DecorationLength; + assert(MaxSplit < Text.size()); + StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); + if (SpaceOffset != StringRef::npos && SpaceOffset != 0) + return Split(SpaceOffset + 1, 0); + StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit); + if (SlashOffset != StringRef::npos && SlashOffset != 0) + return Split(SlashOffset + 1, 0); + StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit); + if (SplitPoint != StringRef::npos && SplitPoint > 1) + // Do not split at 0. + return Split(SplitPoint, 0); + return Split(StringRef::npos, 0); + } + + virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + bool InPPDirective, WhitespaceManager &Whitespaces) { + unsigned WhitespaceStartColumn = StartColumn + Split.first + 2; + Whitespaces.breakToken(Tok, TailOffset + Split.first + 1, Split.second, + "\"", "\"", InPPDirective, StartColumn, + WhitespaceStartColumn); + } + + virtual void trimLine(unsigned LineIndex, unsigned TailOffset, + unsigned InPPDirective, + WhitespaceManager &Whitespaces) {} + +private: + StringRef getLine(unsigned Index) { + // Get string without quotes. + // FIXME: Handle string prefixes. + return StringRef(Tok.Tok.getLiteralData() + 1, Tok.TokenLength - 2); + } + + static StringRef::size_type getStartOfCharacter(StringRef Text, + StringRef::size_type Offset) { + StringRef::size_type NextEscape = Text.find('\\'); + while (NextEscape != StringRef::npos && NextEscape < Offset) { + StringRef::size_type SequenceLength = + getEscapeSequenceLength(Text.substr(NextEscape)); + if (Offset < NextEscape + SequenceLength) + return NextEscape; + NextEscape = Text.find('\\', NextEscape + SequenceLength); + } + return Offset; + } + + static unsigned getEscapeSequenceLength(StringRef Text) { + assert(Text[0] == '\\'); + if (Text.size() < 2) + return 1; + + switch (Text[1]) { + case 'u': + return 6; + case 'U': + return 10; + case 'x': + return getHexLength(Text); + default: + if (Text[1] >= '0' && Text[1] <= '7') + return getOctalLength(Text); + return 2; + } + } + + static unsigned getHexLength(StringRef Text) { + unsigned I = 2; // Point after '\x'. + while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || + (Text[I] >= 'a' && Text[I] <= 'f') || + (Text[I] >= 'A' && Text[I] <= 'F'))) { + ++I; + } + return I; + } + + static unsigned getOctalLength(StringRef Text) { + unsigned I = 1; + while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { + ++I; + } + return I; + } + + const FormatToken &Tok; + unsigned StartColumn; +}; + +class BreakableBlockComment : public BreakableToken { +public: + BreakableBlockComment(const SourceManager &SourceMgr, + const AnnotatedToken &Token, unsigned StartColumn); + + void alignLines(WhitespaceManager &Whitespaces); + + virtual unsigned getLineCount() const { return Lines.size(); } + + virtual unsigned getLineSize(unsigned Index) { + return getLine(Index).size(); + } + + virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) { + unsigned ContentStartColumn = getPrefixLength(); + if (TailOffset == 0 && LineIndex == 0) + ContentStartColumn = StartColumn + 2; + return ContentStartColumn + getLine(LineIndex).size() - TailOffset + + getSuffixLength(LineIndex); + } + + virtual unsigned getPrefixLength() { + return IndentAtLineBreak + (NeedsStar ? 2 : 0); + } + + virtual unsigned getSuffixLength(unsigned LineIndex) { + if (LineIndex + 1 < Lines.size()) + return 0; + return 2; + } + + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit); + + virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + bool InPPDirective, WhitespaceManager &Whitespaces); + + virtual void trimLine(unsigned LineIndex, unsigned TailOffset, + unsigned InPPDirective, WhitespaceManager &Whitespaces); + +private: + // Get comment lines without /* */, common prefix and trailing whitespace. + // Last line is not trimmed, as it is terminated by */, so its trailing + // whitespace is not really trailing. + StringRef getLine(unsigned Index) { + return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index]; + } + + const FormatToken &Tok; + const unsigned StartColumn; + StringRef TokenText; + unsigned OriginalStartColumn; + unsigned CommonPrefixLength; + unsigned IndentAtLineBreak; + bool NeedsStar; + SmallVector<StringRef, 16> Lines; +}; + +} // namespace format +} // namespace clang + +#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt index d8630eeeea..560e38b4bf 100644 --- a/lib/Format/CMakeLists.txt +++ b/lib/Format/CMakeLists.txt @@ -1,9 +1,11 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangFormat + BreakableToken.cpp + Format.cpp TokenAnnotator.cpp UnwrappedLineParser.cpp - Format.cpp + WhitespaceManager.cpp ) add_dependencies(clangFormat diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index d0dfdceddc..f93509e18c 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -15,8 +15,10 @@ #define DEBUG_TYPE "format-formatter" +#include "BreakableToken.h" #include "TokenAnnotator.h" #include "UnwrappedLineParser.h" +#include "WhitespaceManager.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" @@ -93,367 +95,6 @@ static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) { return End->TotalLength - Tok.TotalLength + 1; } -static size_t -calculateColumnLimit(const FormatStyle &Style, bool InPPDirective) { - // In preprocessor directives reserve two chars for trailing " \" - return Style.ColumnLimit - (InPPDirective ? 2 : 0); -} - -/// \brief Manages the whitespaces around tokens and their replacements. -/// -/// This includes special handling for certain constructs, e.g. the alignment of -/// trailing line comments. -class WhitespaceManager { -public: - WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style) - : SourceMgr(SourceMgr), Style(Style) {} - - /// \brief Replaces the whitespace in front of \p Tok. Only call once for - /// each \c AnnotatedToken. - void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, - unsigned Spaces, unsigned WhitespaceStartColumn) { - // 2+ newlines mean an empty line separating logic scopes. - if (NewLines >= 2) - alignComments(); - - SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation(); - bool LineExceedsColumnLimit = Spaces + WhitespaceStartColumn + - Tok.FormatTok.TokenLength > Style.ColumnLimit; - - // Align line comments if they are trailing or if they continue other - // trailing comments. - if (Tok.isTrailingComment()) { - // Remove the comment's trailing whitespace. - if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength) - Replaces.insert(tooling::Replacement( - SourceMgr, TokenLoc.getLocWithOffset(Tok.FormatTok.TokenLength), - Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, "")); - - // Align comment with other comments. - if ((Tok.Parent != NULL || !Comments.empty()) && - !LineExceedsColumnLimit) { - StoredComment Comment; - Comment.Tok = Tok.FormatTok; - Comment.Spaces = Spaces; - Comment.NewLines = NewLines; - Comment.MinColumn = - NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces; - Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength; - Comment.Untouchable = false; - Comments.push_back(Comment); - return; - } - } - - // If this line does not have a trailing comment, align the stored comments. - if (Tok.Children.empty() && !Tok.isTrailingComment()) - alignComments(); - - if (Tok.Type == TT_BlockComment) { - indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, false); - } else if (Tok.Type == TT_LineComment && LineExceedsColumnLimit) { - StringRef Line(SourceMgr.getCharacterData(TokenLoc), - Tok.FormatTok.TokenLength); - int StartColumn = Spaces + (NewLines == 0 ? WhitespaceStartColumn : 0); - StringRef Prefix = getLineCommentPrefix(Line); - std::string NewPrefix = std::string(StartColumn, ' ') + Prefix.str(); - splitLineInComment(Tok.FormatTok, Line.substr(Prefix.size()), - StartColumn + Prefix.size(), NewPrefix, - /*InPPDirective=*/ false, - /*CommentHasMoreLines=*/ false); - } - - storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces)); - } - - /// \brief Like \c replaceWhitespace, but additionally adds right-aligned - /// backslashes to escape newlines inside a preprocessor directive. - /// - /// This function and \c replaceWhitespace have the same behavior if - /// \c Newlines == 0. - void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, - unsigned Spaces, unsigned WhitespaceStartColumn) { - if (Tok.Type == TT_BlockComment) - indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, true); - - storeReplacement(Tok.FormatTok, - getNewLineText(NewLines, Spaces, WhitespaceStartColumn)); - } - - /// \brief Inserts a line break into the middle of a token. - /// - /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line - /// break and \p Postfix before the rest of the token starts in the next line. - /// - /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are - /// used to generate the correct line break. - void breakToken(const FormatToken &Tok, unsigned Offset, - unsigned ReplaceChars, StringRef Prefix, StringRef Postfix, - bool InPPDirective, unsigned Spaces, - unsigned WhitespaceStartColumn) { - std::string NewLineText; - if (!InPPDirective) - NewLineText = getNewLineText(1, Spaces); - else - NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn); - std::string ReplacementText = (Prefix + NewLineText + Postfix).str(); - SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset); - Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars, - ReplacementText)); - } - - /// \brief Returns all the \c Replacements created during formatting. - const tooling::Replacements &generateReplacements() { - alignComments(); - return Replaces; - } - - void addUntouchableComment(unsigned Column) { - StoredComment Comment; - Comment.MinColumn = Column; - Comment.MaxColumn = Column; - Comment.Untouchable = true; - Comments.push_back(Comment); - } - -private: - static StringRef getLineCommentPrefix(StringRef Comment) { - const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" }; - for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i) - if (Comment.startswith(KnownPrefixes[i])) - return KnownPrefixes[i]; - return ""; - } - - /// \brief Finds a common prefix of lines of a block comment to properly - /// indent (and possibly decorate with '*'s) added lines. - /// - /// The first line is ignored (it's special and starts with /*). The number of - /// lines should be more than one. - static StringRef findCommentLinesPrefix(ArrayRef<StringRef> Lines, - const char *PrefixChars = " *") { - assert(Lines.size() > 1); - StringRef Prefix(Lines[1].data(), Lines[1].find_first_not_of(PrefixChars)); - for (size_t i = 2; i < Lines.size(); ++i) { - for (size_t j = 0; j < Prefix.size() && j < Lines[i].size(); ++j) { - if (Prefix[j] != Lines[i][j]) { - Prefix = Prefix.substr(0, j); - break; - } - } - } - return Prefix; - } - - /// \brief Splits one line in a line or block comment, if it doesn't fit to - /// provided column limit. Removes trailing whitespace in each line. - /// - /// \param Line points to the line contents without leading // or /*. - /// - /// \param StartColumn is the column where the first character of Line will be - /// located after formatting. - /// - /// \param LinePrefix is inserted after each line break. - /// - /// When \param InPPDirective is true, each line break will be preceded by a - /// backslash in the last column to make line breaks inside the comment - /// visually consistent with line breaks outside the comment. This only makes - /// sense for block comments. - /// - /// When \param CommentHasMoreLines is false, no line breaks/trailing - /// backslashes will be inserted after it. - void splitLineInComment(const FormatToken &Tok, StringRef Line, - size_t StartColumn, StringRef LinePrefix, - bool InPPDirective, bool CommentHasMoreLines, - const char *WhiteSpaceChars = " ") { - size_t ColumnLimit = calculateColumnLimit(Style, InPPDirective); - const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation()); - - StringRef TrimmedLine = Line.rtrim(); - int TrailingSpaceLength = Line.size() - TrimmedLine.size(); - - // Don't touch leading whitespace. - Line = TrimmedLine.ltrim(); - StartColumn += TrimmedLine.size() - Line.size(); - - while (Line.size() + StartColumn > ColumnLimit) { - // Try to break at the last whitespace before the column limit. - size_t SpacePos = - Line.find_last_of(WhiteSpaceChars, ColumnLimit - StartColumn + 1); - if (SpacePos == StringRef::npos) { - // Try to find any whitespace in the line. - SpacePos = Line.find_first_of(WhiteSpaceChars); - if (SpacePos == StringRef::npos) // No whitespace found, give up. - break; - } - - StringRef NextCut = Line.substr(0, SpacePos).rtrim(); - StringRef RemainingLine = Line.substr(SpacePos).ltrim(); - if (RemainingLine.empty()) - break; - - if (RemainingLine == "*/" && LinePrefix.endswith("* ")) - LinePrefix = LinePrefix.substr(0, LinePrefix.size() - 2); - - Line = RemainingLine; - - size_t ReplaceChars = Line.begin() - NextCut.end(); - breakToken(Tok, NextCut.end() - TokenStart, ReplaceChars, "", LinePrefix, - InPPDirective, 0, NextCut.size() + StartColumn); - StartColumn = LinePrefix.size(); - } - - if (TrailingSpaceLength > 0 || (InPPDirective && CommentHasMoreLines)) { - // Remove trailing whitespace/insert backslash. + 1 is for \n - breakToken(Tok, Line.end() - TokenStart, TrailingSpaceLength + 1, "", "", - InPPDirective, 0, Line.size() + StartColumn); - } - } - - /// \brief Changes indentation of all lines in a block comment by Indent, - /// removes trailing whitespace from each line, splits lines that end up - /// exceeding the column limit. - void indentBlockComment(const AnnotatedToken &Tok, int Indent, - int WhitespaceStartColumn, int NewLines, - bool InPPDirective) { - assert(Tok.Type == TT_BlockComment); - int StartColumn = Indent + (NewLines == 0 ? WhitespaceStartColumn : 0); - const SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation(); - const int CurrentIndent = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1; - const int IndentDelta = Indent - CurrentIndent; - const StringRef Text(SourceMgr.getCharacterData(TokenLoc), - Tok.FormatTok.TokenLength); - assert(Text.startswith("/*") && Text.endswith("*/")); - - SmallVector<StringRef, 16> Lines; - Text.split(Lines, "\n"); - - if (IndentDelta > 0) { - std::string WhiteSpace(IndentDelta, ' '); - for (size_t i = 1; i < Lines.size(); ++i) { - Replaces.insert(tooling::Replacement( - SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()), - 0, WhiteSpace)); - } - } else if (IndentDelta < 0) { - std::string WhiteSpace(-IndentDelta, ' '); - // Check that the line is indented enough. - for (size_t i = 1; i < Lines.size(); ++i) { - if (!Lines[i].startswith(WhiteSpace)) - return; - } - for (size_t i = 1; i < Lines.size(); ++i) { - Replaces.insert(tooling::Replacement( - SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()), - -IndentDelta, "")); - } - } - - // Split long lines in comments. - size_t OldPrefixSize = 0; - std::string NewPrefix; - if (Lines.size() > 1) { - StringRef CurrentPrefix = findCommentLinesPrefix(Lines); - OldPrefixSize = CurrentPrefix.size(); - NewPrefix = (IndentDelta < 0) - ? CurrentPrefix.substr(-IndentDelta).str() - : std::string(IndentDelta, ' ') + CurrentPrefix.str(); - if (CurrentPrefix.endswith("*")) { - NewPrefix += " "; - ++OldPrefixSize; - } - } else if (Tok.Parent == 0) { - NewPrefix = std::string(StartColumn, ' ') + " * "; - } - - StartColumn += 2; - for (size_t i = 0; i < Lines.size(); ++i) { - StringRef Line = Lines[i].substr(i == 0 ? 2 : OldPrefixSize); - splitLineInComment(Tok.FormatTok, Line, StartColumn, NewPrefix, - InPPDirective, i != Lines.size() - 1); - StartColumn = NewPrefix.size(); - } - } - - std::string getNewLineText(unsigned NewLines, unsigned Spaces) { - return std::string(NewLines, '\n') + std::string(Spaces, ' '); - } - - std::string getNewLineText(unsigned NewLines, unsigned Spaces, - unsigned WhitespaceStartColumn) { - std::string NewLineText; - if (NewLines > 0) { - unsigned Offset = - std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn); - for (unsigned i = 0; i < NewLines; ++i) { - NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' '); - NewLineText += "\\\n"; - Offset = 0; - } - } - return NewLineText + std::string(Spaces, ' '); - } - - /// \brief Structure to store a comment for later layout and alignment. - struct StoredComment { - FormatToken Tok; - unsigned MinColumn; - unsigned MaxColumn; - unsigned NewLines; - unsigned Spaces; - bool Untouchable; - }; - SmallVector<StoredComment, 16> Comments; - typedef SmallVector<StoredComment, 16>::iterator comment_iterator; - - /// \brief Try to align all stashed comments. - void alignComments() { - unsigned MinColumn = 0; - unsigned MaxColumn = UINT_MAX; - comment_iterator Start = Comments.begin(); - for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) { - if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) { - alignComments(Start, I, MinColumn); - MinColumn = I->MinColumn; - MaxColumn = I->MaxColumn; - Start = I; - } else { - MinColumn = std::max(MinColumn, I->MinColumn); - MaxColumn = std::min(MaxColumn, I->MaxColumn); - } - } - alignComments(Start, Comments.end(), MinColumn); - Comments.clear(); - } - - /// \brief Put all the comments between \p I and \p E into \p Column. - void alignComments(comment_iterator I, comment_iterator E, unsigned Column) { - while (I != E) { - if (!I->Untouchable) { - unsigned Spaces = I->Spaces + Column - I->MinColumn; - storeReplacement(I->Tok, getNewLineText(I->NewLines, Spaces)); - } - ++I; - } - } - - /// \brief Stores \p Text as the replacement for the whitespace in front of - /// \p Tok. - void storeReplacement(const FormatToken &Tok, const std::string Text) { - // Don't create a replacement, if it does not change anything. - if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart), - Tok.WhiteSpaceLength) == Text) - return; - - Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart, - Tok.WhiteSpaceLength, Text)); - } - - SourceManager &SourceMgr; - tooling::Replacements Replaces; - const FormatStyle &Style; -}; - class UnwrappedLineFormatter { public: UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, @@ -602,7 +243,7 @@ private: if (StartOfFunctionCall != Other.StartOfFunctionCall) return StartOfFunctionCall < Other.StartOfFunctionCall; if (NestedNameSpecifierContinuation != - Other.NestedNameSpecifierContinuation) + Other.NestedNameSpecifierContinuation) return NestedNameSpecifierContinuation < Other.NestedNameSpecifierContinuation; if (CallContinuation != Other.CallContinuation) @@ -647,7 +288,7 @@ private: if (Column != Other.Column) return Column < Other.Column; if (LineContainsContinuedForLoopSection != - Other.LineContainsContinuedForLoopSection) + Other.LineContainsContinuedForLoopSection) return LineContainsContinuedForLoopSection; if (ParenLevel != Other.ParenLevel) return ParenLevel < Other.ParenLevel; @@ -806,7 +447,7 @@ private: if (Current.Type == TT_ObjCSelectorName && State.Stack.back().ColonPos == 0) { if (State.Stack.back().Indent + Current.LongestObjCSelectorName > - State.Column + Spaces + Current.FormatTok.TokenLength) + State.Column + Spaces + Current.FormatTok.TokenLength) State.Stack.back().ColonPos = State.Stack.back().Indent + Current.LongestObjCSelectorName; else @@ -970,115 +611,78 @@ private: /// it if possible. unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State, bool DryRun) { - if (Current.isNot(tok::string_literal)) + llvm::OwningPtr<BreakableToken> Token; + unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; + if (Current.is(tok::string_literal)) { + // Only break up default narrow strings. + const char *LiteralData = Current.FormatTok.Tok.getLiteralData(); + if (!LiteralData || *LiteralData != '"') + return 0; + + Token.reset(new BreakableStringLiteral(Current.FormatTok, StartColumn)); + } else if (Current.Type == TT_BlockComment) { + BreakableBlockComment *BBC = + new BreakableBlockComment(SourceMgr, Current, StartColumn); + if (!DryRun) + BBC->alignLines(Whitespaces); + Token.reset(BBC); + } else { return 0; - // Only break up default narrow strings. - const char *LiteralData = Current.FormatTok.Tok.getLiteralData(); - if (!LiteralData || *LiteralData != '"') + } + + if (Token->getPrefixLength() + Token->getSuffixLength(0) > + getColumnLimit()) { return 0; + } + bool BreakInserted = false; unsigned Penalty = 0; - unsigned TailOffset = 0; - unsigned TailLength = Current.FormatTok.TokenLength; - unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; - unsigned OffsetFromStart = 0; - while (StartColumn + TailLength > getColumnLimit()) { - StringRef Text = StringRef(LiteralData + TailOffset, TailLength); - if (StartColumn + OffsetFromStart + 1 > getColumnLimit()) - break; - StringRef::size_type SplitPoint = getSplitPoint( - Text, getColumnLimit() - StartColumn - OffsetFromStart - 1); - if (SplitPoint == StringRef::npos) - break; - assert(SplitPoint != 0); - // +2, because 'Text' starts after the opening quotes, and does not - // include the closing quote we need to insert. - unsigned WhitespaceStartColumn = - StartColumn + OffsetFromStart + SplitPoint + 2; - State.Stack.back().LastSpace = StartColumn; + for (unsigned LineIndex = 0; LineIndex < Token->getLineCount(); + ++LineIndex) { + unsigned TokenLineSize = Token->getLineSize(LineIndex); + unsigned TailOffset = 0; + unsigned RemainingLength = + Token->getLineLengthAfterSplit(LineIndex, TailOffset); + while (RemainingLength > getColumnLimit()) { + unsigned DecorationLength = + RemainingLength - (TokenLineSize - TailOffset); + if (DecorationLength + 1 > getColumnLimit()) { + // Can't reduce line length by splitting here. + break; + } + BreakableToken::Split Split = + Token->getSplit(LineIndex, TailOffset, getColumnLimit()); + if (Split.first == StringRef::npos) + break; + assert(Split.first != 0); + if (!DryRun) { + Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective, + Whitespaces); + } + TailOffset += Split.first + Split.second; + unsigned OldRemainingLength = RemainingLength; + RemainingLength = Token->getLineLengthAfterSplit(LineIndex, TailOffset); + assert(RemainingLength < OldRemainingLength); + Penalty += Style.PenaltyExcessCharacter; + BreakInserted = true; + } + State.Column = RemainingLength; if (!DryRun) { - Whitespaces.breakToken(Current.FormatTok, TailOffset + SplitPoint + 1, - 0, "\"", "\"", Line.InPPDirective, StartColumn, - WhitespaceStartColumn); + Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces); } - TailOffset += SplitPoint + 1; - TailLength -= SplitPoint + 1; - OffsetFromStart = 1; - Penalty += Style.PenaltyExcessCharacter; + } + + if (BreakInserted) { for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) State.Stack[i].BreakBeforeParameter = true; + State.Stack.back().LastSpace = StartColumn; } - State.Column = StartColumn + TailLength; return Penalty; } - StringRef::size_type - getSplitPoint(StringRef Text, StringRef::size_type Offset) { - StringRef::size_type SpaceOffset = Text.rfind(' ', Offset); - if (SpaceOffset != StringRef::npos & |