Unified token breaking logic for strings and block comments.

Summary: Both strings and block comments are broken into lines in breakProtrudingToken. Logic specific to strings or block comments is abstracted in implementations of the BreakableToken interface. Among other goodness, this change fixes placement of backslashes after a block comment inside a preprocessor directive (see removed FIXMEs in unit tests). The code is far from being polished, and some parts of it will be changed to support line comments. Reviewers: klimek Reviewed By: klimek CC: cfe-commits Differential Revision: http://llvm-reviews.chandlerc.com/D665 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@179526 91177308-0d34-0410-b5e6-96231b3b80d8
author: Alexander Kornienko <alexfh@google.com> 2013-04-15 14:28:00 +0000
committer: Alexander Kornienko <alexfh@google.com> 2013-04-15 14:28:00 +0000
commit: 70ce7881fc30a39b795b2873f008e7eca72ba669 (patch)
tree: f9190f278da3d6f81517e5f69243d13f3c46f15e /lib/Format
parent: 115ac5ac1281e6f301da4da6a5c669beae59ffcc (diff)
6 files changed, 804 insertions, 467 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
new file mode 100644
index 0000000000..4ec3de9608
--- /dev/null
+++ b/lib/Format/BreakableToken.cpp
@@ -0,0 +1,161 @@
+//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Contains implementation of BreakableToken class and classes derived
+/// from it.
+///
+//===----------------------------------------------------------------------===//
+
+#include "BreakableToken.h"
+#include <algorithm>
+
+namespace clang {
+namespace format {
+
+BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr,
+                                             const AnnotatedToken &Token,
+                                             unsigned StartColumn)
+    : Tok(Token.FormatTok), StartColumn(StartColumn) {
+
+  SourceLocation TokenLoc = Tok.Tok.getLocation();
+  TokenText = StringRef(SourceMgr.getCharacterData(TokenLoc), Tok.TokenLength);
+  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
+
+  OriginalStartColumn = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1;
+
+  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
+
+  NeedsStar = true;
+  CommonPrefixLength = UINT_MAX;
+  if (Lines.size() == 1) {
+    if (Token.Parent == 0) {
+      // Standalone block comments will be aligned and prefixed with *s.
+      CommonPrefixLength = OriginalStartColumn + 1;
+    } else {
+      // Trailing comments can start on arbitrary column, and available
+      // horizontal space can be too small to align consecutive lines with
+      // the first one. We could, probably, align them to current
+      // indentation level, but now we just wrap them without indentation
+      // and stars.
+      CommonPrefixLength = 0;
+      NeedsStar = false;
+    }
+  } else {
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      size_t FirstNonWhitespace = Lines[i].find_first_not_of(" ");
+      if (FirstNonWhitespace != StringRef::npos) {
+        NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*');
+        CommonPrefixLength =
+            std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace);
+      }
+    }
+  }
+  if (CommonPrefixLength == UINT_MAX)
+    CommonPrefixLength = 0;
+
+  IndentAtLineBreak =
+      std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0);
+}
+
+void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) {
+  SourceLocation TokenLoc = Tok.Tok.getLocation();
+  int IndentDelta = StartColumn - OriginalStartColumn;
+  if (IndentDelta > 0) {
+    std::string WhiteSpace(IndentDelta, ' ');
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      Whitespaces.addReplacement(
+          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0,
+          WhiteSpace);
+    }
+  } else if (IndentDelta < 0) {
+    std::string WhiteSpace(-IndentDelta, ' ');
+    // Check that the line is indented enough.
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      if (!Lines[i].startswith(WhiteSpace))
+        return;
+    }
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      Whitespaces.addReplacement(
+          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()),
+          -IndentDelta, "");
+    }
+  }
+
+  for (unsigned i = 1; i < Lines.size(); ++i)
+    Lines[i] = Lines[i].substr(CommonPrefixLength + (NeedsStar ? 2 : 0));
+}
+
+BreakableToken::Split BreakableBlockComment::getSplit(unsigned LineIndex,
+                                                      unsigned TailOffset,
+                                                      unsigned ColumnLimit) {
+  StringRef Text = getLine(LineIndex).substr(TailOffset);
+  unsigned DecorationLength =
+      (TailOffset == 0 && LineIndex == 0) ? StartColumn + 2 : getPrefixLength();
+  if (ColumnLimit <= DecorationLength + 1)
+    return Split(StringRef::npos, 0);
+
+  unsigned MaxSplit = ColumnLimit - DecorationLength + 1;
+  StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+  if (SpaceOffset == StringRef::npos ||
+      Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
+    SpaceOffset = Text.find(' ', MaxSplit);
+  }
+  if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
+    StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
+    StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
+    return BreakableToken::Split(BeforeCut.size(),
+                                 AfterCut.begin() - BeforeCut.end());
+  }
+  return BreakableToken::Split(StringRef::npos, 0);
+}
+
+void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+                                        Split Split, bool InPPDirective,
+                                        WhitespaceManager &Whitespaces) {
+  StringRef Text = getLine(LineIndex).substr(TailOffset);
+  StringRef AdditionalPrefix = NeedsStar ? "* " : "";
+  if (Text.size() == Split.first + Split.second) {
+    // For all but the last line handle trailing space separately.
+    if (LineIndex < Lines.size() - 1)
+      return;
+    // For the last line we need to break before "*/", but not to add "* ".
+    AdditionalPrefix = "";
+  }
+
+  unsigned WhitespaceStartColumn =
+      Split.first +
+      (LineIndex == 0 && TailOffset == 0 ? StartColumn + 2 : getPrefixLength());
+  unsigned BreakOffset = Text.data() - TokenText.data() + Split.first;
+  unsigned CharsToRemove = Split.second;
+  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix,
+                         InPPDirective, IndentAtLineBreak,
+                         WhitespaceStartColumn);
+}
+
+void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset,
+                                     unsigned InPPDirective,
+                                     WhitespaceManager &Whitespaces) {
+  if (LineIndex == Lines.size() - 1)
+    return;
+  StringRef Text = Lines[LineIndex].substr(TailOffset);
+  if (!Text.endswith(" ") && !InPPDirective)
+    return;
+
+  StringRef TrimmedLine = Text.rtrim();
+  unsigned WhitespaceStartColumn =
+      getLineLengthAfterSplit(LineIndex, TailOffset);
+  unsigned BreakOffset = TrimmedLine.end() - TokenText.data();
+  unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1;
+  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective,
+                         0, WhitespaceStartColumn);
+}
+
+} // namespace format
+} // namespace clang
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h
new file mode 100644
index 0000000000..0609104a6f
--- /dev/null
+++ b/lib/Format/BreakableToken.h
@@ -0,0 +1,226 @@
+//===--- BreakableToken.h - Format C++ code -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Declares BreakableToken, BreakableStringLiteral, and
+/// BreakableBlockComment classes, that contain token type-specific logic to
+/// break long lines in tokens.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
+#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
+
+#include "TokenAnnotator.h"
+#include "WhitespaceManager.h"
+#include <utility>
+
+namespace clang {
+namespace format {
+
+class BreakableToken {
+public:
+  virtual ~BreakableToken() {}
+  virtual unsigned getLineCount() const = 0;
+  virtual unsigned getLineSize(unsigned Index) = 0;
+  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                           unsigned TailOffset) = 0;
+  virtual unsigned getPrefixLength() = 0;
+  virtual unsigned getSuffixLength(unsigned LineIndex) = 0;
+
+  // Contains starting character index and length of split.
+  typedef std::pair<StringRef::size_type, unsigned> Split;
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) = 0;
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective,
+                           WhitespaceManager &Whitespaces) = 0;
+  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
+                        unsigned InPPDirective,
+                        WhitespaceManager &Whitespaces) = 0;
+};
+
+class BreakableStringLiteral : public BreakableToken {
+public:
+  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn)
+      : Tok(Tok), StartColumn(StartColumn) {}
+
+  virtual unsigned getLineCount() const { return 1; }
+
+  virtual unsigned getLineSize(unsigned Index) {
+    return Tok.TokenLength - 2; // Should be in sync with getLine
+  }
+
+  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                           unsigned TailOffset) {
+    return getPrefixLength() + getLine(LineIndex).size() - TailOffset +
+           getSuffixLength(LineIndex);
+  }
+
+  virtual unsigned getPrefixLength() { return StartColumn + 1; }
+
+  virtual unsigned getSuffixLength(unsigned LineIndex) { return 1; }
+
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) {
+    StringRef Text = getLine(LineIndex).substr(TailOffset);
+    unsigned DecorationLength = getPrefixLength() + getSuffixLength(0);
+    if (ColumnLimit <= DecorationLength)
+      return Split(StringRef::npos, 0);
+    unsigned MaxSplit = ColumnLimit - DecorationLength;
+    assert(MaxSplit < Text.size());
+    StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+    if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
+      return Split(SpaceOffset + 1, 0);
+    StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
+    if (SlashOffset != StringRef::npos && SlashOffset != 0)
+      return Split(SlashOffset + 1, 0);
+    StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
+    if (SplitPoint != StringRef::npos && SplitPoint > 1)
+      // Do not split at 0.
+      return Split(SplitPoint, 0);
+    return Split(StringRef::npos, 0);
+  }
+
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective, WhitespaceManager &Whitespaces) {
+    unsigned WhitespaceStartColumn = StartColumn + Split.first + 2;
+    Whitespaces.breakToken(Tok, TailOffset + Split.first + 1, Split.second,
+                           "\"", "\"", InPPDirective, StartColumn,
+                           WhitespaceStartColumn);
+  }
+
+  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
+                        unsigned InPPDirective,
+                        WhitespaceManager &Whitespaces) {}
+
+private:
+  StringRef getLine(unsigned Index) {
+    // Get string without quotes.
+    // FIXME: Handle string prefixes.
+    return StringRef(Tok.Tok.getLiteralData() + 1, Tok.TokenLength - 2);
+  }
+
+  static StringRef::size_type getStartOfCharacter(StringRef Text,
+                                                  StringRef::size_type Offset) {
+    StringRef::size_type NextEscape = Text.find('\\');
+    while (NextEscape != StringRef::npos && NextEscape < Offset) {
+      StringRef::size_type SequenceLength =
+          getEscapeSequenceLength(Text.substr(NextEscape));
+      if (Offset < NextEscape + SequenceLength)
+        return NextEscape;
+      NextEscape = Text.find('\\', NextEscape + SequenceLength);
+    }
+    return Offset;
+  }
+
+  static unsigned getEscapeSequenceLength(StringRef Text) {
+    assert(Text[0] == '\\');
+    if (Text.size() < 2)
+      return 1;
+
+    switch (Text[1]) {
+    case 'u':
+      return 6;
+    case 'U':
+      return 10;
+    case 'x':
+      return getHexLength(Text);
+    default:
+      if (Text[1] >= '0' && Text[1] <= '7')
+        return getOctalLength(Text);
+      return 2;
+    }
+  }
+
+  static unsigned getHexLength(StringRef Text) {
+    unsigned I = 2; // Point after '\x'.
+    while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
+                               (Text[I] >= 'a' && Text[I] <= 'f') ||
+                               (Text[I] >= 'A' && Text[I] <= 'F'))) {
+      ++I;
+    }
+    return I;
+  }
+
+  static unsigned getOctalLength(StringRef Text) {
+    unsigned I = 1;
+    while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
+      ++I;
+    }
+    return I;
+  }
+
+  const FormatToken &Tok;
+  unsigned StartColumn;
+};
+
+class BreakableBlockComment : public BreakableToken {
+public:
+  BreakableBlockComment(const SourceManager &SourceMgr,
+                        const AnnotatedToken &Token, unsigned StartColumn);
+
+  void alignLines(WhitespaceManager &Whitespaces);
+
+  virtual unsigned getLineCount() const { return Lines.size(); }
+
+  virtual unsigned getLineSize(unsigned Index) {
+    return getLine(Index).size();
+  }
+
+  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                           unsigned TailOffset) {
+    unsigned ContentStartColumn = getPrefixLength();
+    if (TailOffset == 0 && LineIndex == 0)
+      ContentStartColumn = StartColumn + 2;
+    return ContentStartColumn + getLine(LineIndex).size() - TailOffset +
+           getSuffixLength(LineIndex);
+  }
+
+  virtual unsigned getPrefixLength() {
+    return IndentAtLineBreak + (NeedsStar ? 2 : 0);
+  }
+
+  virtual unsigned getSuffixLength(unsigned LineIndex) {
+    if (LineIndex + 1 < Lines.size())
+      return 0;
+    return 2;
+  }
+
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit);
+
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective, WhitespaceManager &Whitespaces);
+
+  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
+                        unsigned InPPDirective, WhitespaceManager &Whitespaces);
+
+private:
+  // Get comment lines without /* */, common prefix and trailing whitespace.
+  // Last line is not trimmed, as it is terminated by */, so its trailing
+  // whitespace is not really trailing.
+  StringRef getLine(unsigned Index) {
+    return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index];
+  }
+
+  const FormatToken &Tok;
+  const unsigned StartColumn;
+  StringRef TokenText;
+  unsigned OriginalStartColumn;
+  unsigned CommonPrefixLength;
+  unsigned IndentAtLineBreak;
+  bool NeedsStar;
+  SmallVector<StringRef, 16> Lines;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt
index d8630eeeea..560e38b4bf 100644
--- a/lib/Format/CMakeLists.txt
+++ b/lib/Format/CMakeLists.txt
@@ -1,9 +1,11 @@
 set(LLVM_LINK_COMPONENTS support)
 
 add_clang_library(clangFormat
+  BreakableToken.cpp
+  Format.cpp
   TokenAnnotator.cpp
   UnwrappedLineParser.cpp
-  Format.cpp
+  WhitespaceManager.cpp
   )
 
 add_dependencies(clangFormat
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index d0dfdceddc..f93509e18c 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -15,8 +15,10 @@
 
 #define DEBUG_TYPE "format-formatter"
 
+#include "BreakableToken.h"
 #include "TokenAnnotator.h"
 #include "UnwrappedLineParser.h"
+#include "WhitespaceManager.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/OperatorPrecedence.h"
 #include "clang/Basic/SourceManager.h"
@@ -93,367 +95,6 @@ static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) {
   return End->TotalLength - Tok.TotalLength + 1;
 }
 
-static size_t
-calculateColumnLimit(const FormatStyle &Style, bool InPPDirective) {
-  // In preprocessor directives reserve two chars for trailing " \"
-  return Style.ColumnLimit - (InPPDirective ? 2 : 0);
-}
-
-/// \brief Manages the whitespaces around tokens and their replacements.
-///
-/// This includes special handling for certain constructs, e.g. the alignment of
-/// trailing line comments.
-class WhitespaceManager {
-public:
-  WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style)
-      : SourceMgr(SourceMgr), Style(Style) {}
-
-  /// \brief Replaces the whitespace in front of \p Tok. Only call once for
-  /// each \c AnnotatedToken.
-  void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
-                         unsigned Spaces, unsigned WhitespaceStartColumn) {
-    // 2+ newlines mean an empty line separating logic scopes.
-    if (NewLines >= 2)
-      alignComments();
-
-    SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
-    bool LineExceedsColumnLimit = Spaces + WhitespaceStartColumn +
-                                  Tok.FormatTok.TokenLength > Style.ColumnLimit;
-
-    // Align line comments if they are trailing or if they continue other
-    // trailing comments.
-    if (Tok.isTrailingComment()) {
-      // Remove the comment's trailing whitespace.
-      if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength)
-        Replaces.insert(tooling::Replacement(
-            SourceMgr, TokenLoc.getLocWithOffset(Tok.FormatTok.TokenLength),
-            Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, ""));
-
-      // Align comment with other comments.
-      if ((Tok.Parent != NULL || !Comments.empty()) &&
-          !LineExceedsColumnLimit) {
-        StoredComment Comment;
-        Comment.Tok = Tok.FormatTok;
-        Comment.Spaces = Spaces;
-        Comment.NewLines = NewLines;
-        Comment.MinColumn =
-            NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces;
-        Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength;
-        Comment.Untouchable = false;
-        Comments.push_back(Comment);
-        return;
-      }
-    }
-
-    // If this line does not have a trailing comment, align the stored comments.
-    if (Tok.Children.empty() && !Tok.isTrailingComment())
-      alignComments();
-
-    if (Tok.Type == TT_BlockComment) {
-      indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, false);
-    } else if (Tok.Type == TT_LineComment && LineExceedsColumnLimit) {
-      StringRef Line(SourceMgr.getCharacterData(TokenLoc),
-                     Tok.FormatTok.TokenLength);
-      int StartColumn = Spaces + (NewLines == 0 ? WhitespaceStartColumn : 0);
-      StringRef Prefix = getLineCommentPrefix(Line);
-      std::string NewPrefix = std::string(StartColumn, ' ') + Prefix.str();
-      splitLineInComment(Tok.FormatTok, Line.substr(Prefix.size()),
-                         StartColumn + Prefix.size(), NewPrefix,
-                         /*InPPDirective=*/ false,
-                         /*CommentHasMoreLines=*/ false);
-    }
-
-    storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
-  }
-
-  /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
-  /// backslashes to escape newlines inside a preprocessor directive.
-  ///
-  /// This function and \c replaceWhitespace have the same behavior if
-  /// \c Newlines == 0.
-  void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
-                           unsigned Spaces, unsigned WhitespaceStartColumn) {
-    if (Tok.Type == TT_BlockComment)
-      indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, true);
-
-    storeReplacement(Tok.FormatTok,
-                     getNewLineText(NewLines, Spaces, WhitespaceStartColumn));
-  }
-
-  /// \brief Inserts a line break into the middle of a token.
-  ///
-  /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
-  /// break and \p Postfix before the rest of the token starts in the next line.
-  ///
-  /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
-  /// used to generate the correct line break.
-  void breakToken(const FormatToken &Tok, unsigned Offset,
-                  unsigned ReplaceChars, StringRef Prefix, StringRef Postfix,
-                  bool InPPDirective, unsigned Spaces,
-                  unsigned WhitespaceStartColumn) {
-    std::string NewLineText;
-    if (!InPPDirective)
-      NewLineText = getNewLineText(1, Spaces);
-    else
-      NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn);
-    std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
-    SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset);
-    Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars,
-                                         ReplacementText));
-  }
-
-  /// \brief Returns all the \c Replacements created during formatting.
-  const tooling::Replacements &generateReplacements() {
-    alignComments();
-    return Replaces;
-  }
-
-  void addUntouchableComment(unsigned Column) {
-    StoredComment Comment;
-    Comment.MinColumn = Column;
-    Comment.MaxColumn = Column;
-    Comment.Untouchable = true;
-    Comments.push_back(Comment);
-  }
-
-private:
-  static StringRef getLineCommentPrefix(StringRef Comment) {
-    const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
-    for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
-      if (Comment.startswith(KnownPrefixes[i]))
-        return KnownPrefixes[i];
-    return "";
-  }
-
-  /// \brief Finds a common prefix of lines of a block comment to properly
-  /// indent (and possibly decorate with '*'s) added lines.
-  ///
-  /// The first line is ignored (it's special and starts with /*). The number of
-  /// lines should be more than one.
-  static StringRef findCommentLinesPrefix(ArrayRef<StringRef> Lines,
-                                          const char *PrefixChars = " *") {
-    assert(Lines.size() > 1);
-    StringRef Prefix(Lines[1].data(), Lines[1].find_first_not_of(PrefixChars));
-    for (size_t i = 2; i < Lines.size(); ++i) {
-      for (size_t j = 0; j < Prefix.size() && j < Lines[i].size(); ++j) {
-        if (Prefix[j] != Lines[i][j]) {
-          Prefix = Prefix.substr(0, j);
-          break;
-        }
-      }
-    }
-    return Prefix;
-  }
-
-  /// \brief Splits one line in a line or block comment, if it doesn't fit to
-  /// provided column limit. Removes trailing whitespace in each line.
-  ///
-  /// \param Line points to the line contents without leading // or /*.
-  ///
-  /// \param StartColumn is the column where the first character of Line will be
-  /// located after formatting.
-  ///
-  /// \param LinePrefix is inserted after each line break.
-  ///
-  /// When \param InPPDirective is true, each line break will be preceded by a
-  /// backslash in the last column to make line breaks inside the comment
-  /// visually consistent with line breaks outside the comment. This only makes
-  /// sense for block comments.
-  ///
-  /// When \param CommentHasMoreLines is false, no line breaks/trailing
-  /// backslashes will be inserted after it.
-  void splitLineInComment(const FormatToken &Tok, StringRef Line,
-                          size_t StartColumn, StringRef LinePrefix,
-                          bool InPPDirective, bool CommentHasMoreLines,
-                          const char *WhiteSpaceChars = " ") {
-    size_t ColumnLimit = calculateColumnLimit(Style, InPPDirective);
-    const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation());
-
-    StringRef TrimmedLine = Line.rtrim();
-    int TrailingSpaceLength = Line.size() - TrimmedLine.size();
-
-    // Don't touch leading whitespace.
-    Line = TrimmedLine.ltrim();
-    StartColumn += TrimmedLine.size() - Line.size();
-
-    while (Line.size() + StartColumn > ColumnLimit) {
-      // Try to break at the last whitespace before the column limit.
-      size_t SpacePos =
-          Line.find_last_of(WhiteSpaceChars, ColumnLimit - StartColumn + 1);
-      if (SpacePos == StringRef::npos) {
-        // Try to find any whitespace in the line.
-        SpacePos = Line.find_first_of(WhiteSpaceChars);
-        if (SpacePos == StringRef::npos) // No whitespace found, give up.
-          break;
-      }
-
-      StringRef NextCut = Line.substr(0, SpacePos).rtrim();
-      StringRef RemainingLine = Line.substr(SpacePos).ltrim();
-      if (RemainingLine.empty())
-        break;
-
-      if (RemainingLine == "*/" && LinePrefix.endswith("* "))
-        LinePrefix = LinePrefix.substr(0, LinePrefix.size() - 2);
-
-      Line = RemainingLine;
-
-      size_t ReplaceChars = Line.begin() - NextCut.end();
-      breakToken(Tok, NextCut.end() - TokenStart, ReplaceChars, "", LinePrefix,
-                 InPPDirective, 0, NextCut.size() + StartColumn);
-      StartColumn = LinePrefix.size();
-    }
-
-    if (TrailingSpaceLength > 0 || (InPPDirective && CommentHasMoreLines)) {
-      // Remove trailing whitespace/insert backslash. + 1 is for \n
-      breakToken(Tok, Line.end() - TokenStart, TrailingSpaceLength + 1, "", "",
-                 InPPDirective, 0, Line.size() + StartColumn);
-    }
-  }
-
-  /// \brief Changes indentation of all lines in a block comment by Indent,
-  /// removes trailing whitespace from each line, splits lines that end up
-  /// exceeding the column limit.
-  void indentBlockComment(const AnnotatedToken &Tok, int Indent,
-                          int WhitespaceStartColumn, int NewLines,
-                          bool InPPDirective) {
-    assert(Tok.Type == TT_BlockComment);
-    int StartColumn = Indent + (NewLines == 0 ? WhitespaceStartColumn : 0);
-    const SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
-    const int CurrentIndent = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1;
-    const int IndentDelta = Indent - CurrentIndent;
-    const StringRef Text(SourceMgr.getCharacterData(TokenLoc),
-                         Tok.FormatTok.TokenLength);
-    assert(Text.startswith("/*") && Text.endswith("*/"));
-
-    SmallVector<StringRef, 16> Lines;
-    Text.split(Lines, "\n");
-
-    if (IndentDelta > 0) {
-      std::string WhiteSpace(IndentDelta, ' ');
-      for (size_t i = 1; i < Lines.size(); ++i) {
-        Replaces.insert(tooling::Replacement(
-            SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
-            0, WhiteSpace));
-      }
-    } else if (IndentDelta < 0) {
-      std::string WhiteSpace(-IndentDelta, ' ');
-      // Check that the line is indented enough.
-      for (size_t i = 1; i < Lines.size(); ++i) {
-        if (!Lines[i].startswith(WhiteSpace))
-          return;
-      }
-      for (size_t i = 1; i < Lines.size(); ++i) {
-        Replaces.insert(tooling::Replacement(
-            SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
-            -IndentDelta, ""));
-      }
-    }
-
-    // Split long lines in comments.
-    size_t OldPrefixSize = 0;
-    std::string NewPrefix;
-    if (Lines.size() > 1) {
-      StringRef CurrentPrefix = findCommentLinesPrefix(Lines);
-      OldPrefixSize = CurrentPrefix.size();
-      NewPrefix = (IndentDelta < 0)
-                  ? CurrentPrefix.substr(-IndentDelta).str()
-                  : std::string(IndentDelta, ' ') + CurrentPrefix.str();
-      if (CurrentPrefix.endswith("*")) {
-        NewPrefix += " ";
-        ++OldPrefixSize;
-      }
-    } else if (Tok.Parent == 0) {
-      NewPrefix = std::string(StartColumn, ' ') + " * ";
-    }
-
-    StartColumn += 2;
-    for (size_t i = 0; i < Lines.size(); ++i) {
-      StringRef Line = Lines[i].substr(i == 0 ? 2 : OldPrefixSize);
-      splitLineInComment(Tok.FormatTok, Line, StartColumn, NewPrefix,
-                         InPPDirective, i != Lines.size() - 1);
-      StartColumn = NewPrefix.size();
-    }
-  }
-
-  std::string getNewLineText(unsigned NewLines, unsigned Spaces) {
-    return std::string(NewLines, '\n') + std::string(Spaces, ' ');
-  }
-
-  std::string getNewLineText(unsigned NewLines, unsigned Spaces,
-                             unsigned WhitespaceStartColumn) {
-    std::string NewLineText;
-    if (NewLines > 0) {
-      unsigned Offset =
-          std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
-      for (unsigned i = 0; i < NewLines; ++i) {
-        NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
-        NewLineText += "\\\n";
-        Offset = 0;
-      }
-    }
-    return NewLineText + std::string(Spaces, ' ');
-  }
-
-  /// \brief Structure to store a comment for later layout and alignment.
-  struct StoredComment {
-    FormatToken Tok;
-    unsigned MinColumn;
-    unsigned MaxColumn;
-    unsigned NewLines;
-    unsigned Spaces;
-    bool Untouchable;
-  };
-  SmallVector<StoredComment, 16> Comments;
-  typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
-
-  /// \brief Try to align all stashed comments.
-  void alignComments() {
-    unsigned MinColumn = 0;
-    unsigned MaxColumn = UINT_MAX;
-    comment_iterator Start = Comments.begin();
-    for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) {
-      if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
-        alignComments(Start, I, MinColumn);
-        MinColumn = I->MinColumn;
-        MaxColumn = I->MaxColumn;
-        Start = I;
-      } else {
-        MinColumn = std::max(MinColumn, I->MinColumn);
-        MaxColumn = std::min(MaxColumn, I->MaxColumn);
-      }
-    }
-    alignComments(Start, Comments.end(), MinColumn);
-    Comments.clear();
-  }
-
-  /// \brief Put all the comments between \p I and \p E into \p Column.
-  void alignComments(comment_iterator I, comment_iterator E, unsigned Column) {
-    while (I != E) {
-      if (!I->Untouchable) {
-        unsigned Spaces = I->Spaces + Column - I->MinColumn;
-        storeReplacement(I->Tok, getNewLineText(I->NewLines, Spaces));
-      }
-      ++I;
-    }
-  }
-
-  /// \brief Stores \p Text as the replacement for the whitespace in front of
-  /// \p Tok.
-  void storeReplacement(const FormatToken &Tok, const std::string Text) {
-    // Don't create a replacement, if it does not change anything.
-    if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart),
-                  Tok.WhiteSpaceLength) == Text)
-      return;
-
-    Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
-                                         Tok.WhiteSpaceLength, Text));
-  }
-
-  SourceManager &SourceMgr;
-  tooling::Replacements Replaces;
-  const FormatStyle &Style;
-};
-
 class UnwrappedLineFormatter {
 public:
   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
@@ -602,7 +243,7 @@ private:
       if (StartOfFunctionCall != Other.StartOfFunctionCall)
         return StartOfFunctionCall < Other.StartOfFunctionCall;
       if (NestedNameSpecifierContinuation !=
-              Other.NestedNameSpecifierContinuation)
+          Other.NestedNameSpecifierContinuation)
         return NestedNameSpecifierContinuation <
                Other.NestedNameSpecifierContinuation;
       if (CallContinuation != Other.CallContinuation)
@@ -647,7 +288,7 @@ private:
       if (Column != Other.Column)
         return Column < Other.Column;
       if (LineContainsContinuedForLoopSection !=
-              Other.LineContainsContinuedForLoopSection)
+          Other.LineContainsContinuedForLoopSection)
         return LineContainsContinuedForLoopSection;
       if (ParenLevel != Other.ParenLevel)
         return ParenLevel < Other.ParenLevel;
@@ -806,7 +447,7 @@ private:
       if (Current.Type == TT_ObjCSelectorName &&
           State.Stack.back().ColonPos == 0) {
         if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
-                State.Column + Spaces + Current.FormatTok.TokenLength)
+            State.Column + Spaces + Current.FormatTok.TokenLength)
           State.Stack.back().ColonPos =
               State.Stack.back().Indent + Current.LongestObjCSelectorName;
         else
@@ -970,115 +611,78 @@ private:
   /// it if possible.
   unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
                                 bool DryRun) {
-    if (Current.isNot(tok::string_literal))
+    llvm::OwningPtr<BreakableToken> Token;
+    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
+    if (Current.is(tok::string_literal)) {
+      // Only break up default narrow strings.
+      const char *LiteralData = Current.FormatTok.Tok.getLiteralData();
+      if (!LiteralData || *LiteralData != '"')
+        return 0;
+
+      Token.reset(new BreakableStringLiteral(Current.FormatTok, StartColumn));
+    } else if (Current.Type == TT_BlockComment) {
+      BreakableBlockComment *BBC =
+          new BreakableBlockComment(SourceMgr, Current, StartColumn);
+      if (!DryRun)
+        BBC->alignLines(Whitespaces);
+      Token.reset(BBC);
+    } else {
       return 0;
-    // Only break up default narrow strings.
-    const char *LiteralData = Current.FormatTok.Tok.getLiteralData();
-    if (!LiteralData || *LiteralData != '"')
+    }
+
+    if (Token->getPrefixLength() + Token->getSuffixLength(0) >
+        getColumnLimit()) {
       return 0;
+    }
 
+    bool BreakInserted = false;
     unsigned Penalty = 0;
-    unsigned TailOffset = 0;
-    unsigned TailLength = Current.FormatTok.TokenLength;
-    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
-    unsigned OffsetFromStart = 0;
-    while (StartColumn + TailLength > getColumnLimit()) {
-      StringRef Text = StringRef(LiteralData + TailOffset, TailLength);
-      if (StartColumn + OffsetFromStart + 1 > getColumnLimit())
-        break;
-      StringRef::size_type SplitPoint = getSplitPoint(
-          Text, getColumnLimit() - StartColumn - OffsetFromStart - 1);
-      if (SplitPoint == StringRef::npos)
-        break;
-      assert(SplitPoint != 0);
-      // +2, because 'Text' starts after the opening quotes, and does not
-      // include the closing quote we need to insert.
-      unsigned WhitespaceStartColumn =
-          StartColumn + OffsetFromStart + SplitPoint + 2;
-      State.Stack.back().LastSpace = StartColumn;
+    for (unsigned LineIndex = 0; LineIndex < Token->getLineCount();
+         ++LineIndex) {
+      unsigned TokenLineSize = Token->getLineSize(LineIndex);
+      unsigned TailOffset = 0;
+      unsigned RemainingLength =
+          Token->getLineLengthAfterSplit(LineIndex, TailOffset);
+      while (RemainingLength > getColumnLimit()) {
+        unsigned DecorationLength =
+            RemainingLength - (TokenLineSize - TailOffset);
+        if (DecorationLength + 1 > getColumnLimit()) {
+          // Can't reduce line length by splitting here.
+          break;
+        }
+        BreakableToken::Split Split =
+            Token->getSplit(LineIndex, TailOffset, getColumnLimit());
+        if (Split.first == StringRef::npos)
+          break;
+        assert(Split.first != 0);
+        if (!DryRun) {
+          Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
+                             Whitespaces);
+
author	Alexander Kornienko <alexfh@google.com>	2013-04-15 14:28:00 +0000
committer	Alexander Kornienko <alexfh@google.com>	2013-04-15 14:28:00 +0000
commit	70ce7881fc30a39b795b2873f008e7eca72ba669 (patch)
tree	f9190f278da3d6f81517e5f69243d13f3c46f15e /lib/Format
parent	115ac5ac1281e6f301da4da6a5c669beae59ffcc (diff)