An attempt to make the search algorithm easier to understand.

- clear ownership: the SpecificBumpPtrAllocator owns all StateNodes - this allows us to simplify the memoization data structure into a std::set (FIXME: figure out whether we want to use a hash based data structure). - introduces StateNode as recursive data structure, instead of using Edge and the Seen-map combined to drill through the graph - using a count to stabilize the penalty instead of relying on the container - pulled out a method to forward-apply states in the end This leads to a ~40% runtime decrease on Nico's benchmark. Main FiXME is that the parameter lists of some function get too long. I'd vote for either pulling the Queue etc into the Formatter proper, or creating an inner class just for the search algorithm. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@175051 91177308-0d34-0410-b5e6-96231b3b80d8
author: Manuel Klimek <klimek@google.com> 2013-02-13 10:46:36 +0000
committer: Manuel Klimek <klimek@google.com> 2013-02-13 10:46:36 +0000
commit: 32a2fd7631026f2fe248381546c5e6149f4f95ee (patch)
tree: 5fd3bdb030d9168fd9359c5ae9c293665bc77ef7 /lib/Format
parent: 59660c21178b6af518bd4b564e032d5c9cc218cb (diff)
1 files changed, 86 insertions, 49 deletions
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index f29308e509..35389d140d 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -23,7 +23,9 @@
 #include "clang/Format/Format.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Lex/Lexer.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
+#include <queue>
 #include <string>
 
 namespace clang {
@@ -633,13 +635,31 @@ private:
     return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0);
   }
 
-  /// \brief An edge in the solution space starting from the \c LineState and
-  /// inserting a newline dependent on the \c bool.
-  typedef std::pair<bool, const LineState *> Edge;
+  /// \brief An edge in the solution space from \c Previous->State to \c State,
+  /// inserting a newline dependent on the \c NewLine.
+  struct StateNode {
+    StateNode(const LineState &State, bool NewLine, StateNode *Previous)
+        : State(State), NewLine(NewLine), Previous(Previous) {
+    }
+    LineState State;
+    bool NewLine;
+    StateNode *Previous;
+  };
+
+  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
+  ///
+  /// In case of equal penalties, we want to prefer states that were inserted
+  /// first. During state generation we make sure that we insert states first
+  /// that break the line as late as possible.
+  typedef std::pair<unsigned, unsigned> OrderedPenalty;
+
+  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
+  /// \c State has the given \c OrderedPenalty.
+  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
 
-  /// \brief An item in the prioritized BFS search queue. The \c LineState was
-  /// reached using the \c Edge.
-  typedef std::pair<LineState, Edge> QueueItem;
+  /// \brief The BFS queue type.
+  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
+                              std::greater<QueueItem> > QueueType;
 
   /// \brief Analyze the entire solution space starting from \p InitialState.
   ///
@@ -647,32 +667,40 @@ private:
   /// the solution space (\c LineStates are the nodes). The algorithm tries to
   /// find the shortest path (the one with lowest penalty) from \p InitialState
   /// to a state where all tokens are placed.
-  unsigned analyzeSolutionSpace(const LineState &InitialState) {
+  unsigned analyzeSolutionSpace(LineState &InitialState) {
+    llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
+
+    // Increasing count of \c StateNode items we have created. This is used
+    // to create a deterministic order independent of the container.
+    unsigned Count = 0;
+    QueueType Queue;
+
+    std::set<LineState> Seen;
+
     // Insert start element into queue.
-    std::multimap<unsigned, QueueItem> Queue;
-    Queue.insert(std::pair<unsigned, QueueItem>(
-        0, QueueItem(InitialState, Edge(false, (const LineState *)0))));
-    std::map<LineState, Edge> Seen;
+    StateNode *Node=
+        new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
+    Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
+    ++Count;
 
     // While not empty, take first element and follow edges.
     while (!Queue.empty()) {
-      unsigned Penalty = Queue.begin()->first;
-      QueueItem Item = Queue.begin()->second;
-      if (Item.first.NextToken == NULL) {
+      unsigned Penalty = Queue.top().first.first;
+      StateNode *Node= Queue.top().second;
+      if (Node->State.NextToken == NULL) {
         DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n");
         break;
       }
-      Queue.erase(Queue.begin());
+      Queue.pop();
 
-      if (Seen.find(Item.first) != Seen.end())
-        continue; // State already examined with lower penalty.
-
-      const LineState &SavedState = Seen.insert(std::pair<LineState, Edge>(
-          Item.first,
-          Edge(Item.second.first, Item.second.second))).first->first;
+      if (!Seen.insert(Node->State).second)
+        // State already examined with lower penalty.
+        continue;
 
-      addNextStateToQueue(SavedState, Penalty, /*NewLine=*/ false, Queue);
-      addNextStateToQueue(SavedState, Penalty, /*NewLine=*/ true, Queue);
+      addNextStateToQueue(Allocator, Queue, Count, Penalty, Node,
+                          /*NewLine=*/ false);
+      addNextStateToQueue(Allocator, Queue, Count, Penalty, Node,
+                          /*NewLine=*/ true);
     }
 
     if (Queue.empty())
@@ -681,46 +709,55 @@ private:
       return 0;
 
     // Reconstruct the solution.
-    // FIXME: Add debugging output.
-    Edge *CurrentEdge = &Queue.begin()->second.second;
-    while (CurrentEdge->second != NULL) {
-      LineState CurrentState = *CurrentEdge->second;
-      if (CurrentEdge->first) {
-        DEBUG(llvm::errs() << "Penalty for splitting before "
-                           << CurrentState.NextToken->FormatTok.Tok.getName()
-                           << ": " << CurrentState.NextToken->SplitPenalty
-                           << "\n");
-      }
-      addTokenToState(CurrentEdge->first, false, CurrentState);
-      CurrentEdge = &Seen[*CurrentEdge->second];
-    }
+    reconstructPath(InitialState, Queue.top().second);
     DEBUG(llvm::errs() << "---\n");
 
     // Return the column after the last token of the solution.
-    return Queue.begin()->second.first.Column;
+    return Queue.top().second->State.Column;
+  }
+
+  void reconstructPath(LineState &State, StateNode *Current) {
+    // FIXME: This recursive implementation limits the possible number
+    // of tokens per line if compiled into a binary with small stack space.
+    // To become more independent of stack frame limitations we would need
+    // to also change the TokenAnnotator.
+    if (Current->Previous == NULL)
+      return;
+    reconstructPath(State, Current->Previous);
+    DEBUG({
+      if (Current->NewLine) {
+        llvm::errs() << "Penalty for splitting before "
+                     << Current->State.NextToken->FormatTok.Tok.getName()
+                     << ": " << Current->State.NextToken->SplitPenalty << "\n";
+      }
+    });
+    addTokenToState(Current->NewLine, false, State);
   }
 
   /// \brief Add the following state to the analysis queue \p Queue.
   ///
   /// Assume the current state is \p OldState and has been reached with a
   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
-  void addNextStateToQueue(const LineState &OldState, unsigned Penalty,
-                           bool NewLine,
-                           std::multimap<unsigned, QueueItem> &Queue) {
-    if (NewLine && !canBreak(OldState))
+  void addNextStateToQueue(llvm::SpecificBumpPtrAllocator<StateNode> &Allocator,
+                           QueueType &Queue, unsigned &Count, unsigned Penalty,
+                           StateNode *PreviousNode, bool NewLine) {
+    if (NewLine && !canBreak(PreviousNode->State))
       return;
-    if (!NewLine && mustBreak(OldState))
+    if (!NewLine && mustBreak(PreviousNode->State))
       return;
-    LineState State(OldState);
     if (NewLine)
-      Penalty += State.NextToken->SplitPenalty;
-    addTokenToState(NewLine, true, State);
-    if (State.Column > getColumnLimit()) {
-      unsigned ExcessCharacters = State.Column - getColumnLimit();
+      Penalty += PreviousNode->State.NextToken->SplitPenalty;
+
+    StateNode *Node = new (Allocator.Allocate())
+        StateNode(PreviousNode->State, NewLine, PreviousNode);
+    addTokenToState(NewLine, true, Node->State);
+    if (Node->State.Column > getColumnLimit()) {
+      unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
       Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
     }
-    Queue.insert(std::pair<unsigned, QueueItem>(
-        Penalty, QueueItem(State, Edge(NewLine, &OldState))));
+
+    Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
+    ++Count;
   }
 
   /// \brief Returns \c true, if a line break after \p State is allowed.
author	Manuel Klimek <klimek@google.com>	2013-02-13 10:46:36 +0000
committer	Manuel Klimek <klimek@google.com>	2013-02-13 10:46:36 +0000
commit	32a2fd7631026f2fe248381546c5e6149f4f95ee (patch)
tree	5fd3bdb030d9168fd9359c5ae9c293665bc77ef7 /lib/Format
parent	59660c21178b6af518bd4b564e032d5c9cc218cb (diff)