Rev 4609: Merge the improved topo_sort code. in http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted

John Arbash Meinel john at arbash-meinel.com
Thu Aug 13 21:37:43 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted

------------------------------------------------------------
revno: 4609 [merge]
revision-id: john at arbash-meinel.com-20090813203734-2sq60sc68wzrj0z2
parent: john at arbash-meinel.com-20090813201512-yewwimsurqgc0ym0
parent: maarten_bosmans-20090809210557-max8vgj1aj5i5nr3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.19-known-graph-sorted
timestamp: Thu 2009-08-13 15:37:34 -0500
message:
  Merge the improved topo_sort code.
modified:
  bzrlib/reconcile.py            reweave_inventory.py-20051108164726-1e5e0934febac06e
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/blackbox/test_ancestry.py test_ancestry.py-20060131142602-6d9524c490537e90
  bzrlib/tests/test_tsort.py     testtsort.py-20051025073946-27da871c394d5be4
  bzrlib/tsort.py                tsort.py-20051025073946-7808f6aaf7d07208
-------------- next part --------------
=== modified file 'bzrlib/reconcile.py'
--- a/bzrlib/reconcile.py	2009-06-10 03:56:49 +0000
+++ b/bzrlib/reconcile.py	2009-07-31 22:48:55 +0000
@@ -33,7 +33,7 @@
     repofmt,
     )
 from bzrlib.trace import mutter, note
-from bzrlib.tsort import TopoSorter
+from bzrlib.tsort import topo_sort
 from bzrlib.versionedfile import AdapterFactory, FulltextContentFactory
 
 
@@ -247,8 +247,7 @@
 
         # we have topological order of revisions and non ghost parents ready.
         self._setup_steps(len(self._rev_graph))
-        revision_keys = [(rev_id,) for rev_id in
-            TopoSorter(self._rev_graph.items()).iter_topo_order()]
+        revision_keys = [(rev_id,) for rev_id in topo_sort(self._rev_graph)]
         stream = self._change_inv_parents(
             self.inventory.get_record_stream(revision_keys, 'unordered', True),
             self._new_inv_parents,
@@ -378,7 +377,7 @@
         new_inventories = self.repo._temp_inventories()
         # we have topological order of revisions and non ghost parents ready.
         graph = self.revisions.get_parent_map(self.revisions.keys())
-        revision_keys = list(TopoSorter(graph).iter_topo_order())
+        revision_keys = topo_sort(graph)
         revision_ids = [key[-1] for key in revision_keys]
         self._setup_steps(len(revision_keys))
         stream = self._change_inv_parents(

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-08-06 02:23:37 +0000
+++ b/bzrlib/repository.py	2009-08-13 20:37:34 +0000
@@ -4311,7 +4311,7 @@
         phase = 'file'
         revs = search.get_keys()
         graph = self.from_repository.get_graph()
-        revs = list(graph.iter_topo_order(revs))
+        revs = tsort.topo_sort(graph.get_parent_map(revs))
         data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
         text_keys = []
         for knit_kind, file_id, revisions in data_to_fetch:

=== modified file 'bzrlib/tests/blackbox/test_ancestry.py'
--- a/bzrlib/tests/blackbox/test_ancestry.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/tests/blackbox/test_ancestry.py	2009-07-31 22:48:55 +0000
@@ -44,7 +44,7 @@
     def _check_ancestry(self, location='', result=None):
         out = self.run_bzr(['ancestry', location])[0]
         if result is None:
-            result = "A1\nB1\nA2\nA3\n"
+            result = "A1\nA2\nB1\nA3\n"
         self.assertEqualDiff(out, result)
 
     def test_ancestry(self):

=== modified file 'bzrlib/tests/test_tsort.py'
--- a/bzrlib/tests/test_tsort.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/tests/test_tsort.py	2009-07-30 12:55:32 +0000
@@ -39,6 +39,20 @@
                           list,
                           TopoSorter(graph).iter_topo_order())
 
+    def assertSortAndIterateOrder(self, graph):
+        """Check that sorting and iter_topo_order on graph really results in topological order.
+
+        For every child in the graph, check if it comes after all of it's parents.
+        """
+        sort_result = topo_sort(graph)
+        iter_result = list(TopoSorter(graph).iter_topo_order())
+        for (node, parents) in graph:
+            for parent in parents:
+                self.assertTrue(sort_result.index(node) > sort_result.index(parent),
+                    "parent %s must come before child %s:\n%s" % (parent, node, sort_result))
+                self.assertTrue(iter_result.index(node) > iter_result.index(parent),
+                    "parent %s must come before child %s:\n%s" % (parent, node, iter_result))
+
     def test_tsort_empty(self):
         """TopoSort empty list"""
         self.assertSortAndIterate([], [])
@@ -72,10 +86,10 @@
     def test_tsort_partial(self):
         """Topological sort with partial ordering.
 
-        If the graph does not give an order between two nodes, they are
-        returned in lexicographical order.
+        Multiple correct orderings are possible, so test for 
+        correctness, not for exact match on the resulting list.
         """
-        self.assertSortAndIterate(([(0, []),
+        self.assertSortAndIterateOrder([(0, []),
                                    (1, [0]),
                                    (2, [0]),
                                    (3, [0]),
@@ -83,8 +97,7 @@
                                    (5, [1, 2]),
                                    (6, [1, 2]),
                                    (7, [2, 3]),
-                                   (8, [0, 1, 4, 5, 6])]),
-                                  [0, 1, 2, 3, 4, 5, 6, 7, 8])
+                                   (8, [0, 1, 4, 5, 6])])
 
     def test_tsort_unincluded_parent(self):
         """Sort nodes, but don't include some parents in the output"""

=== modified file 'bzrlib/tsort.py'
--- a/bzrlib/tsort.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/tsort.py	2009-08-09 21:05:57 +0000
@@ -20,6 +20,7 @@
 
 from bzrlib import errors
 import bzrlib.revision as _mod_revision
+from collections import deque
 
 
 __all__ = ["topo_sort", "TopoSorter", "merge_sort", "MergeSorter"]
@@ -34,8 +35,57 @@
     their children.
 
     node identifiers can be any hashable object, and are typically strings.
+
+    This function has the same purpose as the TopoSorter class, but uses a
+    different algorithm to sort the graph. That means that while both return a list
+    with parents before their child nodes, the exact ordering can be different.
+
+    topo_sort is faster when the whole list is needed, while when iterating over a
+    part of the list, TopoSorter.iter_topo_order should be used.
     """
-    return TopoSorter(graph).sorted()
+    # store a dict of the graph.
+    graph = dict(graph)
+    # this is the stack storing on which the sorted nodes are pushed.
+    node_stack = []
+
+    # count the number of children for every node in the graph
+    node_child_count = dict.fromkeys(graph.iterkeys(), 0)
+    for parents in graph.itervalues():
+        for parent in parents:
+            # don't count the parent if it's a ghost
+            if parent in node_child_count:
+                node_child_count[parent] += 1
+    # keep track of nodes without children in a separate list
+    nochild_nodes = deque([node for (node, n) in node_child_count.iteritems() if n == 0])
+
+    graph_pop = graph.pop
+    node_stack_append = node_stack.append
+    nochild_nodes_pop = nochild_nodes.pop
+    nochild_nodes_append = nochild_nodes.append
+
+    while nochild_nodes:
+        # pick a node without a child and add it to the stack.
+        node_name = nochild_nodes_pop()
+        node_stack_append(node_name)
+
+        # the parents of the node lose it as a child; if it was the last
+        # child, add the parent to the list of childless nodes.
+        parents = graph_pop(node_name)
+        for parent in parents:
+            if parent in node_child_count:
+                node_child_count[parent] -= 1
+                if node_child_count[parent] == 0:
+                    nochild_nodes_append(parent)
+
+    # if there are still nodes left in the graph,
+    # that means that there is a cycle
+    if graph:
+        raise errors.GraphCycleError(graph)
+
+    # the nodes where pushed on the stack child first, so this list needs to be
+    # reversed before returning it.
+    node_stack.reverse()
+    return node_stack
 
 
 class TopoSorter(object):
@@ -60,22 +110,8 @@
         iteration or sorting may raise GraphCycleError if a cycle is present
         in the graph.
         """
-        # a dict of the graph.
+        # store a dict of the graph.
         self._graph = dict(graph)
-        self._visitable = set(self._graph)
-        ### if debugging:
-        # self._original_graph = dict(graph)
-
-        # this is a stack storing the depth first search into the graph.
-        self._node_name_stack = []
-        # at each level of 'recursion' we have to check each parent. This
-        # stack stores the parents we have not yet checked for the node at the
-        # matching depth in _node_name_stack
-        self._pending_parents_stack = []
-        # this is a set of the completed nodes for fast checking whether a
-        # parent in a node we are processing on the stack has already been
-        # emitted and thus can be skipped.
-        self._completed_node_names = set()
 
     def sorted(self):
         """Sort the graph and return as a list.
@@ -100,67 +136,64 @@
         After finishing iteration the sorter is empty and you cannot continue
         iteration.
         """
-        while self._graph:
+        graph = self._graph
+        visitable = set(graph)
+
+        # this is a stack storing the depth first search into the graph.
+        pending_node_stack = []
+        # at each level of 'recursion' we have to check each parent. This
+        # stack stores the parents we have not yet checked for the node at the
+        # matching depth in pending_node_stack
+        pending_parents_stack = []
+
+        # this is a set of the completed nodes for fast checking whether a
+        # parent in a node we are processing on the stack has already been
+        # emitted and thus can be skipped.
+        completed_node_names = set()
+
+        while graph:
             # now pick a random node in the source graph, and transfer it to the
-            # top of the depth first search stack.
-            node_name, parents = self._graph.popitem()
-            self._push_node(node_name, parents)
-            while self._node_name_stack:
-                # loop until this call completes.
-                parents_to_visit = self._pending_parents_stack[-1]
-                # if all parents are done, the revision is done
+            # top of the depth first search stack of pending nodes.
+            node_name, parents = graph.popitem()
+            pending_node_stack.append(node_name)
+            pending_parents_stack.append(list(parents))
+
+            # loop until pending_node_stack is empty
+            while pending_node_stack:
+                parents_to_visit = pending_parents_stack[-1]
+                # if there are no parents left, the revision is done
                 if not parents_to_visit:
                     # append the revision to the topo sorted list
-                    # all the nodes parents have been added to the output, now
-                    # we can add it to the output.
-                    yield self._pop_node()
+                    # all the nodes parents have been added to the output,
+                    # now we can add it to the output.
+                    popped_node = pending_node_stack.pop()
+                    pending_parents_stack.pop()
+                    completed_node_names.add(popped_node)
+                    yield popped_node
                 else:
-                    while self._pending_parents_stack[-1]:
-                        # recurse depth first into a single parent
-                        next_node_name = self._pending_parents_stack[-1].pop()
-                        if next_node_name in self._completed_node_names:
-                            # this parent was completed by a child on the
-                            # call stack. skip it.
-                            continue
-                        if next_node_name not in self._visitable:
-                            continue
-                        # otherwise transfer it from the source graph into the
-                        # top of the current depth first search stack.
-                        try:
-                            parents = self._graph.pop(next_node_name)
-                        except KeyError:
-                            # if the next node is not in the source graph it has
-                            # already been popped from it and placed into the
-                            # current search stack (but not completed or we would
-                            # have hit the continue 4 lines up.
-                            # this indicates a cycle.
-                            raise errors.GraphCycleError(self._node_name_stack)
-                        self._push_node(next_node_name, parents)
-                        # and do not continue processing parents until this 'call'
-                        # has recursed.
-                        break
-
-    def _push_node(self, node_name, parents):
-        """Add node_name to the pending node stack.
-
-        Names in this stack will get emitted into the output as they are popped
-        off the stack.
-        """
-        self._node_name_stack.append(node_name)
-        self._pending_parents_stack.append(list(parents))
-
-    def _pop_node(self):
-        """Pop the top node off the stack
-
-        The node is appended to the sorted output.
-        """
-        # we are returning from the flattened call frame:
-        # pop off the local variables
-        node_name = self._node_name_stack.pop()
-        self._pending_parents_stack.pop()
-
-        self._completed_node_names.add(node_name)
-        return node_name
+                    # recurse depth first into a single parent
+                    next_node_name = parents_to_visit.pop()
+
+                    if next_node_name in completed_node_names:
+                        # parent was already completed by a child, skip it.
+                        continue
+                    if next_node_name not in visitable:
+                        # parent is not a node in the original graph, skip it.
+                        continue
+
+                    # transfer it along with its parents from the source graph
+                    # into the top of the current depth first search stack.
+                    try:
+                        parents = graph.pop(next_node_name)
+                    except KeyError:
+                        # if the next node is not in the source graph it has
+                        # already been popped from it and placed into the
+                        # current search stack (but not completed or we would
+                        # have hit the continue 6 lines up).  this indicates a
+                        # cycle.
+                        raise errors.GraphCycleError(pending_node_stack)
+                    pending_node_stack.append(next_node_name)
+                    pending_parents_stack.append(list(parents))
 
 
 def merge_sort(graph, branch_tip, mainline_revisions=None, generate_revno=False):



More information about the bazaar-commits mailing list