Rev 4609: Merge the improved topo_sort code. in http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted
John Arbash Meinel
john at arbash-meinel.com
Thu Aug 13 21:37:43 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted
------------------------------------------------------------
revno: 4609 [merge]
revision-id: john at arbash-meinel.com-20090813203734-2sq60sc68wzrj0z2
parent: john at arbash-meinel.com-20090813201512-yewwimsurqgc0ym0
parent: maarten_bosmans-20090809210557-max8vgj1aj5i5nr3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.19-known-graph-sorted
timestamp: Thu 2009-08-13 15:37:34 -0500
message:
Merge the improved topo_sort code.
modified:
bzrlib/reconcile.py reweave_inventory.py-20051108164726-1e5e0934febac06e
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/blackbox/test_ancestry.py test_ancestry.py-20060131142602-6d9524c490537e90
bzrlib/tests/test_tsort.py testtsort.py-20051025073946-27da871c394d5be4
bzrlib/tsort.py tsort.py-20051025073946-7808f6aaf7d07208
-------------- next part --------------
=== modified file 'bzrlib/reconcile.py'
--- a/bzrlib/reconcile.py 2009-06-10 03:56:49 +0000
+++ b/bzrlib/reconcile.py 2009-07-31 22:48:55 +0000
@@ -33,7 +33,7 @@
repofmt,
)
from bzrlib.trace import mutter, note
-from bzrlib.tsort import TopoSorter
+from bzrlib.tsort import topo_sort
from bzrlib.versionedfile import AdapterFactory, FulltextContentFactory
@@ -247,8 +247,7 @@
# we have topological order of revisions and non ghost parents ready.
self._setup_steps(len(self._rev_graph))
- revision_keys = [(rev_id,) for rev_id in
- TopoSorter(self._rev_graph.items()).iter_topo_order()]
+ revision_keys = [(rev_id,) for rev_id in topo_sort(self._rev_graph)]
stream = self._change_inv_parents(
self.inventory.get_record_stream(revision_keys, 'unordered', True),
self._new_inv_parents,
@@ -378,7 +377,7 @@
new_inventories = self.repo._temp_inventories()
# we have topological order of revisions and non ghost parents ready.
graph = self.revisions.get_parent_map(self.revisions.keys())
- revision_keys = list(TopoSorter(graph).iter_topo_order())
+ revision_keys = topo_sort(graph)
revision_ids = [key[-1] for key in revision_keys]
self._setup_steps(len(revision_keys))
stream = self._change_inv_parents(
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-08-06 02:23:37 +0000
+++ b/bzrlib/repository.py 2009-08-13 20:37:34 +0000
@@ -4311,7 +4311,7 @@
phase = 'file'
revs = search.get_keys()
graph = self.from_repository.get_graph()
- revs = list(graph.iter_topo_order(revs))
+ revs = tsort.topo_sort(graph.get_parent_map(revs))
data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
text_keys = []
for knit_kind, file_id, revisions in data_to_fetch:
=== modified file 'bzrlib/tests/blackbox/test_ancestry.py'
--- a/bzrlib/tests/blackbox/test_ancestry.py 2009-03-23 14:59:43 +0000
+++ b/bzrlib/tests/blackbox/test_ancestry.py 2009-07-31 22:48:55 +0000
@@ -44,7 +44,7 @@
def _check_ancestry(self, location='', result=None):
out = self.run_bzr(['ancestry', location])[0]
if result is None:
- result = "A1\nB1\nA2\nA3\n"
+ result = "A1\nA2\nB1\nA3\n"
self.assertEqualDiff(out, result)
def test_ancestry(self):
=== modified file 'bzrlib/tests/test_tsort.py'
--- a/bzrlib/tests/test_tsort.py 2009-03-23 14:59:43 +0000
+++ b/bzrlib/tests/test_tsort.py 2009-07-30 12:55:32 +0000
@@ -39,6 +39,20 @@
list,
TopoSorter(graph).iter_topo_order())
+ def assertSortAndIterateOrder(self, graph):
+ """Check that sorting and iter_topo_order on graph really results in topological order.
+
+ For every child in the graph, check if it comes after all of it's parents.
+ """
+ sort_result = topo_sort(graph)
+ iter_result = list(TopoSorter(graph).iter_topo_order())
+ for (node, parents) in graph:
+ for parent in parents:
+ self.assertTrue(sort_result.index(node) > sort_result.index(parent),
+ "parent %s must come before child %s:\n%s" % (parent, node, sort_result))
+ self.assertTrue(iter_result.index(node) > iter_result.index(parent),
+ "parent %s must come before child %s:\n%s" % (parent, node, iter_result))
+
def test_tsort_empty(self):
"""TopoSort empty list"""
self.assertSortAndIterate([], [])
@@ -72,10 +86,10 @@
def test_tsort_partial(self):
"""Topological sort with partial ordering.
- If the graph does not give an order between two nodes, they are
- returned in lexicographical order.
+ Multiple correct orderings are possible, so test for
+ correctness, not for exact match on the resulting list.
"""
- self.assertSortAndIterate(([(0, []),
+ self.assertSortAndIterateOrder([(0, []),
(1, [0]),
(2, [0]),
(3, [0]),
@@ -83,8 +97,7 @@
(5, [1, 2]),
(6, [1, 2]),
(7, [2, 3]),
- (8, [0, 1, 4, 5, 6])]),
- [0, 1, 2, 3, 4, 5, 6, 7, 8])
+ (8, [0, 1, 4, 5, 6])])
def test_tsort_unincluded_parent(self):
"""Sort nodes, but don't include some parents in the output"""
=== modified file 'bzrlib/tsort.py'
--- a/bzrlib/tsort.py 2009-03-23 14:59:43 +0000
+++ b/bzrlib/tsort.py 2009-08-09 21:05:57 +0000
@@ -20,6 +20,7 @@
from bzrlib import errors
import bzrlib.revision as _mod_revision
+from collections import deque
__all__ = ["topo_sort", "TopoSorter", "merge_sort", "MergeSorter"]
@@ -34,8 +35,57 @@
their children.
node identifiers can be any hashable object, and are typically strings.
+
+ This function has the same purpose as the TopoSorter class, but uses a
+ different algorithm to sort the graph. That means that while both return a list
+ with parents before their child nodes, the exact ordering can be different.
+
+ topo_sort is faster when the whole list is needed, while when iterating over a
+ part of the list, TopoSorter.iter_topo_order should be used.
"""
- return TopoSorter(graph).sorted()
+ # store a dict of the graph.
+ graph = dict(graph)
+ # this is the stack storing on which the sorted nodes are pushed.
+ node_stack = []
+
+ # count the number of children for every node in the graph
+ node_child_count = dict.fromkeys(graph.iterkeys(), 0)
+ for parents in graph.itervalues():
+ for parent in parents:
+ # don't count the parent if it's a ghost
+ if parent in node_child_count:
+ node_child_count[parent] += 1
+ # keep track of nodes without children in a separate list
+ nochild_nodes = deque([node for (node, n) in node_child_count.iteritems() if n == 0])
+
+ graph_pop = graph.pop
+ node_stack_append = node_stack.append
+ nochild_nodes_pop = nochild_nodes.pop
+ nochild_nodes_append = nochild_nodes.append
+
+ while nochild_nodes:
+ # pick a node without a child and add it to the stack.
+ node_name = nochild_nodes_pop()
+ node_stack_append(node_name)
+
+ # the parents of the node lose it as a child; if it was the last
+ # child, add the parent to the list of childless nodes.
+ parents = graph_pop(node_name)
+ for parent in parents:
+ if parent in node_child_count:
+ node_child_count[parent] -= 1
+ if node_child_count[parent] == 0:
+ nochild_nodes_append(parent)
+
+ # if there are still nodes left in the graph,
+ # that means that there is a cycle
+ if graph:
+ raise errors.GraphCycleError(graph)
+
+ # the nodes where pushed on the stack child first, so this list needs to be
+ # reversed before returning it.
+ node_stack.reverse()
+ return node_stack
class TopoSorter(object):
@@ -60,22 +110,8 @@
iteration or sorting may raise GraphCycleError if a cycle is present
in the graph.
"""
- # a dict of the graph.
+ # store a dict of the graph.
self._graph = dict(graph)
- self._visitable = set(self._graph)
- ### if debugging:
- # self._original_graph = dict(graph)
-
- # this is a stack storing the depth first search into the graph.
- self._node_name_stack = []
- # at each level of 'recursion' we have to check each parent. This
- # stack stores the parents we have not yet checked for the node at the
- # matching depth in _node_name_stack
- self._pending_parents_stack = []
- # this is a set of the completed nodes for fast checking whether a
- # parent in a node we are processing on the stack has already been
- # emitted and thus can be skipped.
- self._completed_node_names = set()
def sorted(self):
"""Sort the graph and return as a list.
@@ -100,67 +136,64 @@
After finishing iteration the sorter is empty and you cannot continue
iteration.
"""
- while self._graph:
+ graph = self._graph
+ visitable = set(graph)
+
+ # this is a stack storing the depth first search into the graph.
+ pending_node_stack = []
+ # at each level of 'recursion' we have to check each parent. This
+ # stack stores the parents we have not yet checked for the node at the
+ # matching depth in pending_node_stack
+ pending_parents_stack = []
+
+ # this is a set of the completed nodes for fast checking whether a
+ # parent in a node we are processing on the stack has already been
+ # emitted and thus can be skipped.
+ completed_node_names = set()
+
+ while graph:
# now pick a random node in the source graph, and transfer it to the
- # top of the depth first search stack.
- node_name, parents = self._graph.popitem()
- self._push_node(node_name, parents)
- while self._node_name_stack:
- # loop until this call completes.
- parents_to_visit = self._pending_parents_stack[-1]
- # if all parents are done, the revision is done
+ # top of the depth first search stack of pending nodes.
+ node_name, parents = graph.popitem()
+ pending_node_stack.append(node_name)
+ pending_parents_stack.append(list(parents))
+
+ # loop until pending_node_stack is empty
+ while pending_node_stack:
+ parents_to_visit = pending_parents_stack[-1]
+ # if there are no parents left, the revision is done
if not parents_to_visit:
# append the revision to the topo sorted list
- # all the nodes parents have been added to the output, now
- # we can add it to the output.
- yield self._pop_node()
+ # all the nodes parents have been added to the output,
+ # now we can add it to the output.
+ popped_node = pending_node_stack.pop()
+ pending_parents_stack.pop()
+ completed_node_names.add(popped_node)
+ yield popped_node
else:
- while self._pending_parents_stack[-1]:
- # recurse depth first into a single parent
- next_node_name = self._pending_parents_stack[-1].pop()
- if next_node_name in self._completed_node_names:
- # this parent was completed by a child on the
- # call stack. skip it.
- continue
- if next_node_name not in self._visitable:
- continue
- # otherwise transfer it from the source graph into the
- # top of the current depth first search stack.
- try:
- parents = self._graph.pop(next_node_name)
- except KeyError:
- # if the next node is not in the source graph it has
- # already been popped from it and placed into the
- # current search stack (but not completed or we would
- # have hit the continue 4 lines up.
- # this indicates a cycle.
- raise errors.GraphCycleError(self._node_name_stack)
- self._push_node(next_node_name, parents)
- # and do not continue processing parents until this 'call'
- # has recursed.
- break
-
- def _push_node(self, node_name, parents):
- """Add node_name to the pending node stack.
-
- Names in this stack will get emitted into the output as they are popped
- off the stack.
- """
- self._node_name_stack.append(node_name)
- self._pending_parents_stack.append(list(parents))
-
- def _pop_node(self):
- """Pop the top node off the stack
-
- The node is appended to the sorted output.
- """
- # we are returning from the flattened call frame:
- # pop off the local variables
- node_name = self._node_name_stack.pop()
- self._pending_parents_stack.pop()
-
- self._completed_node_names.add(node_name)
- return node_name
+ # recurse depth first into a single parent
+ next_node_name = parents_to_visit.pop()
+
+ if next_node_name in completed_node_names:
+ # parent was already completed by a child, skip it.
+ continue
+ if next_node_name not in visitable:
+ # parent is not a node in the original graph, skip it.
+ continue
+
+ # transfer it along with its parents from the source graph
+ # into the top of the current depth first search stack.
+ try:
+ parents = graph.pop(next_node_name)
+ except KeyError:
+ # if the next node is not in the source graph it has
+ # already been popped from it and placed into the
+ # current search stack (but not completed or we would
+ # have hit the continue 6 lines up). this indicates a
+ # cycle.
+ raise errors.GraphCycleError(pending_node_stack)
+ pending_node_stack.append(next_node_name)
+ pending_parents_stack.append(list(parents))
def merge_sort(graph, branch_tip, mainline_revisions=None, generate_revno=False):
More information about the bazaar-commits
mailing list