Rev 4616: Approximate implementation in Pyrex. in http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted

John Arbash Meinel john at arbash-meinel.com
Fri Aug 14 21:12:51 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted

------------------------------------------------------------
revno: 4616
revision-id: john at arbash-meinel.com-20090814201235-hwd48ysxqhlj6dpy
parent: john at arbash-meinel.com-20090813221653-5mu0bz3f4cudw6wu
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.19-known-graph-sorted
timestamp: Fri 2009-08-14 15:12:35 -0500
message:
  Approximate implementation in Pyrex.
  
  Mostly just a carbon copy of the python version, having some weird
  issues with the numbering of roots/ghosts. Which is why
  the test existed in the first place :).
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-08-13 22:16:53 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-08-14 20:12:35 +0000
@@ -115,6 +115,8 @@
     return <_KnownGraphNode>temp_node
 
 
+cdef class _MergeSorter
+
 cdef class KnownGraph:
     """This is a class which assumes we already know the full graph."""
 
@@ -389,21 +391,10 @@
 
     def merge_sort(self, tip_key):
         """Compute the merge sorted graph output."""
-        cdef _KnownGraphNode node, parent_node
-        from bzrlib import tsort
-        # TODO: merge_sort doesn't handle ghosts (yet), figure out what to do
-        #       when we want it to.
-        as_parent_map = {}
-        for node in self._nodes.itervalues():
-            parent_keys = []
-            for parent_node in node.parents:
-                parent_keys.append(parent_node.key)
-            as_parent_map[node.key] = parent_keys
-        # We intentionally always generate revnos and never force the
-        # mainline_revisions
-        return tsort.merge_sort(as_parent_map, tip_key,
-                                mainline_revisions=None,
-                                generate_revno=True)
+        cdef _MergeSorter sorter
+
+        sorter = _MergeSorter(self, tip_key)
+        return sorter.topo_order()
 
 
 cdef class _MergeSortNode:
@@ -412,16 +403,15 @@
     cdef _KnownGraphNode node
     cdef Py_ssize_t merge_depth
     cdef int left_subtree_pushed # True/False
-    cdef object pending_parents # list of _MergeSortNode objects
+    cdef object pending_parents # list of _KnownGraphNode
     cdef object revno # tuple of dotted revnos
     cdef int is_first_child # Is this the first child?
-
-
-cdef _MergeSortNode _make_merge_sort_node(_KnownGraphNode node,
-                                          Py_ssize_t merge_depth,
-                                          int left_subtree_pushed):
-    cdef _MergeSortNode ms_node
-    return ms_node
+    # cdef int seen_by_child # A child node has seen this parent
+
+    def __repr__(self):
+        return '_MSN(%s depth:%s lp:%s rev:%s)' % (#self.__class__.__name__,
+            self.node.key, self.merge_depth, self.left_subtree_pushed,
+            self.revno)
 
 
 cdef class _MergeSorter:
@@ -433,220 +423,170 @@
 
     cdef KnownGraph graph
     cdef object _stack  # list
-    cdef object _seen_parents # set of keys for which we have seen a child
+    cdef object _seen_parents # Set of keys
+    cdef object _ms_nodes # dict of key => _MergeSortNode
+    cdef object _revno_to_branch_count # {revno => num child branches}
+    cdef object _completed_node_names # Set of keys that have been completed
+    cdef object _scheduled_nodes # List of nodes ready to be yielded
 
     def __init__(self, known_graph, tip_key):
         self.graph = known_graph
+        self._ms_nodes = {}
+        self._revno_to_branch_count = {}
         self._seen_parents = set()
+        self._stack = []
+        self._completed_node_names = set()
+        self._scheduled_nodes = []
         if tip_key is not None and tip_key != NULL_REVISION:
             node = self.graph._nodes[tip_key]
             self._push_node(node, 0)
 
     cdef _push_node(self, _KnownGraphNode node, Py_ssize_t merge_depth):
         cdef _KnownGraphNode parent_node
+        cdef _MergeSortNode ms_node, ms_parent_node
 
         ms_node = _MergeSortNode()
         ms_node.node = node
         ms_node.merge_depth = merge_depth
         ms_node.left_subtree_pushed = 0
+        # TODO: turn this into a list of pending _MergeSortNode rather than
+        # keys
         ms_node.pending_parents = list(node.parents)
         ms_node.revno = None
         ms_node.is_first_child = 1
+        # ms_node.seen_by_child = 0
         self._stack.append(ms_node)
         if node.parents:
             parent_node = _get_parent(node.parents, 0)
 
+            # TODO: could we use a '.seen' member instead of a set?
+            #       alternatively, track self._ms_nodes = {}, etc.
+            #       If we use _ms_nodes, then we have to be able to create a
+            #       new parent node 'on demand' even when we don't know the
+            #       rest of the info yet.
             if parent_node.key in self._seen_parents:
-                ms_node.is_first_child = True
+                ms_node.is_first_child = 0
             self._seen_parents.add(parent_node.key)
-
-#     def iter_topo_order(self):
-#         """Yield the nodes of the graph in a topological order.
-# 
-#         After finishing iteration the sorter is empty and you cannot continue
-#         iteration.
-#         """
-#         # These are safe to offload to local variables, because they are used
-#         # as a stack and modified in place, never assigned to.
-#         node_name_stack = self._node_name_stack
-#         node_merge_depth_stack = self._node_merge_depth_stack
-#         pending_parents_stack = self._pending_parents_stack
-#         left_subtree_pushed_stack = self._left_subtree_pushed_stack
-#         completed_node_names = self._completed_node_names
-#         scheduled_nodes = self._scheduled_nodes
-# 
-#         graph_pop = self._graph.pop
-# 
-#         def push_node(node_name, merge_depth, parents,
-#                       node_name_stack_append=node_name_stack.append,
-#                       node_merge_depth_stack_append=node_merge_depth_stack.append,
-#                       left_subtree_pushed_stack_append=left_subtree_pushed_stack.append,
-#                       pending_parents_stack_append=pending_parents_stack.append,
-#                       first_child_stack_append=self._first_child_stack.append,
-#                       revnos=self._revnos,
-#                       ):
-#             """Add node_name to the pending node stack.
-# 
-#             Names in this stack will get emitted into the output as they are popped
-#             off the stack.
-# 
-#             This inlines a lot of self._variable.append functions as local
-#             variables.
-#             """
-#             node_name_stack_append(node_name)
-#             node_merge_depth_stack_append(merge_depth)
-#             left_subtree_pushed_stack_append(False)
-#             pending_parents_stack_append(list(parents))
-#             # as we push it, check if it is the first child
-#             if parents:
-#                 # node has parents, assign from the left most parent.
-#                 parent_info = revnos[parents[0]]
-#                 first_child = parent_info[1]
-#                 parent_info[1] = False
-#             else:
-#                 # We don't use the same algorithm here, but we need to keep the
-#                 # stack in line
-#                 first_child = None
-#             first_child_stack_append(first_child)
-# 
-#         def pop_node(node_name_stack_pop=node_name_stack.pop,
-#                      node_merge_depth_stack_pop=node_merge_depth_stack.pop,
-#                      first_child_stack_pop=self._first_child_stack.pop,
-#                      left_subtree_pushed_stack_pop=left_subtree_pushed_stack.pop,
-#                      pending_parents_stack_pop=pending_parents_stack.pop,
-#                      original_graph=self._original_graph,
-#                      revnos=self._revnos,
-#                      completed_node_names_add=self._completed_node_names.add,
-#                      scheduled_nodes_append=scheduled_nodes.append,
-#                      revno_to_branch_count=self._revno_to_branch_count,
-#                     ):
-#             """Pop the top node off the stack
-# 
-#             The node is appended to the sorted output.
-#             """
-#             # we are returning from the flattened call frame:
-#             # pop off the local variables
-#             node_name = node_name_stack_pop()
-#             merge_depth = node_merge_depth_stack_pop()
-#             first_child = first_child_stack_pop()
-#             # remove this node from the pending lists:
-#             left_subtree_pushed_stack_pop()
-#             pending_parents_stack_pop()
-# 
-#             parents = original_graph[node_name]
-#             if parents:
-#                 # node has parents, assign from the left most parent.
-#                 parent_revno = revnos[parents[0]][0]
-#                 if not first_child:
-#                     # not the first child, make a new branch
-#                     base_revno = parent_revno[0]
-#                     branch_count = revno_to_branch_count.get(base_revno, 0)
-#                     branch_count += 1
-#                     revno_to_branch_count[base_revno] = branch_count
-#                     revno = (parent_revno[0], branch_count, 1)
-#                     # revno = (parent_revno[0], branch_count, parent_revno[-1]+1)
-#                 else:
-#                     # as the first child, we just increase the final revision
-#                     # number
-#                     revno = parent_revno[:-1] + (parent_revno[-1] + 1,)
-#             else:
-#                 # no parents, use the root sequence
-#                 root_count = revno_to_branch_count.get(0, -1)
-#                 root_count += 1
-#                 if root_count:
-#                     revno = (0, root_count, 1)
-#                 else:
-#                     revno = (1,)
-#                 revno_to_branch_count[0] = root_count
-# 
-#             # store the revno for this node for future reference
-#             revnos[node_name][0] = revno
-#             completed_node_names_add(node_name)
-#             scheduled_nodes_append((node_name, merge_depth, revno))
-#             return node_name
-# 
-# 
-#         while node_name_stack:
-#             # loop until this call completes.
-#             parents_to_visit = pending_parents_stack[-1]
-#             # if all parents are done, the revision is done
-#             if not parents_to_visit:
-#                 # append the revision to the topo sorted scheduled list:
-#                 # all the nodes parents have been scheduled added, now
-#                 # we can add it to the output.
-#                 pop_node()
-#             else:
-#                 while pending_parents_stack[-1]:
-#                     if not left_subtree_pushed_stack[-1]:
-#                         # recurse depth first into the primary parent
-#                         next_node_name = pending_parents_stack[-1].pop(0)
-#                     else:
-#                         # place any merges in right-to-left order for scheduling
-#                         # which gives us left-to-right order after we reverse
-#                         # the scheduled queue. XXX: This has the effect of
-#                         # allocating common-new revisions to the right-most
-#                         # subtree rather than the left most, which will
-#                         # display nicely (you get smaller trees at the top
-#                         # of the combined merge).
-#                         next_node_name = pending_parents_stack[-1].pop()
-#                     if next_node_name in completed_node_names:
-#                         # this parent was completed by a child on the
-#                         # call stack. skip it.
-#                         continue
-#                     # otherwise transfer it from the source graph into the
-#                     # top of the current depth first search stack.
-#                     try:
-#                         parents = graph_pop(next_node_name)
-#                     except KeyError:
-#                         # if the next node is not in the source graph it has
-#                         # already been popped from it and placed into the
-#                         # current search stack (but not completed or we would
-#                         # have hit the continue 4 lines up.
-#                         # this indicates a cycle.
-#                         raise errors.GraphCycleError(node_name_stack)
-#                     next_merge_depth = 0
-#                     if left_subtree_pushed_stack[-1]:
-#                         # a new child branch from name_stack[-1]
-#                         next_merge_depth = 1
-#                     else:
-#                         next_merge_depth = 0
-#                         left_subtree_pushed_stack[-1] = True
-#                     next_merge_depth = (
-#                         node_merge_depth_stack[-1] + next_merge_depth)
-#                     push_node(
-#                         next_node_name,
-#                         next_merge_depth,
-#                         parents)
-#                     # and do not continue processing parents until this 'call'
-#                     # has recursed.
-#                     break
-# 
-#         # We have scheduled the graph. Now deliver the ordered output:
-#         sequence_number = 0
-#         stop_revision = self._stop_revision
-#         generate_revno = self._generate_revno
-#         original_graph = self._original_graph
-# 
-#         while scheduled_nodes:
-#             node_name, merge_depth, revno = scheduled_nodes.pop()
-#             if node_name == stop_revision:
-#                 return
-#             if not len(scheduled_nodes):
-#                 # last revision is the end of a merge
-#                 end_of_merge = True
-#             elif scheduled_nodes[-1][1] < merge_depth:
-#                 # the next node is to our left
-#                 end_of_merge = True
-#             elif (scheduled_nodes[-1][1] == merge_depth and
-#                   (scheduled_nodes[-1][0] not in
-#                    original_graph[node_name])):
-#                 # the next node was part of a multiple-merge.
-#                 end_of_merge = True
-#             else:
-#                 end_of_merge = False
-#             if generate_revno:
-#                 yield (sequence_number, node_name, merge_depth, revno, end_of_merge)
-#             else:
-#                 yield (sequence_number, node_name, merge_depth, end_of_merge)
-#             sequence_number += 1
-# 
+        self._ms_nodes[ms_node.node.key] = ms_node
+
+    cdef _pop_node(self):
+        cdef _MergeSortNode ms_node, ms_parent_node
+        cdef _KnownGraphNode parent_node
+
+        ms_node = self._stack.pop()
+        if ms_node.node.parents:
+            # Assign the revision number for *this* node, from its left-hand
+            # parent
+            parent_node = _get_parent(ms_node.node.parents, 0)
+            ms_parent_node = self._ms_nodes[parent_node.key]
+            if not ms_node.is_first_child:
+                base_revno = ms_parent_node.revno[0]
+                branch_count = self._revno_to_branch_count.get(base_revno, 0)
+                branch_count = branch_count + 1
+                self._revno_to_branch_count[base_revno] = branch_count
+                revno = (base_revno, branch_count, 1)
+            else:
+                # First child just increments the final digit
+                revno = ms_parent_node.revno[:-1] + (ms_parent_node.revno[-1] +
+                                                        1,)
+        else:
+            root_count = self._revno_to_branch_count.get(0, 0)
+            if root_count == 0:
+                revno = (1,)
+            else:
+                revno = (0, root_count, 1)
+            root_count = root_count + 1
+            self._revno_to_branch_count[0] = root_count
+            print ms_node, root_count
+        ms_node.revno = revno
+        self._completed_node_names.add(ms_node.node.key)
+        self._scheduled_nodes.append(ms_node)
+
+    cdef _schedule_stack(self):
+        cdef _MergeSortNode ms_node, ms_last_node
+        cdef _KnownGraphNode next_node
+        ordered = []
+        while self._stack:
+            # Peek at the last item on the stack
+            # print self._stack
+            # print '  ', self._scheduled_nodes
+            ms_last_node = self._stack[-1]
+            if not ms_last_node.pending_parents:
+                self._pop_node()
+                continue
+            while ms_last_node.pending_parents:
+                if not ms_last_node.left_subtree_pushed:
+                    # recurse depth first into the primary parent
+                    next_node = ms_last_node.pending_parents.pop(0)
+                else:
+                    # place any merges in right-to-left order for scheduling
+                    # which gives us left-to-right order after we reverse
+                    # the scheduled queue. XXX: This has the effect of
+                    # allocating common-new revisions to the right-most
+                    # subtree rather than the left most, which will
+                    # display nicely (you get smaller trees at the top
+                    # of the combined merge).
+                    next_node = ms_last_node.pending_parents.pop()
+                if next_node.key in self._completed_node_names:
+                    # this parent was completed by a child on the
+                    # call stack. skip it.
+                    continue
+                # otherwise transfer it from the source graph into the
+                # top of the current depth first search stack.
+                # TODO: Check for GraphCycleError
+                ## try:
+                ##     parents = graph_pop(next_node_name)
+                ## except KeyError:
+                ##     # if the next node is not in the source graph it has
+                ##     # already been popped from it and placed into the
+                ##     # current search stack (but not completed or we would
+                ##     # have hit the continue 4 lines up.
+                ##     # this indicates a cycle.
+                ##     raise errors.GraphCycleError(self._stack)
+
+                next_merge_depth = 0
+                if ms_last_node.left_subtree_pushed:
+                    next_merge_depth = 1
+                else:
+                    next_merge_depth = 0
+                    ms_last_node.left_subtree_pushed = 1
+                next_merge_depth = next_merge_depth + ms_last_node.merge_depth
+                self._push_node(next_node, next_merge_depth)
+                # and do not continue processing parents until this 'call'
+                # has recursed.
+                break
+
+    cdef topo_order(self):
+        cdef _MergeSortNode ms_node, ms_last_node
+        cdef _KnownGraphNode next_node
+
+        # print
+        self._schedule_stack()
+        # print self._scheduled_nodes
+
+        # We've set up the basic schedule, now we can continue processing the
+        # output.
+        sequence_number = 0
+        ordered = []
+        while self._scheduled_nodes:
+            ms_node = self._scheduled_nodes.pop()
+            # TODO: stop_revision not supported
+            # if ms_node == stop_revision:
+            if len(self._scheduled_nodes) == 0:
+                end_of_merge = True
+            else:
+                ms_last_node = self._scheduled_nodes[-1]
+                if ms_last_node.merge_depth < ms_node.merge_depth:
+                    # Next node is to our left, so this is the end of the right
+                    # chain
+                    end_of_merge = True
+                elif (ms_last_node.merge_depth == ms_node.merge_depth
+                      and ms_last_node.node not in ms_node.node.parents):
+                    # The next node is not a direct parent of this node
+                    end_of_merge = True
+                else:
+                    end_of_merge = False
+            ordered.append((sequence_number, ms_node.node.key,
+                            ms_node.merge_depth, ms_node.revno, end_of_merge))
+            sequence_number = sequence_number + 1
+        return ordered



More information about the bazaar-commits mailing list