Rev 4642: Change the KnownGraph.merge_sort api. in http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted

John Arbash Meinel john at arbash-meinel.com
Mon Aug 17 21:40:59 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted

------------------------------------------------------------
revno: 4642
revision-id: john at arbash-meinel.com-20090817204048-e56pce2ol3h22v0x
parent: john at arbash-meinel.com-20090817200626-1m6qaasv2af9x3wx
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.19-known-graph-sorted
timestamp: Mon 2009-08-17 15:40:48 -0500
message:
  Change the KnownGraph.merge_sort api.
  
  Instead of being a tuple api, it is now an Object api. TIMEIT testing at least
  seems to say that _MergeSortNode.key is no slower than using a tuple.
  Allocating the nodes is also not much slower (especially since we already needed
  them.)
  
  In the end it makes for an api which, IMO, is much easier to use, so
  since performance isn't impacted, it is worth switching.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py	2009-08-17 20:06:26 +0000
+++ b/bzrlib/_known_graph_py.py	2009-08-17 20:40:48 +0000
@@ -41,6 +41,18 @@
             self.parent_keys, self.child_keys)
 
 
+class _MergeSortNode(object):
+    """Information about a specific node in the merge graph."""
+
+    __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
+
+    def __init__(self, key, merge_depth, revno, end_of_merge):
+        self.key = key
+        self.merge_depth = merge_depth
+        self.revno = revno
+        self.end_of_merge = end_of_merge
+
+
 class KnownGraph(object):
     """This is a class which assumes we already know the full graph."""
 
@@ -215,6 +227,8 @@
         # We intentionally always generate revnos and never force the
         # mainline_revisions
         # Strip the sequence_number that merge_sort generates
-        return [info[1:] for info in tsort.merge_sort(as_parent_map, tip_key,
-                                mainline_revisions=None,
-                                generate_revno=True)]
+        return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
+                for _, key, merge_depth, revno, end_of_merge
+                 in tsort.merge_sort(as_parent_map, tip_key,
+                                     mainline_revisions=None,
+                                     generate_revno=True)]

=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-08-17 18:56:38 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-08-17 20:40:48 +0000
@@ -412,27 +412,32 @@
 cdef class _MergeSortNode:
     """Tracks information about a node during the merge_sort operation."""
 
-    cdef long merge_depth
+    # Public api
+    cdef public object key
+    cdef public long merge_depth
+    cdef public object end_of_merge # True/False Is this the end of the current merge
+
+    # Private api, used while computing the information
     cdef _KnownGraphNode left_parent
     cdef _KnownGraphNode left_pending_parent
     cdef object pending_parents # list of _KnownGraphNode for non-left parents
-    cdef long revno_first
-    cdef long revno_second
-    cdef long revno_last
+    cdef long _revno_first
+    cdef long _revno_second
+    cdef long _revno_last
     # TODO: turn these into flag/bit fields rather than individual members
     cdef int is_first_child # Is this the first child?
     cdef int seen_by_child # A child node has seen this parent
     cdef int completed # Fully Processed
-    cdef object end_of_merge # True/False Is this the end of the current merge
 
-    def __init__(self):
+    def __init__(self, key):
+        self.key = key
         self.merge_depth = -1
         self.left_parent = None
         self.left_pending_parent = None
         self.pending_parents = None
-        self.revno_first = -1
-        self.revno_second = -1
-        self.revno_last = -1
+        self._revno_first = -1
+        self._revno_second = -1
+        self._revno_last = -1
         self.is_first_child = 0
         self.seen_by_child = 0
         self.completed = 0
@@ -440,7 +445,7 @@
     def __repr__(self):
         return '%s(depth:%s rev:%s,%s,%s first:%s seen:%s)' % (self.__class__.__name__,
             self.merge_depth,
-            self.revno_first, self.revno_second, self.revno_last,
+            self._revno_first, self._revno_second, self._revno_last,
             self.is_first_child, self.seen_by_child)
 
     cdef int has_pending_parents(self):
@@ -448,6 +453,18 @@
             return 1
         return 0
 
+    cdef object _revno(self):
+        if self._revno_first == -1:
+            if self._revno_second != -1:
+                raise RuntimeError('Something wrong with: %s' % (self,))
+            return (self._revno_last,)
+        else:
+            return (self._revno_first, self._revno_second, self._revno_last)
+
+    property revno:
+        def __get__(self):
+            return self._revno()
+
 
 cdef class _MergeSorter:
     """This class does the work of computing the merge_sort ordering.
@@ -487,7 +504,7 @@
         cdef _MergeSortNode ms_node
 
         if node.extra is None:
-            ms_node = _MergeSortNode()
+            ms_node = _MergeSortNode(node.key)
             node.extra = ms_node
         else:
             ms_node = <_MergeSortNode>node.extra
@@ -539,17 +556,17 @@
             ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
             if ms_node.is_first_child:
                 # First child just increments the final digit
-                ms_node.revno_first = ms_parent_node.revno_first
-                ms_node.revno_second = ms_parent_node.revno_second
-                ms_node.revno_last = ms_parent_node.revno_last + 1
+                ms_node._revno_first = ms_parent_node._revno_first
+                ms_node._revno_second = ms_parent_node._revno_second
+                ms_node._revno_last = ms_parent_node._revno_last + 1
             else:
                 # Not the first child, make a new branch
                 #  (mainline_revno, branch_count, 1)
-                if ms_parent_node.revno_first == -1:
+                if ms_parent_node._revno_first == -1:
                     # Mainline ancestor, the increment is on the last digit
-                    base_revno = ms_parent_node.revno_last
+                    base_revno = ms_parent_node._revno_last
                 else:
-                    base_revno = ms_parent_node.revno_first
+                    base_revno = ms_parent_node._revno_first
                 temp = PyDict_GetItem(self._revno_to_branch_count,
                                       base_revno)
                 if temp == NULL:
@@ -558,22 +575,22 @@
                     branch_count = (<object>temp) + 1
                 PyDict_SetItem(self._revno_to_branch_count, base_revno,
                                branch_count)
-                ms_node.revno_first = base_revno
-                ms_node.revno_second = branch_count
-                ms_node.revno_last = 1
+                ms_node._revno_first = base_revno
+                ms_node._revno_second = branch_count
+                ms_node._revno_last = 1
         else:
             temp = PyDict_GetItem(self._revno_to_branch_count, 0)
             if temp == NULL:
                 # The first root node doesn't have a 3-digit revno
                 root_count = 0
-                ms_node.revno_first = -1
-                ms_node.revno_second = -1
-                ms_node.revno_last = 1
+                ms_node._revno_first = -1
+                ms_node._revno_second = -1
+                ms_node._revno_last = 1
             else:
                 root_count = (<object>temp) + 1
-                ms_node.revno_first = 0
-                ms_node.revno_second = root_count
-                ms_node.revno_last = 1
+                ms_node._revno_first = 0
+                ms_node._revno_second = root_count
+                ms_node._revno_last = 1
             PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
         ms_node.completed = 1
         if PyList_GET_SIZE(self._scheduled_nodes) == 0:
@@ -665,23 +682,12 @@
         #       7ms is computing the return tuple
         #       4ms is PyList_Append()
         ordered = []
-        # output the result in reverse order, and convert from objects into
-        # tuples...
+        # output the result in reverse order, and separate the generated info
         for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
             node = _get_list_node(self._scheduled_nodes, pos)
             ms_node = <_MergeSortNode>node.extra
-            if ms_node.revno_first == -1:
-                if ms_node.revno_second != -1:
-                    raise RuntimeError('Something wrong with: %s' % (ms_node,))
-                revno = (ms_node.revno_last,)
-            else:
-                revno = (ms_node.revno_first, ms_node.revno_second,
-                         ms_node.revno_last)
-            PyList_Append(ordered, (node.key,
-                                    ms_node.merge_depth, revno,
-                                    ms_node.end_of_merge))
-            # Get rid of the extra stored info
+            PyList_Append(ordered, ms_node)
             node.extra = None
-        # Clear out the scheduled nodes
+        # Clear out the scheduled nodes now that we're done
         self._scheduled_nodes = []
         return ordered

=== modified file 'bzrlib/branch.py'
--- a/bzrlib/branch.py	2009-08-17 18:56:38 +0000
+++ b/bzrlib/branch.py	2009-08-17 20:40:48 +0000
@@ -449,10 +449,8 @@
             last_key = (last_revision,)
             known_graph = self.repository.revisions.get_known_graph_ancestry(
                 [last_key])
-            revs = known_graph.merge_sort(last_key)
-            self._merge_sorted_revisions_cache = [(key[-1], d, rn, eom)
-                for key, d, rn, eom in revs]
-
+            self._merge_sorted_revisions_cache = known_graph.merge_sort(
+                last_key)
         filtered = self._filter_merge_sorted_revisions(
             self._merge_sorted_revisions_cache, start_revision_id,
             stop_revision_id, stop_rule)
@@ -468,27 +466,34 @@
         """Iterate over an inclusive range of sorted revisions."""
         rev_iter = iter(merge_sorted_revisions)
         if start_revision_id is not None:
-            for rev_id, depth, revno, end_of_merge in rev_iter:
+            for node in rev_iter:
+                rev_id = node.key[-1]
                 if rev_id != start_revision_id:
                     continue
                 else:
                     # The decision to include the start or not
                     # depends on the stop_rule if a stop is provided
-                    rev_iter = chain(
-                        iter([(rev_id, depth, revno, end_of_merge)]),
-                        rev_iter)
+                    # so pop this node back into the iterator
+                    rev_iter = chain(iter([node]), rev_iter)
                     break
         if stop_revision_id is None:
-            for rev_id, depth, revno, end_of_merge in rev_iter:
-                yield rev_id, depth, revno, end_of_merge
+            # Yield everything
+            for node in rev_iter:
+                rev_id = node.key[-1]
+                yield (rev_id, node.merge_depth, node.revno,
+                       node.end_of_merge)
         elif stop_rule == 'exclude':
-            for rev_id, depth, revno, end_of_merge in rev_iter:
+            for node in rev_iter:
+                rev_id = node.key[-1]
                 if rev_id == stop_revision_id:
                     return
-                yield rev_id, depth, revno, end_of_merge
+                yield (rev_id, node.merge_depth, node.revno,
+                       node.end_of_merge)
         elif stop_rule == 'include':
-            for rev_id, depth, revno, end_of_merge in rev_iter:
-                yield rev_id, depth, revno, end_of_merge
+            for node in rev_iter:
+                rev_id = node.key[-1]
+                yield (rev_id, node.merge_depth, node.revno,
+                       node.end_of_merge)
                 if rev_id == stop_revision_id:
                     return
         elif stop_rule == 'with-merges':
@@ -497,10 +502,12 @@
                 left_parent = stop_rev.parent_ids[0]
             else:
                 left_parent = _mod_revision.NULL_REVISION
-            for rev_id, depth, revno, end_of_merge in rev_iter:
+            for node in rev_iter:
+                rev_id = node.key[-1]
                 if rev_id == left_parent:
                     return
-                yield rev_id, depth, revno, end_of_merge
+                yield (rev_id, node.merge_depth, node.revno,
+                       node.end_of_merge)
         else:
             raise ValueError('invalid stop_rule %r' % stop_rule)
 

=== modified file 'bzrlib/repofmt/weaverepo.py'
--- a/bzrlib/repofmt/weaverepo.py	2009-04-09 20:23:07 +0000
+++ b/bzrlib/repofmt/weaverepo.py	2009-08-17 20:40:48 +0000
@@ -28,6 +28,7 @@
 lazy_import(globals(), """
 from bzrlib import (
     xml5,
+    graph as _mod_graph,
     )
 """)
 from bzrlib import (
@@ -669,6 +670,13 @@
             result[key] = parents
         return result
 
+    def get_known_graph_ancestry(self, keys):
+        """Get a KnownGraph instance with the ancestry of keys."""
+        keys = self.keys()
+        parent_map = self.get_parent_map(keys)
+        kg = _mod_graph.KnownGraph(parent_map)
+        return kg
+
     def get_record_stream(self, keys, sort_order, include_delta_closure):
         for key in keys:
             text, parents = self._load_text_parents(key)

=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py	2009-08-17 18:56:38 +0000
+++ b/bzrlib/tests/test__known_graph.py	2009-08-17 20:40:48 +0000
@@ -336,6 +336,8 @@
         """Check that merge based sorting and iter_topo_order on graph works."""
         graph = self.make_known_graph(ancestry)
         value = graph.merge_sort(branch_tip)
+        value = [(n.key, n.merge_depth, n.revno, n.end_of_merge)
+                 for n in value]
         if result_list != value:
             self.assertEqualDiff(pprint.pformat(result_list),
                                  pprint.pformat(value))



More information about the bazaar-commits mailing list