Rev 4627: Switch from having a MergeSortNode => KnownGraphNode to in http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted

John Arbash Meinel john at arbash-meinel.com
Sun Aug 16 17:56:43 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted

------------------------------------------------------------
revno: 4627
revision-id: john at arbash-meinel.com-20090816165630-g94xm2dan6i03dvv
parent: john at arbash-meinel.com-20090816163403-e94dowrraajvf1s4
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.19-known-graph-sorted
timestamp: Sun 2009-08-16 11:56:30 -0500
message:
  Switch from having a MergeSortNode => KnownGraphNode to
  having KnownGraphNode.extra => MergeSortNode
  
  This avoids some of the dict lookups, etc, especially while creating the
  nodes. I'm not 100% sure I like the layering, though.
  
  I suppose it could be considered a 'void*' for processing algorithms to
  use, similar to the \.seen member.
  
   #   92ms graph.KnownGraph().merge_sort()
   #   42ms kg.merge_sort()
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-08-16 16:34:03 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-08-16 16:56:30 +0000
@@ -60,6 +60,7 @@
     cdef object children
     cdef public long gdfo
     cdef int seen
+    cdef object extra
 
     def __init__(self, key):
         cdef int i
@@ -71,6 +72,7 @@
         # Greatest distance from origin
         self.gdfo = -1
         self.seen = 0
+        self.extra = None
 
     property child_keys:
         def __get__(self):
@@ -409,11 +411,10 @@
 cdef class _MergeSortNode:
     """Tracks information about a node during the merge_sort operation."""
 
-    cdef _KnownGraphNode node
     cdef Py_ssize_t merge_depth
-    cdef _MergeSortNode left_ms_parent
-    cdef _MergeSortNode left_pending_parent
-    cdef object pending_parents # list of _MergeSortNode for non-left parents
+    cdef _KnownGraphNode left_parent
+    cdef _KnownGraphNode left_pending_parent
+    cdef object pending_parents # list of _KnownGraphNode for non-left parents
     cdef Py_ssize_t revno_first
     cdef Py_ssize_t revno_second
     cdef Py_ssize_t revno_last
@@ -422,10 +423,9 @@
     cdef int seen_by_child # A child node has seen this parent
     cdef int completed # Fully Processed
 
-    def __init__(self, node):
-        self.node = node
+    def __init__(self):
         self.merge_depth = -1
-        self.left_ms_parent = None
+        self.left_parent = None
         self.left_pending_parent = None
         self.pending_parents = None
         self.revno_first = -1
@@ -436,19 +436,8 @@
         self.completed = 0
 
     def __repr__(self):
-        cdef _MergeSortNode ms_par
-        left_key = None
-        if self.left_ms_parent is not None:
-            left_key = self.left_ms_parent.node.key
-        left_pp = None
-        if self.left_pending_parent is not None:
-            left_pp = self.left_pending_parent.node.key
-        pp = []
-        if self.pending_parents:
-            for ms_par in self.pending_parents:
-                pp.append(ms_par.node.key)
-        return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (self.__class__.__name__,
-            self.node.key, self.merge_depth,
+        return '%s(depth:%s rev:%s,%s,%s first:%s seen:%s)' % (self.__class__.__name__,
+            self.merge_depth,
             self.revno_first, self.revno_second, self.revno_last,
             self.is_first_child, self.seen_by_child)
 
@@ -458,12 +447,12 @@
         return 0
 
 
-cdef _MergeSortNode _get_ms_node(lst, Py_ssize_t pos):
-    cdef PyObject *temp_node
-
-    temp_node = PyList_GET_ITEM(lst, pos)
-    return <_MergeSortNode>temp_node
-
+# cdef _MergeSortNode _get_ms_node(lst, Py_ssize_t pos):
+#     cdef PyObject *temp_node
+# 
+#     temp_node = PyList_GET_ITEM(lst, pos)
+#     return <_MergeSortNode>temp_node
+# 
 
 cdef class _MergeSorter:
     """This class does the work of computing the merge_sort ordering.
@@ -474,98 +463,99 @@
 
     # Current performance numbers for merge_sort(bzr_dev_parent_map):
     #  310ms tsort.merge_sort()
-    #  103ms graph.KnownGraph().merge_sort()
-    #   55ms kg.merge_sort()
+    #   92ms graph.KnownGraph().merge_sort()
+    #   42ms kg.merge_sort()
 
     cdef KnownGraph graph
     cdef object _depth_first_stack  # list
     cdef Py_ssize_t _last_stack_item # offset to last item on stack
-    cdef object _ms_nodes # dict of key => _MergeSortNode
+    # cdef object _ms_nodes # dict of key => _MergeSortNode
     cdef object _revno_to_branch_count # {revno => num child branches}
     cdef object _scheduled_nodes # List of nodes ready to be yielded
 
     def __init__(self, known_graph, tip_key):
         cdef _KnownGraphNode node
-        cdef _MergeSortNode ms_node
+
         self.graph = known_graph
-        self._ms_nodes = {}
+        # self._ms_nodes = {}
         self._revno_to_branch_count = {}
         self._depth_first_stack = []
         self._last_stack_item = -1
         self._scheduled_nodes = []
         if tip_key is not None and tip_key != NULL_REVISION:
             node = self.graph._nodes[tip_key]
-            ms_node = self._get_or_create_node(node)
-            self._push_node(ms_node, 0)
+            self._push_node(node, 0)
 
     cdef _MergeSortNode _get_or_create_node(self, _KnownGraphNode node):
         cdef PyObject *temp_node
         cdef _MergeSortNode ms_node
 
-        temp_node = PyDict_GetItem(self._ms_nodes, node.key)
-        if temp_node == NULL:
-            ms_node = _MergeSortNode(node)
-            PyDict_SetItem(self._ms_nodes, node.key, ms_node)
+        if node.extra is None:
+            ms_node = _MergeSortNode()
+            node.extra = ms_node
         else:
-            ms_node = <_MergeSortNode>temp_node
+            ms_node = <_MergeSortNode>node.extra
         return ms_node
 
-    cdef _push_node(self, _MergeSortNode ms_node, Py_ssize_t merge_depth):
+    cdef _push_node(self, _KnownGraphNode node, Py_ssize_t merge_depth):
         cdef _KnownGraphNode parent_node
-        cdef _MergeSortNode ms_parent_node
+        cdef _MergeSortNode ms_node, ms_parent_node
         cdef Py_ssize_t pos
 
+        ms_node = self._get_or_create_node(node)
         ms_node.merge_depth = merge_depth
-        if PyTuple_GET_SIZE(ms_node.node.parents) > 0:
-            parent_node = _get_parent(ms_node.node.parents, 0)
-            ms_node.left_ms_parent = self._get_or_create_node(
-                parent_node)
-            ms_node.left_pending_parent = ms_node.left_ms_parent
-        if PyTuple_GET_SIZE(ms_node.node.parents) > 1:
-            ms_node.pending_parents = []
-            for pos from 1 <= pos < PyTuple_GET_SIZE(ms_node.node.parents):
-                parent_node = _get_parent(ms_node.node.parents, pos)
-                PyList_Append(ms_node.pending_parents,
-                    self._get_or_create_node(parent_node))
+        if PyTuple_GET_SIZE(node.parents) > 0:
+            parent_node = _get_parent(node.parents, 0)
+            ms_node.left_parent = parent_node
+            ms_node.left_pending_parent = parent_node
+        if PyTuple_GET_SIZE(node.parents) > 1:
+            ms_node.pending_parents = list(node.parents[1:])
+            # ms_node.pending_parents = []
+            # for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
+            #     parent_node = _get_parent(node.parents, pos)
+            #     PyList_Append(ms_node.pending_parents, parent_node)
 
         ms_node.is_first_child = 1
-        if ms_node.left_ms_parent is not None:
-            if ms_node.left_ms_parent.seen_by_child:
+        if ms_node.left_parent is not None:
+            ms_parent_node = self._get_or_create_node(ms_node.left_parent)
+            if ms_parent_node.seen_by_child:
                 ms_node.is_first_child = 0
-            ms_node.left_ms_parent.seen_by_child = 1
+            ms_parent_node.seen_by_child = 1
         self._last_stack_item = self._last_stack_item + 1
         if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack):
-            Py_INCREF(ms_node) # SetItem steals a ref
+            Py_INCREF(node) # SetItem steals a ref
             PyList_SetItem(self._depth_first_stack, self._last_stack_item,
-                           ms_node)
+                           node)
         else:
-            PyList_Append(self._depth_first_stack, ms_node)
+            PyList_Append(self._depth_first_stack, node)
         # print 'pushed: %s' % (ms_node,)
 
     cdef _pop_node(self):
         cdef PyObject *temp
-        cdef _MergeSortNode ms_node
-        cdef _KnownGraphNode parent_node
+        cdef _MergeSortNode ms_node, ms_parent_node
+        cdef _KnownGraphNode node, parent_node
 
         assert self._last_stack_item >= 0
-        ms_node = _get_ms_node(self._depth_first_stack, self._last_stack_item)
+        node = _get_list_node(self._depth_first_stack, self._last_stack_item)
+        ms_node = <_MergeSortNode>node.extra
         self._last_stack_item = self._last_stack_item - 1
         # print 'popping: %s' % (ms_node,)
-        if ms_node.left_ms_parent is not None:
+        if ms_node.left_parent is not None:
             # Assign the revision number for *this* node, from its left-hand
             # parent
+            ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
             if ms_node.is_first_child:
                 # First child just increments the final digit
-                ms_node.revno_first = ms_node.left_ms_parent.revno_first
-                ms_node.revno_second = ms_node.left_ms_parent.revno_second
-                ms_node.revno_last = ms_node.left_ms_parent.revno_last + 1
+                ms_node.revno_first = ms_parent_node.revno_first
+                ms_node.revno_second = ms_parent_node.revno_second
+                ms_node.revno_last = ms_parent_node.revno_last + 1
             else:
                 # Not the first child, make a new branch
-                if ms_node.left_ms_parent.revno_first == -1:
+                if ms_parent_node.revno_first == -1:
                     # Mainline ancestor, the increment is on the last digit
-                    base_revno = ms_node.left_ms_parent.revno_last
+                    base_revno = ms_parent_node.revno_last
                 else:
-                    base_revno = ms_node.left_ms_parent.revno_first
+                    base_revno = ms_parent_node.revno_first
                 temp = PyDict_GetItem(self._revno_to_branch_count,
                                       base_revno)
                 if temp == NULL:
@@ -574,10 +564,10 @@
                     branch_count = (<object>temp) + 1
                 PyDict_SetItem(self._revno_to_branch_count, base_revno,
                                branch_count)
-                if ms_node.left_ms_parent.revno_first == -1:
-                    ms_node.revno_first = ms_node.left_ms_parent.revno_last
+                if ms_parent_node.revno_first == -1:
+                    ms_node.revno_first = ms_parent_node.revno_last
                 else:
-                    ms_node.revno_first = ms_node.left_ms_parent.revno_first
+                    ms_node.revno_first = ms_parent_node.revno_first
                 ms_node.revno_second = branch_count
                 ms_node.revno_last = 1
         else:
@@ -594,9 +584,10 @@
                 ms_node.revno_last = 1
             self._revno_to_branch_count[0] = root_count
         ms_node.completed = 1
-        PyList_Append(self._scheduled_nodes, ms_node)
+        PyList_Append(self._scheduled_nodes, node)
 
     cdef _schedule_stack(self):
+        cdef _KnownGraphNode last_node, next_node
         cdef _MergeSortNode ms_node, ms_last_node, ms_next_node
         cdef Py_ssize_t next_merge_depth
         ordered = []
@@ -604,8 +595,9 @@
             # Peek at the last item on the stack
             # print self._depth_first_stack
             # print '  ', self._scheduled_nodes
-            ms_last_node = _get_ms_node(self._depth_first_stack,
-                                        self._last_stack_item)
+            last_node = _get_list_node(self._depth_first_stack,
+                                       self._last_stack_item)
+            ms_last_node = <_MergeSortNode>last_node.extra
             if not ms_last_node.has_pending_parents():
                 # Processed all parents, pop this node
                 self._pop_node()
@@ -613,7 +605,7 @@
             while ms_last_node.has_pending_parents():
                 if ms_last_node.left_pending_parent is not None:
                     # recurse depth first into the primary parent
-                    ms_next_node = ms_last_node.left_pending_parent
+                    next_node = ms_last_node.left_pending_parent
                     ms_last_node.left_pending_parent = None
                 else:
                     # place any merges in right-to-left order for scheduling
@@ -623,7 +615,8 @@
                     # subtree rather than the left most, which will
                     # display nicely (you get smaller trees at the top
                     # of the combined merge).
-                    ms_next_node = ms_last_node.pending_parents.pop()
+                    next_node = ms_last_node.pending_parents.pop()
+                ms_next_node = self._get_or_create_node(next_node)
                 if ms_next_node.completed:
                     # this parent was completed by a child on the
                     # call stack. skip it.
@@ -643,19 +636,19 @@
 
                 assert ms_next_node is not None
                 next_merge_depth = 0
-                if ms_next_node is ms_last_node.left_ms_parent:
+                if next_node is ms_last_node.left_parent:
                     next_merge_depth = 0
                 else:
                     next_merge_depth = 1
                 next_merge_depth = next_merge_depth + ms_last_node.merge_depth
-                self._push_node(ms_next_node, next_merge_depth)
+                self._push_node(next_node, next_merge_depth)
                 # and do not continue processing parents until this 'call'
                 # has recursed.
                 break
 
     cdef topo_order(self):
-        cdef _MergeSortNode ms_node, ms_last_node
-        cdef _KnownGraphNode next_node
+        cdef _MergeSortNode ms_node, ms_prev_node
+        cdef _KnownGraphNode node, prev_node
         cdef Py_ssize_t pos
 
         # print
@@ -671,21 +664,27 @@
         ordered = []
         pos = PyList_GET_SIZE(self._scheduled_nodes) - 1
         if pos >= 0:
-            ms_last_node = _get_ms_node(self._scheduled_nodes, pos)
+            prev_node = _get_list_node(self._scheduled_nodes, pos)
+            ms_prev_node = <_MergeSortNode>prev_node.extra
         while pos >= 0:
-            ms_node = ms_last_node
+            if node is not None:
+                # Clear out the extra info we don't need
+                node.extra = None
+            node = prev_node
+            ms_node = ms_prev_node
             pos = pos - 1
             if pos == -1:
                 # Final node is always the end-of-chain
                 end_of_merge = True
             else:
-                ms_last_node = _get_ms_node(self._scheduled_nodes, pos)
-                if ms_last_node.merge_depth < ms_node.merge_depth:
+                prev_node = _get_list_node(self._scheduled_nodes, pos)
+                ms_prev_node = <_MergeSortNode>prev_node.extra
+                if ms_prev_node.merge_depth < ms_node.merge_depth:
                     # Next node is to our left, so this is the end of the right
                     # chain
                     end_of_merge = True
-                elif (ms_last_node.merge_depth == ms_node.merge_depth
-                      and ms_last_node.node not in ms_node.node.parents):
+                elif (ms_prev_node.merge_depth == ms_node.merge_depth
+                      and prev_node not in node.parents):
                     # The next node is not a direct parent of this node
                     end_of_merge = True
                 else:
@@ -697,9 +696,11 @@
             else:
                 revno = (ms_node.revno_first, ms_node.revno_second,
                          ms_node.revno_last)
-            PyList_Append(ordered, (sequence_number, ms_node.node.key,
+            PyList_Append(ordered, (sequence_number, node.key,
                                     ms_node.merge_depth, revno, end_of_merge))
             sequence_number = sequence_number + 1
+        if node is not None:
+            node.extra = None
         # Clear out the scheduled nodes
         self._scheduled_nodes = []
         return ordered



More information about the bazaar-commits mailing list