Rev 4397: A few more cleanups. Start moving away from pyrex 0.9.8isms in http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads

Thu Jun 11 17:46:27 BST 2009

At http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads

------------------------------------------------------------
revno: 4397
revision-id: john at arbash-meinel.com-20090611164617-ojhy89zgig81yuez
parent: john at arbash-meinel.com-20090611162226-0r7sa3k0wbe0hfy9
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-better_heads
timestamp: Thu 2009-06-11 11:46:17 -0500
message:
  A few more cleanups. Start moving away from pyrex 0.9.8isms
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'

--- a/bzrlib/_known_graph_py.py	2009-06-10 19:56:16 +0000
+++ b/bzrlib/_known_graph_py.py	2009-06-11 16:46:17 +0000
@@ -147,8 +147,6 @@
                 node = next_node
 
     def _find_gdfo(self):
-        # TODO: Consider moving the tails search into the first-pass over the
-        #       data, inside _find_linear_dominators
         def find_tails():
             return [node for node in self._nodes.itervalues()
                        if not node.parent_keys]
@@ -219,27 +217,9 @@
             return heads
         except KeyError:
             pass # compute it ourselves
-        ## dominator = None
-        ## # TODO: We could optimize for the len(candidate_nodes) > 2 by checking
-        ## #       for *any* pair-wise matching, and then eliminating one of the
-        ## #       nodes trivially. However, the fairly common case is just 2
-        ## #       keys, so we'll focus on that, first
-        ## for node in candidate_nodes.itervalues():
-        ##     if dominator is None:
-        ##         dominator = node.linear_dominator
-        ##     elif dominator != node.linear_dominator:
-        ##         break
-        ## else:
-        ##     # In 'time bzr annotate NEWS' this only catches *one* item, so it
-        ##     # probably isn't worth the optimization
-        ##     # All of these nodes have the same linear_dominator, which means
-        ##     # they are in a line, the head is just the one with the highest
-        ##     # distance
-        ##     def get_distance(key):
-        ##         return self._nodes[key].dominator_distance
-        ##     def get_linear_head():
-        ##         return max(candidate_nodes, key=get_distance)
-        ##     return set([get_linear_head()])
+        # We could do a check here to see if all nodes have the same
+        # 'linear_dominator'. However, in testing, this only was encountered 1
+        # during 'bzr annotate' so it is likely to not be particularly helpful
         dom_key = None
         # Check the linear dominators of these keys, to see if we already
         # know the heads answer

=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-06-11 04:28:57 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-06-11 16:46:17 +0000
@@ -36,8 +36,11 @@
 
 from bzrlib import revision
 
+# Define these as cdef objects, so we don't have to getattr them later
+cdef object heappush, heappop, heapify
 heappush = heapq.heappush
 heappop = heapq.heappop
+heapify = heapq.heapify
 
 
 cdef class _KnownGraphNode:
@@ -105,13 +108,17 @@
 
 
 # TODO: slab allocate all _KnownGraphNode objects.
-#       We already know how many we are going to need...
+#       We already know how many we are going to need, except for a couple of
+#       ghosts that could be allocated on demand.
+#       Also, using generic 'list' and 'dict' objects causes pyrex to generate
+#       a PyObject_TypeCheck when walking every node. It isn't terribly
+#       expensive, but it is a bit wasteful.
 
 cdef class KnownGraph:
     """This is a class which assumes we already know the full graph."""
 
     cdef public object _nodes
-    cdef dict _known_heads
+    cdef object _known_heads
     cdef public int do_cache
     # Nodes we've touched that we'll need to reset their info when heads() is
     # done
@@ -147,7 +154,6 @@
         cdef _KnownGraphNode node
         cdef _KnownGraphNode parent_node
         cdef list parent_nodes
-        cdef dict nodes
 
         nodes = self._nodes
 
@@ -293,23 +299,18 @@
             information. Callers will need to filter their input to create
             order if they need it.
         """
-        cdef dict candidate_nodes
-        cdef dict dom_to_node
-        cdef dict nodes
-        cdef _KnownGraphNode node
         cdef PyObject *maybe_node
+        cdef PyObject *maybe_heads
 
         heads_key = PyFrozenSet_New(keys)
-        try:
-            heads = self._known_heads[heads_key]
-            return heads
-        except KeyError:
-            pass # compute it ourselves
+        maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
+        if maybe_heads != NULL:
+            return <object>maybe_heads
+
+        # Not cached, compute it ourselves
         candidate_nodes = {}
         nodes = self._nodes
         for key in keys:
-            # node = nodes[key]
-            # candidate_nodes[key] = node
             maybe_node = PyDict_GetItem(nodes, key)
             if maybe_node == NULL:
                 raise KeyError('key %s not in nodes' % (key,))
@@ -352,8 +353,8 @@
                        PyFrozenSet_New(dom_heads))
 
     cdef object _heads_from_dominators(self, list candidates):
-        cdef PyObject *test_heads
-        cdef PyObject *test_key
+        cdef PyObject *maybe_heads
+        cdef PyObject *maybe_key
         cdef list heads, dom_list_key
         cdef _KnownGraphNode node
 
@@ -361,27 +362,27 @@
         for node in candidates:
             dom_list_key.append(node.linear_dominator_node.key)
         dom_lookup_key = PyFrozenSet_New(dom_list_key)
-        test_heads = PyDict_GetItem(self._known_heads, dom_lookup_key)
-        if test_heads == NULL:
+        maybe_heads = PyDict_GetItem(self._known_heads, dom_lookup_key)
+        if maybe_heads == NULL:
             return dom_lookup_key, None
         # We need to map back to the original keys
-        dom_heads = <object>test_heads
+        dom_heads = <object>maybe_heads
         dom_to_key = {}
         for node in candidates:
             PyDict_SetItem(dom_to_key, node.linear_dominator_node.key,
                                        node.key)
         heads = []
         for dom_key in dom_heads:
-            test_key = PyDict_GetItem(dom_to_key, dom_key)
-            if test_key == NULL:
+            maybe_key = PyDict_GetItem(dom_to_key, dom_key)
+            if maybe_key == NULL:
                 # Should never happen
                 raise KeyError
-            heads.append(<object>test_key)
+            heads.append(<object>maybe_key)
         return dom_lookup_key, PyFrozenSet_New(heads)
 
     cdef int _process_parent(self, _KnownGraphNode node,
                              _KnownGraphNode parent_node,
-                             dict candidate_nodes,
+                             candidate_nodes,
                              queue) except -1:
         """Process the parent of a node, seeing if we need to walk it.
 
@@ -411,7 +412,7 @@
             parent_node.ancestor_of = tuple(sorted(all_ancestors))
         return 0
 
-    cdef object _heads_from_candidate_nodes(self, dict candidate_nodes):
+    cdef object _heads_from_candidate_nodes(self, candidate_nodes):
         cdef _KnownGraphNode node
         cdef _KnownGraphNode parent_node
         cdef int num_candidates
@@ -424,7 +425,7 @@
             node.ancestor_of = (node.key,)
             queue.append((-node.gdfo, node))
             self._to_cleanup.append(node)
-        heapq.heapify(queue)
+        heapify(queue)
         # These are nodes that we determined are 'common' that we are no longer
         # walking
         # Now we walk nodes until all nodes that are being walked are 'common'