Rev 4399: Finish converting away from 'cdef list' syntax. in http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads

John Arbash Meinel john at arbash-meinel.com
Thu Jun 11 20:25:06 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads

------------------------------------------------------------
revno: 4399
revision-id: john at arbash-meinel.com-20090611192457-709bcvurmy02itd3
parent: john at arbash-meinel.com-20090611164732-yqh4dyu0i4kwtecj
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-better_heads
timestamp: Thu 2009-06-11 14:24:57 -0500
message:
  Finish converting away from 'cdef list' syntax.
  
  Also, I found that casting a PyObject * directly to _KnownGraphNode avoids
  the TypeTest. Since PyDict_GetItem and PyList_GET_ITEM return borrowed
  references anyway, we need a cast (to auto-incref the pointer), so
  we may as well use them for speed, and avoid the type check at the same
  time.
  Also using PyDict_Next everywhere instead of 'for x in dict.iterX'.
  The nice thing about PyDict_Next is that it always gives you items, but you
  can chose what you actually care about.
  
  
  Also, by doing all these manipulations directly, we shave some more time out
  of building a KnownGraph and out of the heads() calls.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-06-11 16:47:32 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-06-11 19:24:57 +0000
@@ -26,7 +26,16 @@
         pass
 
     object PyFrozenSet_New(object)
-    PyObject * PyDict_GetItem(object d, object k)
+    PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
+    Py_ssize_t PyTuple_GET_SIZE(object t)
+    PyObject * PyDict_GetItem(object d, object k)
+    PyObject * PyDict_GetItem(object d, object k)
+    Py_ssize_t PyDict_Size(object d) except -1
+    int PyDict_CheckExact(object d)
+    int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v)
+    int PyList_Append(object l, object v) except -1
+    PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
+    Py_ssize_t PyList_GET_SIZE(object l)
     int PyDict_SetItem(object d, object k, object v) except -1
     int PySet_Add(object s, object k) except -1
     void Py_INCREF(object)
@@ -47,8 +56,8 @@
     """Represents a single object in the known graph."""
 
     cdef object key
-    cdef list parents
-    cdef list children
+    cdef object parents
+    cdef object children
     cdef _KnownGraphNode linear_dominator_node
     cdef public long dominator_distance
     cdef public object gdfo # Int
@@ -74,12 +83,11 @@
 
     property child_keys:
         def __get__(self):
-            cdef list keys
             cdef _KnownGraphNode child
 
             keys = []
             for child in self.children:
-                keys.append(child.key)
+                PyList_Append(keys, child.key)
             return keys
 
     property linear_dominator:
@@ -110,9 +118,6 @@
 # TODO: slab allocate all _KnownGraphNode objects.
 #       We already know how many we are going to need, except for a couple of
 #       ghosts that could be allocated on demand.
-#       Also, using generic 'list' and 'dict' objects causes pyrex to generate
-#       a PyObject_TypeCheck when walking every node. It isn't terribly
-#       expensive, but it is a bit wasteful.
 
 cdef class KnownGraph:
     """This is a class which assumes we already know the full graph."""
@@ -122,7 +127,7 @@
     cdef public int do_cache
     # Nodes we've touched that we'll need to reset their info when heads() is
     # done
-    cdef list _to_cleanup
+    cdef object _to_cleanup
 
     def __init__(self, parent_map, do_cache=True):
         """Create a new KnownGraph instance.
@@ -140,8 +145,11 @@
 
     def __dealloc__(self):
         cdef _KnownGraphNode child
+        cdef Py_ssize_t pos
+        cdef PyObject *temp_node
 
-        for child in self._nodes.itervalues():
+        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+            child = <_KnownGraphNode>temp_node
             child.clear_references()
 
     def _initialize_nodes(self, parent_map):
@@ -151,32 +159,40 @@
         in parent_map. Ghosts will have a parent_keys = None, all nodes found
         will also have .child_keys populated with all known child_keys.
         """
+        cdef PyObject *temp_key, *temp_parent_keys, *temp_node
+        cdef Py_ssize_t pos
         cdef _KnownGraphNode node
         cdef _KnownGraphNode parent_node
-        cdef list parent_nodes
 
         nodes = self._nodes
 
-        for key, parent_keys in parent_map.iteritems():
+        if not PyDict_CheckExact(parent_map):
+            raise TypeError('parent_map should be a dict of {key:parent_keys}')
+        # for key, parent_keys in parent_map.iteritems():
+        pos = 0
+        while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
+            key = <object>temp_key
+            temp_node = PyDict_GetItem(nodes, key)
+            if temp_node == NULL:
+                node = _KnownGraphNode(key, None)
+                PyDict_SetItem(nodes, key, node)
+            else:
+                node = <_KnownGraphNode>temp_node
+                assert node.parents is None
+            parent_keys = <object>temp_parent_keys
             parent_nodes = []
             for parent_key in parent_keys:
-                try:
-                    parent_node = nodes[parent_key]
-                except KeyError:
+                temp_node = PyDict_GetItem(nodes, parent_key)
+                if temp_node == NULL:
                     parent_node = _KnownGraphNode(parent_key, None)
-                    nodes[parent_key] = parent_node
-                parent_nodes.append(parent_node)
-            if key in nodes:
-                node = nodes[key]
-                assert node.parents is None
-                node.parents = parent_nodes
-            else:
-                node = _KnownGraphNode(key, parent_nodes)
-                nodes[key] = node
-            for parent_node in parent_nodes:
-                parent_node.children.append(node)
+                    PyDict_SetItem(nodes, parent_key, parent_node)
+                else:
+                    parent_node = <_KnownGraphNode>temp_node
+                PyList_Append(parent_nodes, parent_node)
+                PyList_Append(parent_node.children, node)
+            node.parents = parent_nodes
 
-    cdef object _check_is_linear(self, _KnownGraphNode node):
+    cdef _KnownGraphNode _check_is_linear(self, _KnownGraphNode node):
         """Check to see if a given node is part of a linear chain."""
         cdef _KnownGraphNode parent_node
         if node.parents is None or len(node.parents) != 1:
@@ -185,8 +201,8 @@
             node.linear_dominator_node = node
             node.dominator_distance = 0
             return None
-        parent_node = node.parents[0]
-        if len(parent_node.children) > 1:
+        parent_node = <_KnownGraphNode>PyList_GET_ITEM(node.parents, 0)
+        if PyList_GET_SIZE(parent_node.children) > 1:
             # The parent has multiple children, so *this* node is the
             # dominator
             node.linear_dominator_node = node
@@ -210,11 +226,16 @@
         of A. Because there are no interesting siblings, we can quickly skip to
         the nearest dominator when doing comparisons.
         """
+        cdef PyObject *temp_node
+        cdef Py_ssize_t pos
         cdef _KnownGraphNode node
         cdef _KnownGraphNode next_node
-        cdef list stack
+        cdef _KnownGraphNode dominator
+        cdef int i, num_elements
 
-        for node in self._nodes.itervalues():
+        pos = 0
+        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+            node = <_KnownGraphNode>temp_node
             # The parent is not filled in, so walk until we get somewhere
             if node.linear_dominator_node is not None: #already done
                 continue
@@ -224,56 +245,66 @@
                 continue
             stack = []
             while next_node is not None:
-                stack.append(node)
+                PyList_Append(stack, node)
                 node = next_node
                 next_node = self._check_is_linear(node)
             # The stack now contains the linear chain, and 'node' should have
             # been labeled
             assert node.linear_dominator_node is not None
             dominator = node.linear_dominator_node
-            while stack:
-                next_node = stack.pop()
+            num_elements = len(stack)
+            for i from num_elements > i >= 0:
+                temp_node = PyList_GET_ITEM(stack, i)
+                next_node = <_KnownGraphNode>temp_node
                 next_node.linear_dominator_node = dominator
                 next_node.dominator_distance = node.dominator_distance + 1
                 node = next_node
 
-    cdef list _find_tails(self):
-        cdef list tails
+    cdef object _find_tails(self):
+        cdef object tails
+        cdef PyObject *temp_node
+        cdef Py_ssize_t pos
         cdef _KnownGraphNode node
+
         tails = []
-
-        for node in self._nodes.itervalues():
-            if not node.parents:
-                tails.append(node)
+        pos = 0
+        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+            node = <_KnownGraphNode>temp_node
+            if node.parents is None or PyList_GET_SIZE(node.parents) == 0:
+                PyList_Append(tails, node)
         return tails
 
     def _find_gdfo(self):
         # TODO: Consider moving the tails search into the first-pass over the
         #       data, inside _find_linear_dominators
+        cdef PyObject *temp_node
+        cdef Py_ssize_t pos, pos2
         cdef _KnownGraphNode node
         cdef _KnownGraphNode child_node
         cdef _KnownGraphNode parent_node
 
         tails = self._find_tails()
         todo = []
-        for node in tails:
+        for pos from 0 <= pos < PyList_GET_SIZE(tails):
+            temp_node = PyList_GET_ITEM(tails, pos)
+            node = <_KnownGraphNode>temp_node
             node.gdfo = 1
             heappush(todo, (1, node))
         max_gdfo = len(self._nodes) + 1
         while todo:
-            gdfo, node = heappop(todo)
-            if node.gdfo != -1 and gdfo < node.gdfo:
-                # This node was reached from a longer path, we assume it was
-                # enqued correctly with the longer gdfo, so don't continue
-                # processing now
-                continue
-            next_gdfo = gdfo + 1
+            temp_node = PyTuple_GET_ITEM(heappop(todo), 1)
+            node = <_KnownGraphNode>temp_node
+            next_gdfo = node.gdfo + 1
             assert next_gdfo <= max_gdfo
-            for child_node in node.children:
+            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
+                temp_node = PyList_GET_ITEM(node.children, pos)
+                child_node = <_KnownGraphNode>temp_node
                 if child_node.gdfo < next_gdfo:
                     # Only enque children when all of their parents have been
                     # resolved
-                    for parent_node in child_node.parents:
+                    for pos2 from 0 <= pos2 < PyList_GET_SIZE(child_node.parents):
+                        temp_node = PyList_GET_ITEM(child_node.parents, pos2)
+                        parent_node = <_KnownGraphNode>temp_node
                         # We know that 'this' parent is counted
                         if parent_node is not node:
                             if parent_node.gdfo == -1:
@@ -324,8 +355,7 @@
             return heads_key
         # Check the linear dominators of these keys, to see if we already
         # know the heads answer
-        dom_lookup_key, heads = self._heads_from_dominators(
-                                    candidate_nodes.values())
+        dom_lookup_key, heads = self._heads_from_dominators(candidate_nodes)
         if heads is not None:
             return heads
         heads = self._heads_from_candidate_nodes(candidate_nodes)
@@ -335,7 +365,6 @@
 
     cdef object _cache_heads(self, heads, heads_key, dom_lookup_key,
                              candidate_nodes):
-        cdef list dom_heads
         cdef PyObject *maybe_node
         cdef _KnownGraphNode node
 
@@ -345,28 +374,33 @@
             maybe_node = PyDict_GetItem(candidate_nodes, key)
             if maybe_node == NULL:
                 raise KeyError
-            node = <object>maybe_node
-            dom_heads.append(node.linear_dominator_node.key)
+            node = <_KnownGraphNode>maybe_node
+            PyList_Append(dom_heads, node.linear_dominator_node.key)
         PyDict_SetItem(self._known_heads, dom_lookup_key,
                        PyFrozenSet_New(dom_heads))
 
-    cdef object _heads_from_dominators(self, list candidates):
+    cdef object _heads_from_dominators(self, candidate_nodes):
         cdef PyObject *maybe_heads
         cdef PyObject *maybe_key
-        cdef list heads, dom_list_key
         cdef _KnownGraphNode node
+        cdef Py_ssize_t pos
+        cdef PyObject *temp_node
 
         dom_list_key = []
-        for node in candidates:
-            dom_list_key.append(node.linear_dominator_node.key)
+        pos = 0
+        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
+            node = <_KnownGraphNode>temp_node
+            PyList_Append(dom_list_key, node.linear_dominator_node.key)
         dom_lookup_key = PyFrozenSet_New(dom_list_key)
         maybe_heads = PyDict_GetItem(self._known_heads, dom_lookup_key)
         if maybe_heads == NULL:
             return dom_lookup_key, None
-        # We need to map back to the original keys
+        # We need to map back from the dominator head to the original keys
         dom_heads = <object>maybe_heads
         dom_to_key = {}
-        for node in candidates:
+        pos = 0
+        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
+            node = <_KnownGraphNode>temp_node
             PyDict_SetItem(dom_to_key, node.linear_dominator_node.key,
                                        node.key)
         heads = []
@@ -375,7 +409,7 @@
             if maybe_key == NULL:
                 # Should never happen
                 raise KeyError
-            heads.append(<object>maybe_key)
+            PyList_Append(heads, <object>maybe_key)
         return dom_lookup_key, PyFrozenSet_New(heads)
 
     cdef int _process_parent(self, _KnownGraphNode node,
@@ -399,7 +433,7 @@
             # info directly to parent_node, and enqueue it for later processing
             parent_node.ancestor_of = node.ancestor_of
             heappush(queue, (-parent_node.gdfo, parent_node))
-            self._to_cleanup.append(parent_node)
+            PyList_Append(self._to_cleanup, parent_node)
         elif parent_node.ancestor_of != node.ancestor_of:
             # Combine to get the full set of parents
             # Rewrite using PySet_* functions, unfortunately you have to use
@@ -413,27 +447,33 @@
     cdef object _heads_from_candidate_nodes(self, candidate_nodes):
         cdef _KnownGraphNode node
         cdef _KnownGraphNode parent_node
-        cdef int num_candidates
+        cdef Py_ssize_t num_candidates
         cdef int num_parents
-        cdef list queue
+        cdef Py_ssize_t pos
+        cdef PyObject *temp_node
 
         queue = []
-        for node in candidate_nodes.itervalues():
+        pos = 0
+        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
+            node = <_KnownGraphNode>temp_node
             assert node.ancestor_of is None
             node.ancestor_of = (node.key,)
-            queue.append((-node.gdfo, node))
-            self._to_cleanup.append(node)
+            PyList_Append(queue, (-node.gdfo, node))
+            PyList_Append(self._to_cleanup, node)
         heapify(queue)
         # These are nodes that we determined are 'common' that we are no longer
         # walking
         # Now we walk nodes until all nodes that are being walked are 'common'
         num_candidates = len(candidate_nodes)
-        while len(queue) > 0 and len(candidate_nodes) > 1:
-            _, node = heappop(queue)
-            if len(node.ancestor_of) == num_candidates:
+        while PyList_GET_SIZE(queue) > 0 and PyDict_Size(candidate_nodes) > 1:
+            temp_node = PyTuple_GET_ITEM(heappop(queue), 1)
+            node = <_KnownGraphNode>temp_node
+            if PyTuple_GET_SIZE(node.ancestor_of) == num_candidates:
                 # This node is now considered 'common'
                 # Make sure all parent nodes are marked as such
-                for parent_node in node.parents:
+                for pos from 0 <= pos < PyList_GET_SIZE(node.parents):
+                    temp_node = PyList_GET_ITEM(node.parents, pos)
+                    parent_node = <_KnownGraphNode>temp_node
                     if parent_node.ancestor_of is not None:
                         parent_node.ancestor_of = node.ancestor_of
                 if node.linear_dominator_node is not node:
@@ -454,11 +494,15 @@
                                          candidate_nodes, queue)):
                     break
             else:
-               for parent_node in node.parents:
+               for pos from 0 <= pos < PyList_GET_SIZE(node.parents):
+                   temp_node = PyList_GET_ITEM(node.parents, pos)
+                   parent_node = <_KnownGraphNode>temp_node
                    if (self._process_parent(node, parent_node,
                                             candidate_nodes, queue)):
                        break
-        for node in self._to_cleanup:
+        for pos from 0 <= pos < PyList_GET_SIZE(self._to_cleanup):
+            temp_node = PyList_GET_ITEM(self._to_cleanup, pos)
+            node = <_KnownGraphNode>temp_node
             node.ancestor_of = None
         self._to_cleanup = []
         return PyFrozenSet_New(candidate_nodes)



More information about the bazaar-commits mailing list