Rev 4420: Simplify gdfo computing by finding tails when at graph build time. in file:///home/vila/src/bzr/experimental/vila-better-heads/

Vincent Ladeuil v.ladeuil+lp at free.fr
Thu Jun 18 19:26:11 BST 2009


At file:///home/vila/src/bzr/experimental/vila-better-heads/

------------------------------------------------------------
revno: 4420
revision-id: v.ladeuil+lp at free.fr-20090618182610-o59r8149nlzb3b68
parent: v.ladeuil+lp at free.fr-20090618141237-k9u7mrithzstg15z
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: vila-better-heads
timestamp: Thu 2009-06-18 20:26:10 +0200
message:
  Simplify gdfo computing by finding tails when at graph build time.
  
  * bzrlib/_known_graph_pyx.pyx:
  (KnownGraph._get_or_create_node): We need to know if the ndoe as
  created.
  (KnownGraph._initialize_nodes): Calulate tails ahead of time to
  intialize gdfo computing.
  (KnownGraph._find_gdfo): Use tails directly.
  
  * bzrlib/_known_graph_py.py:
  (KnownGraph._initialize_nodes): Calulate tails ahead of time to
  intialize gdfo computing.
  (KnownGraph._find_gdfo): Use tails directly.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py	2009-06-18 14:12:37 +0000
+++ b/bzrlib/_known_graph_py.py	2009-06-18 18:26:10 +0000
@@ -69,15 +69,23 @@
     def _initialize_nodes(self, parent_map):
         """Populate self._nodes.
 
-        After this has finished, self._nodes will have an entry for every entry
-        in parent_map. Ghosts will have a parent_keys = None, all nodes found
-        will also have .child_keys populated with all known child_keys.
+        After this has finished:
+        - self._nodes will have an entry for every entry in parent_map.
+        - ghosts will have a parent_keys = None,
+        - all nodes found will also have .child_keys populated with all known
+          child_keys,
+        - self._tails will list all the nodes without parents.
         """
+        tails = self._tails = set()
         nodes = self._nodes
         for key, parent_keys in parent_map.iteritems():
             if key in nodes:
                 node = nodes[key]
                 node.parent_keys = parent_keys
+                if parent_keys:
+                    # This node has been added before being seen in parent_map
+                    # (see below)
+                    tails.remove(node)
             else:
                 node = _KnownGraphNode(key, parent_keys)
                 nodes[key] = node
@@ -87,6 +95,9 @@
                 except KeyError:
                     parent_node = _KnownGraphNode(parent_key, None)
                     nodes[parent_key] = parent_node
+                    # Potentially a tail, if we're wrong we'll remove it later
+                    # (see above)
+                    tails.add(parent_node)
                 parent_node.child_keys.append(key)
 
     def _find_linear_dominators(self):
@@ -151,16 +162,19 @@
         pending = []
         known_parent_gdfos = dict.fromkeys(nodes.keys(), 0)
 
-        for node in nodes.values():
-            if not node.parent_keys:
-                node.gdfo = 1
-                known_parent_gdfos[node.key] = 0
-                pending.append(node)
+        for node in self._tails:
+            node.gdfo = 1
+            known_parent_gdfos[node.key] = 0
+            pending.append(node)
+
         while pending:
             node = pending.pop()
             for child_key in node.child_keys:
                 child = nodes[child_key]
-                known_parent_gdfos[child_key] += 1
+                try:
+                    known_parent_gdfos[child_key] += 1
+                except KeyError:
+                    known_parent_gdfos[child_key] = 1
                 if child.gdfo is None or node.gdfo + 1 > child.gdfo:
                     child.gdfo = node.gdfo + 1
                 if known_parent_gdfos[child_key] == len(child.parent_keys):

=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-06-18 14:12:37 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-06-18 18:26:10 +0000
@@ -150,6 +150,7 @@
     """This is a class which assumes we already know the full graph."""
 
     cdef public object _nodes
+    cdef public object _tails
     cdef object _known_heads
     cdef public int do_cache
     # Nodes we've touched that we'll need to reset their info when heads() is
@@ -179,7 +180,7 @@
             child = <_KnownGraphNode>temp_node
             child.clear_references()
 
-    cdef _KnownGraphNode _get_or_create_node(self, key):
+    cdef _KnownGraphNode _get_or_create_node(self, key, int *created):
         cdef PyObject *temp_node
         cdef _KnownGraphNode node
 
@@ -187,22 +188,29 @@
         if temp_node == NULL:
             node = _KnownGraphNode(key)
             PyDict_SetItem(self._nodes, key, node)
+            created[0] = 1 # True
         else:
             node = <_KnownGraphNode>temp_node
+            created[0] = 0 # False
         return node
 
     def _initialize_nodes(self, parent_map):
         """Populate self._nodes.
 
-        After this has finished, self._nodes will have an entry for every entry
-        in parent_map. Ghosts will have a parent_keys = None, all nodes found
-        will also have .child_keys populated with all known child_keys.
+        After this has finished:
+        - self._nodes will have an entry for every entry in parent_map.
+        - ghosts will have a parent_keys = None,
+        - all nodes found will also have .child_keys populated with all known
+          child_keys,
+        - self._tails will list all the nodes without parents.
         """
         cdef PyObject *temp_key, *temp_parent_keys, *temp_node
         cdef Py_ssize_t pos, pos2, num_parent_keys
         cdef _KnownGraphNode node
         cdef _KnownGraphNode parent_node
+        cdef int created
 
+        tails = self._tails = set()
         nodes = self._nodes
 
         if not PyDict_CheckExact(parent_map):
@@ -212,15 +220,23 @@
         while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
             key = <object>temp_key
             parent_keys = <object>temp_parent_keys
-            node = self._get_or_create_node(key)
+            num_parent_keys = len(parent_keys)
+            node = self._get_or_create_node(key, &created)
+            if not created and num_parent_keys != 0:
+                # This node has been added before being seen in parent_map (see
+                # below)
+                tails.remove(node)
             # We know how many parents, so we could pre allocate an exact sized
             # tuple here
-            num_parent_keys = len(parent_keys)
             parent_nodes = PyTuple_New(num_parent_keys)
             # We use iter here, because parent_keys maybe be a list or tuple
             for pos2 from 0 <= pos2 < num_parent_keys:
-                parent_key = parent_keys[pos2]
-                parent_node = self._get_or_create_node(parent_keys[pos2])
+                parent_node = self._get_or_create_node(parent_keys[pos2],
+                                                       &created)
+                if created:
+                    # Potentially a tail, if we're wrong we'll remove it later
+                    # (see above)
+                    tails.add(parent_node)
                 # PyTuple_SET_ITEM will steal a reference, so INCREF first
                 Py_INCREF(parent_node)
                 PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
@@ -315,18 +331,21 @@
 
         nodes = self._nodes
         pending = []
-        known_parent_gdfos = dict.fromkeys(nodes.keys(), 0)
+        known_parent_gdfos = {}
 
-        for node in nodes.values():
-            if not node.parents:
-                node.gdfo = 1
-                known_parent_gdfos[node.key] = 0
-                pending.append(node)
+        for node in self._tails:
+            node.gdfo = 1
+            known_parent_gdfos[node] = 0
+            pending.append(node)
 
         while pending:
             node = <_KnownGraphNode>pending.pop()
             for child in node.children:
-                known_parent_gdfos[child.key] = known_parent_gdfos[child.key] + 1
+                try:
+                    known_parents = known_parent_gdfos[child.key]
+                except KeyError:
+                    known_parents = 0
+                known_parent_gdfos[child.key] = known_parents + 1
                 if child.gdfo is None or node.gdfo + 1 > child.gdfo:
                     child.gdfo = node.gdfo + 1
                 if known_parent_gdfos[child.key] == len(child.parents):



More information about the bazaar-commits mailing list