Rev 4840: Implement KnownGraph.add_node() for the pyrex version. in http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b4-kg-add-node

John Arbash Meinel john at arbash-meinel.com
Mon Nov 30 17:25:24 GMT 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b4-kg-add-node

------------------------------------------------------------
revno: 4840
revision-id: john at arbash-meinel.com-20091130172522-m373yel3msmjppey
parent: john at arbash-meinel.com-20091130165755-4hia7he55tjv7180
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1.0b4-kg-add-node
timestamp: Mon 2009-11-30 11:25:22 -0600
message:
  Implement KnownGraph.add_node() for the pyrex version.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py	2009-11-30 16:57:55 +0000
+++ b/bzrlib/_known_graph_py.py	2009-11-30 17:25:22 +0000
@@ -63,7 +63,7 @@
         :param parent_map: A dictionary mapping key => parent_keys
         """
         self._nodes = {}
-        # Maps {sorted(revision_id, revision_id): heads}
+        # Maps {frozenset(revision_id, revision_id): heads}
         self._known_heads = {}
         self.do_cache = do_cache
         self._initialize_nodes(parent_map)
@@ -134,18 +134,34 @@
                     known_parent_gdfos[child_key] = known_gdfo
 
     def add_node(self, key, parent_keys):
-        """Add a new node to the graph."""
+        """Add a new node to the graph.
+
+        If this fills in a ghost, then the gdfos of all children will be
+        updated accordingly.
+        
+        :param key: The node being added. If this is a duplicate, this is a
+            no-op.
+        :param parent_keys: The parents of the given node.
+        :return: None (should we return if this was a ghost, etc?)
+        """
         nodes = self._nodes
         if key in nodes:
             node = nodes[key]
             if node.parent_keys is None:
                 node.parent_keys = parent_keys
-            elif parent_keys == node.parent_keys:
-                return # Identical content
+                # A ghost is being added, we can no-longer trust the heads
+                # cache, so clear it
+                self._known_heads.clear()
             else:
-                raise ValueError('Parent key mismatch, existing node %s'
-                    ' has parents of %s not %s'
-                    % (key, node.parent_keys, parent_keys))
+                # Make sure we compare a list to a list, as tuple != list.
+                parent_keys = list(parent_keys)
+                existing_parent_keys = list(node.parent_keys)
+                if parent_keys == existing_parent_keys:
+                    return # Identical content
+                else:
+                    raise ValueError('Parent key mismatch, existing node %s'
+                        ' has parents of %s not %s'
+                        % (key, existing_parent_keys, parent_keys))
         else:
             node = _KnownGraphNode(key, parent_keys)
             nodes[key] = node
@@ -182,7 +198,6 @@
                     child.gdfo = next_gdfo
                     pending.append(child)
 
-
     def heads(self, keys):
         """Return the heads from amongst keys.
 

=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-09-07 14:19:05 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-11-30 17:25:22 +0000
@@ -51,6 +51,7 @@
 
     void Py_INCREF(object)
 
+from collections import deque
 import gc
 
 from bzrlib import errors, revision
@@ -192,7 +193,7 @@
     """This is a class which assumes we already know the full graph."""
 
     cdef public object _nodes
-    cdef object _known_heads
+    cdef public object _known_heads
     cdef public int do_cache
 
     def __init__(self, parent_map, do_cache=True):
@@ -232,6 +233,28 @@
             node = <_KnownGraphNode>temp_node
         return node
 
+    cdef _populate_parents(self, _KnownGraphNode node, parent_keys):
+        cdef Py_ssize_t num_parent_keys, pos
+        cdef _KnownGraphNode parent_node
+
+        num_parent_keys = len(parent_keys)
+        # We know how many parents, so we pre allocate the tuple
+        parent_nodes = PyTuple_New(num_parent_keys)
+        for pos from 0 <= pos < num_parent_keys:
+            # Note: it costs us 10ms out of 40ms to lookup all of these
+            #       parents, it doesn't seem to be an allocation overhead,
+            #       but rather a lookup overhead. There doesn't seem to be
+            #       a way around it, and that is one reason why
+            #       KnownGraphNode maintains a direct pointer to the parent
+            #       node.
+            # We use [] because parent_keys may be a tuple or list
+            parent_node = self._get_or_create_node(parent_keys[pos])
+            # PyTuple_SET_ITEM will steal a reference, so INCREF first
+            Py_INCREF(parent_node)
+            PyTuple_SET_ITEM(parent_nodes, pos, parent_node)
+            PyList_Append(parent_node.children, node)
+        node.parents = parent_nodes
+
     def _initialize_nodes(self, parent_map):
         """Populate self._nodes.
 
@@ -242,7 +265,7 @@
           child keys,
         """
         cdef PyObject *temp_key, *temp_parent_keys, *temp_node
-        cdef Py_ssize_t pos, pos2, num_parent_keys
+        cdef Py_ssize_t pos
         cdef _KnownGraphNode node
         cdef _KnownGraphNode parent_node
 
@@ -253,24 +276,8 @@
         while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
             key = <object>temp_key
             parent_keys = <object>temp_parent_keys
-            num_parent_keys = len(parent_keys)
             node = self._get_or_create_node(key)
-            # We know how many parents, so we pre allocate the tuple
-            parent_nodes = PyTuple_New(num_parent_keys)
-            for pos2 from 0 <= pos2 < num_parent_keys:
-                # Note: it costs us 10ms out of 40ms to lookup all of these
-                #       parents, it doesn't seem to be an allocation overhead,
-                #       but rather a lookup overhead. There doesn't seem to be
-                #       a way around it, and that is one reason why
-                #       KnownGraphNode maintains a direct pointer to the parent
-                #       node.
-                # We use [] because parent_keys may be a tuple or list
-                parent_node = self._get_or_create_node(parent_keys[pos2])
-                # PyTuple_SET_ITEM will steal a reference, so INCREF first
-                Py_INCREF(parent_node)
-                PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
-                PyList_Append(parent_node.children, node)
-            node.parents = parent_nodes
+            self._populate_parents(node, parent_keys)
 
     def _find_tails(self):
         cdef PyObject *temp_node
@@ -334,6 +341,76 @@
                     # anymore
                     child.seen = 0
 
+    def add_node(self, key, parent_keys):
+        """Add a new node to the graph.
+
+        If this fills in a ghost, then the gdfos of all children will be
+        updated accordingly.
+        
+        :param key: The node being added. If this is a duplicate, this is a
+            no-op.
+        :param parent_keys: The parents of the given node.
+        :return: None (should we return if this was a ghost, etc?)
+        """
+        cdef PyObject *maybe_node
+        cdef _KnownGraphNode node, parent_node, child_node
+        cdef long parent_gdfo, next_gdfo
+
+        maybe_node = PyDict_GetItem(self._nodes, key)
+        if maybe_node != NULL:
+            node = <_KnownGraphNode>maybe_node
+            if node.parents is None:
+                # We are filling in a ghost
+                self._populate_parents(node, parent_keys)
+                # We can't trust cached heads anymore
+                self._known_heads.clear()
+            else: # Ensure that the parent_key list matches
+                existing_parent_keys = []
+                for parent_node in node.parents:
+                    existing_parent_keys.append(parent_node.key)
+                # Make sure we use a list for the comparison, in case it was a
+                # tuple, etc
+                parent_keys = list(parent_keys)
+                if existing_parent_keys == parent_keys:
+                    # Exact match, nothing more to do
+                    return
+                else:
+                    raise ValueError('Parent key mismatch, existing node %s'
+                        ' has parents of %s not %s'
+                        % (key, existing_parent_keys, parent_keys))
+        else:
+            node = _KnownGraphNode(key)
+            PyDict_SetItem(self._nodes, key, node)
+            self._populate_parents(node, parent_keys)
+        parent_gdfo = 0
+        for parent_node in node.parents:
+            if parent_node.gdfo == -1:
+                # This is a newly introduced ghost, so it gets gdfo of 1
+                parent_node.gdfo = 1
+            if parent_gdfo < parent_node.gdfo:
+                parent_gdfo = parent_node.gdfo
+        node.gdfo = parent_gdfo + 1
+        # Now fill the gdfo to all children
+        # Note that this loop is slightly inefficient, in that we may visit the
+        # same child (and its decendents) more than once, however, it is
+        # 'efficient' in that we only walk to nodes that would be updated,
+        # rather than all nodes
+        # We use a deque rather than a simple list stack, to go for BFD rather
+        # than DFD. So that if a longer path is possible, we walk it before we
+        # get to the final child
+        pending = deque([node])
+        pending_popleft = pending.popleft
+        pending_append = pending.append
+        while pending:
+            node = pending_popleft()
+            next_gdfo = node.gdfo + 1
+            for child_node in node.children:
+                if child_node.gdfo < next_gdfo:
+                    # This child is being updated, we need to check its
+                    # children
+                    child_node.gdfo = next_gdfo
+                    pending_append(child_node)
+
     def heads(self, keys):
         """Return the heads from amongst keys.
 

=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py	2009-11-30 16:57:55 +0000
+++ b/bzrlib/tests/test__known_graph.py	2009-11-30 17:25:22 +0000
@@ -161,6 +161,13 @@
         self.assertGDFO(graph, 'rev4', 5)
         graph.add_node('rev4', ['rev3', 'rev2b'])
         self.assertGDFO(graph, 'rev4', 5)
+        # This also works if we use a tuple rather than a list
+        graph.add_node('rev4', ('rev3', 'rev2b'))
+
+    def test_add_existing_node_mismatched_parents(self):
+        graph = self.make_known_graph(test_graph.ancestry_1)
+        self.assertRaises(ValueError, graph.add_node, 'rev4',
+                          ['rev2b', 'rev3'])
 
     def test_add_node_with_ghost_parent(self):
         graph = self.make_known_graph(test_graph.ancestry_1)
@@ -320,6 +327,16 @@
         self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c', 'e', 'g']))
         self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c', 'f']))
 
+    def test_filling_in_ghosts_resets_head_cache(self):
+        if not self.do_cache:
+            raise tests.TestNotApplicable('testing the cache behavior')
+        graph = self.make_known_graph(test_graph.with_ghost)
+        self.assertEqual(set(['e', 'g']), graph.heads(['e', 'g']))
+        # 'g' is filled in, and decends from 'e', so the heads result is now
+        # different
+        graph.add_node('g', ['e'])
+        self.assertEqual(set(['g']), graph.heads(['e', 'g']))
+
 
 class TestKnownGraphTopoSort(TestCaseWithKnownGraph):
 



More information about the bazaar-commits mailing list