Rev 4849: (jam) Add KnownGraph.add_node() functionality. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Tue Dec 1 15:24:11 GMT 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4849 [merge]
revision-id: pqm at pqm.ubuntu.com-20091201152410-bc1esog34wsur9hh
parent: pqm at pqm.ubuntu.com-20091201111614-rphzh10o64wl018g
parent: john at arbash-meinel.com-20091201144013-gdxln2ua9vbeld00
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2009-12-01 15:24:10 +0000
message:
  (jam) Add KnownGraph.add_node() functionality.
modified:
  bzrlib/_known_graph_py.py      _known_graph_py.py-20090610185421-vw8vfda2cgnckgb1-1
  bzrlib/_known_graph_pyx.pyx    _known_graph_pyx.pyx-20090610194911-yjk73td9hpjilas0-1
  bzrlib/tests/test__known_graph.py test__known_graph.py-20090610185421-vw8vfda2cgnckgb1-2
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py	2009-09-07 14:19:05 +0000
+++ b/bzrlib/_known_graph_py.py	2009-11-30 17:25:22 +0000
@@ -17,6 +17,7 @@
 """Implementation of Graph algorithms when we have already loaded everything.
 """
 
+from collections import deque
 from bzrlib import (
     errors,
     revision,
@@ -62,7 +63,7 @@
         :param parent_map: A dictionary mapping key => parent_keys
         """
         self._nodes = {}
-        # Maps {sorted(revision_id, revision_id): heads}
+        # Maps {frozenset(revision_id, revision_id): heads}
         self._known_heads = {}
         self.do_cache = do_cache
         self._initialize_nodes(parent_map)
@@ -132,6 +133,71 @@
                     # Update known_parent_gdfos for a key we couldn't process
                     known_parent_gdfos[child_key] = known_gdfo
 
+    def add_node(self, key, parent_keys):
+        """Add a new node to the graph.
+
+        If this fills in a ghost, then the gdfos of all children will be
+        updated accordingly.
+        
+        :param key: The node being added. If this is a duplicate, this is a
+            no-op.
+        :param parent_keys: The parents of the given node.
+        :return: None (should we return if this was a ghost, etc?)
+        """
+        nodes = self._nodes
+        if key in nodes:
+            node = nodes[key]
+            if node.parent_keys is None:
+                node.parent_keys = parent_keys
+                # A ghost is being added, we can no-longer trust the heads
+                # cache, so clear it
+                self._known_heads.clear()
+            else:
+                # Make sure we compare a list to a list, as tuple != list.
+                parent_keys = list(parent_keys)
+                existing_parent_keys = list(node.parent_keys)
+                if parent_keys == existing_parent_keys:
+                    return # Identical content
+                else:
+                    raise ValueError('Parent key mismatch, existing node %s'
+                        ' has parents of %s not %s'
+                        % (key, existing_parent_keys, parent_keys))
+        else:
+            node = _KnownGraphNode(key, parent_keys)
+            nodes[key] = node
+        parent_gdfo = 0
+        for parent_key in parent_keys:
+            try:
+                parent_node = nodes[parent_key]
+            except KeyError:
+                parent_node = _KnownGraphNode(parent_key, None)
+                # Ghosts and roots have gdfo 1
+                parent_node.gdfo = 1
+                nodes[parent_key] = parent_node
+            if parent_gdfo < parent_node.gdfo:
+                parent_gdfo = parent_node.gdfo
+            parent_node.child_keys.append(key)
+        node.gdfo = parent_gdfo + 1
+        # Now fill the gdfo to all children
+        # Note that this loop is slightly inefficient, in that we may visit the
+        # same child (and its decendents) more than once, however, it is
+        # 'efficient' in that we only walk to nodes that would be updated,
+        # rather than all nodes
+        # We use a deque rather than a simple list stack, to go for BFD rather
+        # than DFD. So that if a longer path is possible, we walk it before we
+        # get to the final child
+        pending = deque([node])
+        while pending:
+            node = pending.popleft()
+            next_gdfo = node.gdfo + 1
+            for child_key in node.child_keys:
+                child = nodes[child_key]
+                if child.gdfo < next_gdfo:
+                    # This child is being updated, we need to check its
+                    # children
+                    child.gdfo = next_gdfo
+                    pending.append(child)
+
     def heads(self, keys):
         """Return the heads from amongst keys.
 

=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-09-07 14:19:05 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-11-30 17:25:22 +0000
@@ -51,6 +51,7 @@
 
     void Py_INCREF(object)
 
+from collections import deque
 import gc
 
 from bzrlib import errors, revision
@@ -192,7 +193,7 @@
     """This is a class which assumes we already know the full graph."""
 
     cdef public object _nodes
-    cdef object _known_heads
+    cdef public object _known_heads
     cdef public int do_cache
 
     def __init__(self, parent_map, do_cache=True):
@@ -232,6 +233,28 @@
             node = <_KnownGraphNode>temp_node
         return node
 
+    cdef _populate_parents(self, _KnownGraphNode node, parent_keys):
+        cdef Py_ssize_t num_parent_keys, pos
+        cdef _KnownGraphNode parent_node
+
+        num_parent_keys = len(parent_keys)
+        # We know how many parents, so we pre allocate the tuple
+        parent_nodes = PyTuple_New(num_parent_keys)
+        for pos from 0 <= pos < num_parent_keys:
+            # Note: it costs us 10ms out of 40ms to lookup all of these
+            #       parents, it doesn't seem to be an allocation overhead,
+            #       but rather a lookup overhead. There doesn't seem to be
+            #       a way around it, and that is one reason why
+            #       KnownGraphNode maintains a direct pointer to the parent
+            #       node.
+            # We use [] because parent_keys may be a tuple or list
+            parent_node = self._get_or_create_node(parent_keys[pos])
+            # PyTuple_SET_ITEM will steal a reference, so INCREF first
+            Py_INCREF(parent_node)
+            PyTuple_SET_ITEM(parent_nodes, pos, parent_node)
+            PyList_Append(parent_node.children, node)
+        node.parents = parent_nodes
+
     def _initialize_nodes(self, parent_map):
         """Populate self._nodes.
 
@@ -242,7 +265,7 @@
           child keys,
         """
         cdef PyObject *temp_key, *temp_parent_keys, *temp_node
-        cdef Py_ssize_t pos, pos2, num_parent_keys
+        cdef Py_ssize_t pos
         cdef _KnownGraphNode node
         cdef _KnownGraphNode parent_node
 
@@ -253,24 +276,8 @@
         while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
             key = <object>temp_key
             parent_keys = <object>temp_parent_keys
-            num_parent_keys = len(parent_keys)
             node = self._get_or_create_node(key)
-            # We know how many parents, so we pre allocate the tuple
-            parent_nodes = PyTuple_New(num_parent_keys)
-            for pos2 from 0 <= pos2 < num_parent_keys:
-                # Note: it costs us 10ms out of 40ms to lookup all of these
-                #       parents, it doesn't seem to be an allocation overhead,
-                #       but rather a lookup overhead. There doesn't seem to be
-                #       a way around it, and that is one reason why
-                #       KnownGraphNode maintains a direct pointer to the parent
-                #       node.
-                # We use [] because parent_keys may be a tuple or list
-                parent_node = self._get_or_create_node(parent_keys[pos2])
-                # PyTuple_SET_ITEM will steal a reference, so INCREF first
-                Py_INCREF(parent_node)
-                PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
-                PyList_Append(parent_node.children, node)
-            node.parents = parent_nodes
+            self._populate_parents(node, parent_keys)
 
     def _find_tails(self):
         cdef PyObject *temp_node
@@ -334,6 +341,76 @@
                     # anymore
                     child.seen = 0
 
+    def add_node(self, key, parent_keys):
+        """Add a new node to the graph.
+
+        If this fills in a ghost, then the gdfos of all children will be
+        updated accordingly.
+        
+        :param key: The node being added. If this is a duplicate, this is a
+            no-op.
+        :param parent_keys: The parents of the given node.
+        :return: None (should we return if this was a ghost, etc?)
+        """
+        cdef PyObject *maybe_node
+        cdef _KnownGraphNode node, parent_node, child_node
+        cdef long parent_gdfo, next_gdfo
+
+        maybe_node = PyDict_GetItem(self._nodes, key)
+        if maybe_node != NULL:
+            node = <_KnownGraphNode>maybe_node
+            if node.parents is None:
+                # We are filling in a ghost
+                self._populate_parents(node, parent_keys)
+                # We can't trust cached heads anymore
+                self._known_heads.clear()
+            else: # Ensure that the parent_key list matches
+                existing_parent_keys = []
+                for parent_node in node.parents:
+                    existing_parent_keys.append(parent_node.key)
+                # Make sure we use a list for the comparison, in case it was a
+                # tuple, etc
+                parent_keys = list(parent_keys)
+                if existing_parent_keys == parent_keys:
+                    # Exact match, nothing more to do
+                    return
+                else:
+                    raise ValueError('Parent key mismatch, existing node %s'
+                        ' has parents of %s not %s'
+                        % (key, existing_parent_keys, parent_keys))
+        else:
+            node = _KnownGraphNode(key)
+            PyDict_SetItem(self._nodes, key, node)
+            self._populate_parents(node, parent_keys)
+        parent_gdfo = 0
+        for parent_node in node.parents:
+            if parent_node.gdfo == -1:
+                # This is a newly introduced ghost, so it gets gdfo of 1
+                parent_node.gdfo = 1
+            if parent_gdfo < parent_node.gdfo:
+                parent_gdfo = parent_node.gdfo
+        node.gdfo = parent_gdfo + 1
+        # Now fill the gdfo to all children
+        # Note that this loop is slightly inefficient, in that we may visit the
+        # same child (and its decendents) more than once, however, it is
+        # 'efficient' in that we only walk to nodes that would be updated,
+        # rather than all nodes
+        # We use a deque rather than a simple list stack, to go for BFD rather
+        # than DFD. So that if a longer path is possible, we walk it before we
+        # get to the final child
+        pending = deque([node])
+        pending_popleft = pending.popleft
+        pending_append = pending.append
+        while pending:
+            node = pending_popleft()
+            next_gdfo = node.gdfo + 1
+            for child_node in node.children:
+                if child_node.gdfo < next_gdfo:
+                    # This child is being updated, we need to check its
+                    # children
+                    child_node.gdfo = next_gdfo
+                    pending_append(child_node)
+
     def heads(self, keys):
         """Return the heads from amongst keys.
 

=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py	2009-09-07 14:19:05 +0000
+++ b/bzrlib/tests/test__known_graph.py	2009-12-01 14:40:13 +0000
@@ -154,6 +154,73 @@
         self.assertGDFO(graph, 'a', 5)
         self.assertGDFO(graph, 'c', 5)
 
+    def test_add_existing_node(self):
+        graph = self.make_known_graph(test_graph.ancestry_1)
+        # Add a node that already exists with identical content
+        # This is a 'no-op'
+        self.assertGDFO(graph, 'rev4', 5)
+        graph.add_node('rev4', ['rev3', 'rev2b'])
+        self.assertGDFO(graph, 'rev4', 5)
+        # This also works if we use a tuple rather than a list
+        graph.add_node('rev4', ('rev3', 'rev2b'))
+
+    def test_add_existing_node_mismatched_parents(self):
+        graph = self.make_known_graph(test_graph.ancestry_1)
+        self.assertRaises(ValueError, graph.add_node, 'rev4',
+                          ['rev2b', 'rev3'])
+
+    def test_add_node_with_ghost_parent(self):
+        graph = self.make_known_graph(test_graph.ancestry_1)
+        graph.add_node('rev5', ['rev2b', 'revGhost'])
+        self.assertGDFO(graph, 'rev5', 4)
+        self.assertGDFO(graph, 'revGhost', 1)
+
+    def test_add_new_root(self):
+        graph = self.make_known_graph(test_graph.ancestry_1)
+        graph.add_node('rev5', [])
+        self.assertGDFO(graph, 'rev5', 1)
+
+    def test_add_with_all_ghost_parents(self):
+        graph = self.make_known_graph(test_graph.ancestry_1)
+        graph.add_node('rev5', ['ghost'])
+        self.assertGDFO(graph, 'rev5', 2)
+        self.assertGDFO(graph, 'ghost', 1)
+
+    def test_gdfo_after_add_node(self):
+        graph = self.make_known_graph(test_graph.ancestry_1)
+        self.assertEqual([], graph.get_child_keys('rev4'))
+        graph.add_node('rev5', ['rev4'])
+        self.assertEqual(['rev4'], graph.get_parent_keys('rev5'))
+        self.assertEqual(['rev5'], graph.get_child_keys('rev4'))
+        self.assertEqual([], graph.get_child_keys('rev5'))
+        self.assertGDFO(graph, 'rev5', 6)
+        graph.add_node('rev6', ['rev2b'])
+        graph.add_node('rev7', ['rev6'])
+        graph.add_node('rev8', ['rev7', 'rev5'])
+        self.assertGDFO(graph, 'rev5', 6)
+        self.assertGDFO(graph, 'rev6', 4)
+        self.assertGDFO(graph, 'rev7', 5)
+        self.assertGDFO(graph, 'rev8', 7)
+
+    def test_fill_in_ghost(self):
+        graph = self.make_known_graph(test_graph.with_ghost)
+        # Add in a couple nodes and then fill in the 'ghost' so that it should
+        # cause renumbering of children nodes
+        graph.add_node('x', [])
+        graph.add_node('y', ['x'])
+        graph.add_node('z', ['y'])
+        graph.add_node('g', ['z'])
+        self.assertGDFO(graph, 'f', 2)
+        self.assertGDFO(graph, 'e', 3)
+        self.assertGDFO(graph, 'x', 1)
+        self.assertGDFO(graph, 'y', 2)
+        self.assertGDFO(graph, 'z', 3)
+        self.assertGDFO(graph, 'g', 4)
+        self.assertGDFO(graph, 'b', 4)
+        self.assertGDFO(graph, 'd', 5)
+        self.assertGDFO(graph, 'a', 5)
+        self.assertGDFO(graph, 'c', 6)
+
 
 class TestKnownGraphHeads(TestCaseWithKnownGraph):
 
@@ -260,6 +327,14 @@
         self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c', 'e', 'g']))
         self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c', 'f']))
 
+    def test_filling_in_ghosts_resets_head_cache(self):
+        graph = self.make_known_graph(test_graph.with_ghost)
+        self.assertEqual(set(['e', 'g']), graph.heads(['e', 'g']))
+        # 'g' is filled in, and decends from 'e', so the heads result is now
+        # different
+        graph.add_node('g', ['e'])
+        self.assertEqual(set(['g']), graph.heads(['e', 'g']))
+
 
 class TestKnownGraphTopoSort(TestCaseWithKnownGraph):
 




More information about the bazaar-commits mailing list