Rev 4849: (jam) Add KnownGraph.add_node() functionality. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Tue Dec 1 15:24:11 GMT 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4849 [merge]
revision-id: pqm at pqm.ubuntu.com-20091201152410-bc1esog34wsur9hh
parent: pqm at pqm.ubuntu.com-20091201111614-rphzh10o64wl018g
parent: john at arbash-meinel.com-20091201144013-gdxln2ua9vbeld00
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2009-12-01 15:24:10 +0000
message:
(jam) Add KnownGraph.add_node() functionality.
modified:
bzrlib/_known_graph_py.py _known_graph_py.py-20090610185421-vw8vfda2cgnckgb1-1
bzrlib/_known_graph_pyx.pyx _known_graph_pyx.pyx-20090610194911-yjk73td9hpjilas0-1
bzrlib/tests/test__known_graph.py test__known_graph.py-20090610185421-vw8vfda2cgnckgb1-2
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py 2009-09-07 14:19:05 +0000
+++ b/bzrlib/_known_graph_py.py 2009-11-30 17:25:22 +0000
@@ -17,6 +17,7 @@
"""Implementation of Graph algorithms when we have already loaded everything.
"""
+from collections import deque
from bzrlib import (
errors,
revision,
@@ -62,7 +63,7 @@
:param parent_map: A dictionary mapping key => parent_keys
"""
self._nodes = {}
- # Maps {sorted(revision_id, revision_id): heads}
+ # Maps {frozenset(revision_id, revision_id): heads}
self._known_heads = {}
self.do_cache = do_cache
self._initialize_nodes(parent_map)
@@ -132,6 +133,71 @@
# Update known_parent_gdfos for a key we couldn't process
known_parent_gdfos[child_key] = known_gdfo
+ def add_node(self, key, parent_keys):
+ """Add a new node to the graph.
+
+ If this fills in a ghost, then the gdfos of all children will be
+ updated accordingly.
+
+ :param key: The node being added. If this is a duplicate, this is a
+ no-op.
+ :param parent_keys: The parents of the given node.
+ :return: None (should we return if this was a ghost, etc?)
+ """
+ nodes = self._nodes
+ if key in nodes:
+ node = nodes[key]
+ if node.parent_keys is None:
+ node.parent_keys = parent_keys
+ # A ghost is being added, we can no-longer trust the heads
+ # cache, so clear it
+ self._known_heads.clear()
+ else:
+ # Make sure we compare a list to a list, as tuple != list.
+ parent_keys = list(parent_keys)
+ existing_parent_keys = list(node.parent_keys)
+ if parent_keys == existing_parent_keys:
+ return # Identical content
+ else:
+ raise ValueError('Parent key mismatch, existing node %s'
+ ' has parents of %s not %s'
+ % (key, existing_parent_keys, parent_keys))
+ else:
+ node = _KnownGraphNode(key, parent_keys)
+ nodes[key] = node
+ parent_gdfo = 0
+ for parent_key in parent_keys:
+ try:
+ parent_node = nodes[parent_key]
+ except KeyError:
+ parent_node = _KnownGraphNode(parent_key, None)
+ # Ghosts and roots have gdfo 1
+ parent_node.gdfo = 1
+ nodes[parent_key] = parent_node
+ if parent_gdfo < parent_node.gdfo:
+ parent_gdfo = parent_node.gdfo
+ parent_node.child_keys.append(key)
+ node.gdfo = parent_gdfo + 1
+ # Now fill the gdfo to all children
+ # Note that this loop is slightly inefficient, in that we may visit the
+ # same child (and its decendents) more than once, however, it is
+ # 'efficient' in that we only walk to nodes that would be updated,
+ # rather than all nodes
+ # We use a deque rather than a simple list stack, to go for BFD rather
+ # than DFD. So that if a longer path is possible, we walk it before we
+ # get to the final child
+ pending = deque([node])
+ while pending:
+ node = pending.popleft()
+ next_gdfo = node.gdfo + 1
+ for child_key in node.child_keys:
+ child = nodes[child_key]
+ if child.gdfo < next_gdfo:
+ # This child is being updated, we need to check its
+ # children
+ child.gdfo = next_gdfo
+ pending.append(child)
+
def heads(self, keys):
"""Return the heads from amongst keys.
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx 2009-09-07 14:19:05 +0000
+++ b/bzrlib/_known_graph_pyx.pyx 2009-11-30 17:25:22 +0000
@@ -51,6 +51,7 @@
void Py_INCREF(object)
+from collections import deque
import gc
from bzrlib import errors, revision
@@ -192,7 +193,7 @@
"""This is a class which assumes we already know the full graph."""
cdef public object _nodes
- cdef object _known_heads
+ cdef public object _known_heads
cdef public int do_cache
def __init__(self, parent_map, do_cache=True):
@@ -232,6 +233,28 @@
node = <_KnownGraphNode>temp_node
return node
+ cdef _populate_parents(self, _KnownGraphNode node, parent_keys):
+ cdef Py_ssize_t num_parent_keys, pos
+ cdef _KnownGraphNode parent_node
+
+ num_parent_keys = len(parent_keys)
+ # We know how many parents, so we pre allocate the tuple
+ parent_nodes = PyTuple_New(num_parent_keys)
+ for pos from 0 <= pos < num_parent_keys:
+ # Note: it costs us 10ms out of 40ms to lookup all of these
+ # parents, it doesn't seem to be an allocation overhead,
+ # but rather a lookup overhead. There doesn't seem to be
+ # a way around it, and that is one reason why
+ # KnownGraphNode maintains a direct pointer to the parent
+ # node.
+ # We use [] because parent_keys may be a tuple or list
+ parent_node = self._get_or_create_node(parent_keys[pos])
+ # PyTuple_SET_ITEM will steal a reference, so INCREF first
+ Py_INCREF(parent_node)
+ PyTuple_SET_ITEM(parent_nodes, pos, parent_node)
+ PyList_Append(parent_node.children, node)
+ node.parents = parent_nodes
+
def _initialize_nodes(self, parent_map):
"""Populate self._nodes.
@@ -242,7 +265,7 @@
child keys,
"""
cdef PyObject *temp_key, *temp_parent_keys, *temp_node
- cdef Py_ssize_t pos, pos2, num_parent_keys
+ cdef Py_ssize_t pos
cdef _KnownGraphNode node
cdef _KnownGraphNode parent_node
@@ -253,24 +276,8 @@
while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
key = <object>temp_key
parent_keys = <object>temp_parent_keys
- num_parent_keys = len(parent_keys)
node = self._get_or_create_node(key)
- # We know how many parents, so we pre allocate the tuple
- parent_nodes = PyTuple_New(num_parent_keys)
- for pos2 from 0 <= pos2 < num_parent_keys:
- # Note: it costs us 10ms out of 40ms to lookup all of these
- # parents, it doesn't seem to be an allocation overhead,
- # but rather a lookup overhead. There doesn't seem to be
- # a way around it, and that is one reason why
- # KnownGraphNode maintains a direct pointer to the parent
- # node.
- # We use [] because parent_keys may be a tuple or list
- parent_node = self._get_or_create_node(parent_keys[pos2])
- # PyTuple_SET_ITEM will steal a reference, so INCREF first
- Py_INCREF(parent_node)
- PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
- PyList_Append(parent_node.children, node)
- node.parents = parent_nodes
+ self._populate_parents(node, parent_keys)
def _find_tails(self):
cdef PyObject *temp_node
@@ -334,6 +341,76 @@
# anymore
child.seen = 0
+ def add_node(self, key, parent_keys):
+ """Add a new node to the graph.
+
+ If this fills in a ghost, then the gdfos of all children will be
+ updated accordingly.
+
+ :param key: The node being added. If this is a duplicate, this is a
+ no-op.
+ :param parent_keys: The parents of the given node.
+ :return: None (should we return if this was a ghost, etc?)
+ """
+ cdef PyObject *maybe_node
+ cdef _KnownGraphNode node, parent_node, child_node
+ cdef long parent_gdfo, next_gdfo
+
+ maybe_node = PyDict_GetItem(self._nodes, key)
+ if maybe_node != NULL:
+ node = <_KnownGraphNode>maybe_node
+ if node.parents is None:
+ # We are filling in a ghost
+ self._populate_parents(node, parent_keys)
+ # We can't trust cached heads anymore
+ self._known_heads.clear()
+ else: # Ensure that the parent_key list matches
+ existing_parent_keys = []
+ for parent_node in node.parents:
+ existing_parent_keys.append(parent_node.key)
+ # Make sure we use a list for the comparison, in case it was a
+ # tuple, etc
+ parent_keys = list(parent_keys)
+ if existing_parent_keys == parent_keys:
+ # Exact match, nothing more to do
+ return
+ else:
+ raise ValueError('Parent key mismatch, existing node %s'
+ ' has parents of %s not %s'
+ % (key, existing_parent_keys, parent_keys))
+ else:
+ node = _KnownGraphNode(key)
+ PyDict_SetItem(self._nodes, key, node)
+ self._populate_parents(node, parent_keys)
+ parent_gdfo = 0
+ for parent_node in node.parents:
+ if parent_node.gdfo == -1:
+ # This is a newly introduced ghost, so it gets gdfo of 1
+ parent_node.gdfo = 1
+ if parent_gdfo < parent_node.gdfo:
+ parent_gdfo = parent_node.gdfo
+ node.gdfo = parent_gdfo + 1
+ # Now fill the gdfo to all children
+ # Note that this loop is slightly inefficient, in that we may visit the
+ # same child (and its decendents) more than once, however, it is
+ # 'efficient' in that we only walk to nodes that would be updated,
+ # rather than all nodes
+ # We use a deque rather than a simple list stack, to go for BFD rather
+ # than DFD. So that if a longer path is possible, we walk it before we
+ # get to the final child
+ pending = deque([node])
+ pending_popleft = pending.popleft
+ pending_append = pending.append
+ while pending:
+ node = pending_popleft()
+ next_gdfo = node.gdfo + 1
+ for child_node in node.children:
+ if child_node.gdfo < next_gdfo:
+ # This child is being updated, we need to check its
+ # children
+ child_node.gdfo = next_gdfo
+ pending_append(child_node)
+
def heads(self, keys):
"""Return the heads from amongst keys.
=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py 2009-09-07 14:19:05 +0000
+++ b/bzrlib/tests/test__known_graph.py 2009-12-01 14:40:13 +0000
@@ -154,6 +154,73 @@
self.assertGDFO(graph, 'a', 5)
self.assertGDFO(graph, 'c', 5)
+ def test_add_existing_node(self):
+ graph = self.make_known_graph(test_graph.ancestry_1)
+ # Add a node that already exists with identical content
+ # This is a 'no-op'
+ self.assertGDFO(graph, 'rev4', 5)
+ graph.add_node('rev4', ['rev3', 'rev2b'])
+ self.assertGDFO(graph, 'rev4', 5)
+ # This also works if we use a tuple rather than a list
+ graph.add_node('rev4', ('rev3', 'rev2b'))
+
+ def test_add_existing_node_mismatched_parents(self):
+ graph = self.make_known_graph(test_graph.ancestry_1)
+ self.assertRaises(ValueError, graph.add_node, 'rev4',
+ ['rev2b', 'rev3'])
+
+ def test_add_node_with_ghost_parent(self):
+ graph = self.make_known_graph(test_graph.ancestry_1)
+ graph.add_node('rev5', ['rev2b', 'revGhost'])
+ self.assertGDFO(graph, 'rev5', 4)
+ self.assertGDFO(graph, 'revGhost', 1)
+
+ def test_add_new_root(self):
+ graph = self.make_known_graph(test_graph.ancestry_1)
+ graph.add_node('rev5', [])
+ self.assertGDFO(graph, 'rev5', 1)
+
+ def test_add_with_all_ghost_parents(self):
+ graph = self.make_known_graph(test_graph.ancestry_1)
+ graph.add_node('rev5', ['ghost'])
+ self.assertGDFO(graph, 'rev5', 2)
+ self.assertGDFO(graph, 'ghost', 1)
+
+ def test_gdfo_after_add_node(self):
+ graph = self.make_known_graph(test_graph.ancestry_1)
+ self.assertEqual([], graph.get_child_keys('rev4'))
+ graph.add_node('rev5', ['rev4'])
+ self.assertEqual(['rev4'], graph.get_parent_keys('rev5'))
+ self.assertEqual(['rev5'], graph.get_child_keys('rev4'))
+ self.assertEqual([], graph.get_child_keys('rev5'))
+ self.assertGDFO(graph, 'rev5', 6)
+ graph.add_node('rev6', ['rev2b'])
+ graph.add_node('rev7', ['rev6'])
+ graph.add_node('rev8', ['rev7', 'rev5'])
+ self.assertGDFO(graph, 'rev5', 6)
+ self.assertGDFO(graph, 'rev6', 4)
+ self.assertGDFO(graph, 'rev7', 5)
+ self.assertGDFO(graph, 'rev8', 7)
+
+ def test_fill_in_ghost(self):
+ graph = self.make_known_graph(test_graph.with_ghost)
+ # Add in a couple nodes and then fill in the 'ghost' so that it should
+ # cause renumbering of children nodes
+ graph.add_node('x', [])
+ graph.add_node('y', ['x'])
+ graph.add_node('z', ['y'])
+ graph.add_node('g', ['z'])
+ self.assertGDFO(graph, 'f', 2)
+ self.assertGDFO(graph, 'e', 3)
+ self.assertGDFO(graph, 'x', 1)
+ self.assertGDFO(graph, 'y', 2)
+ self.assertGDFO(graph, 'z', 3)
+ self.assertGDFO(graph, 'g', 4)
+ self.assertGDFO(graph, 'b', 4)
+ self.assertGDFO(graph, 'd', 5)
+ self.assertGDFO(graph, 'a', 5)
+ self.assertGDFO(graph, 'c', 6)
+
class TestKnownGraphHeads(TestCaseWithKnownGraph):
@@ -260,6 +327,14 @@
self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c', 'e', 'g']))
self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c', 'f']))
+ def test_filling_in_ghosts_resets_head_cache(self):
+ graph = self.make_known_graph(test_graph.with_ghost)
+ self.assertEqual(set(['e', 'g']), graph.heads(['e', 'g']))
+ # 'g' is filled in, and decends from 'e', so the heads result is now
+ # different
+ graph.add_node('g', ['e'])
+ self.assertEqual(set(['g']), graph.heads(['e', 'g']))
+
class TestKnownGraphTopoSort(TestCaseWithKnownGraph):
More information about the bazaar-commits
mailing list