Rev 4840: Implement KnownGraph.add_node() for the pyrex version. in http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b4-kg-add-node
John Arbash Meinel
john at arbash-meinel.com
Mon Nov 30 17:25:24 GMT 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b4-kg-add-node
------------------------------------------------------------
revno: 4840
revision-id: john at arbash-meinel.com-20091130172522-m373yel3msmjppey
parent: john at arbash-meinel.com-20091130165755-4hia7he55tjv7180
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1.0b4-kg-add-node
timestamp: Mon 2009-11-30 11:25:22 -0600
message:
Implement KnownGraph.add_node() for the pyrex version.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py 2009-11-30 16:57:55 +0000
+++ b/bzrlib/_known_graph_py.py 2009-11-30 17:25:22 +0000
@@ -63,7 +63,7 @@
:param parent_map: A dictionary mapping key => parent_keys
"""
self._nodes = {}
- # Maps {sorted(revision_id, revision_id): heads}
+ # Maps {frozenset(revision_id, revision_id): heads}
self._known_heads = {}
self.do_cache = do_cache
self._initialize_nodes(parent_map)
@@ -134,18 +134,34 @@
known_parent_gdfos[child_key] = known_gdfo
def add_node(self, key, parent_keys):
- """Add a new node to the graph."""
+ """Add a new node to the graph.
+
+ If this fills in a ghost, then the gdfos of all children will be
+ updated accordingly.
+
+ :param key: The node being added. If this is a duplicate, this is a
+ no-op.
+ :param parent_keys: The parents of the given node.
+ :return: None (should we return if this was a ghost, etc?)
+ """
nodes = self._nodes
if key in nodes:
node = nodes[key]
if node.parent_keys is None:
node.parent_keys = parent_keys
- elif parent_keys == node.parent_keys:
- return # Identical content
+ # A ghost is being added, we can no-longer trust the heads
+ # cache, so clear it
+ self._known_heads.clear()
else:
- raise ValueError('Parent key mismatch, existing node %s'
- ' has parents of %s not %s'
- % (key, node.parent_keys, parent_keys))
+ # Make sure we compare a list to a list, as tuple != list.
+ parent_keys = list(parent_keys)
+ existing_parent_keys = list(node.parent_keys)
+ if parent_keys == existing_parent_keys:
+ return # Identical content
+ else:
+ raise ValueError('Parent key mismatch, existing node %s'
+ ' has parents of %s not %s'
+ % (key, existing_parent_keys, parent_keys))
else:
node = _KnownGraphNode(key, parent_keys)
nodes[key] = node
@@ -182,7 +198,6 @@
child.gdfo = next_gdfo
pending.append(child)
-
def heads(self, keys):
"""Return the heads from amongst keys.
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx 2009-09-07 14:19:05 +0000
+++ b/bzrlib/_known_graph_pyx.pyx 2009-11-30 17:25:22 +0000
@@ -51,6 +51,7 @@
void Py_INCREF(object)
+from collections import deque
import gc
from bzrlib import errors, revision
@@ -192,7 +193,7 @@
"""This is a class which assumes we already know the full graph."""
cdef public object _nodes
- cdef object _known_heads
+ cdef public object _known_heads
cdef public int do_cache
def __init__(self, parent_map, do_cache=True):
@@ -232,6 +233,28 @@
node = <_KnownGraphNode>temp_node
return node
+ cdef _populate_parents(self, _KnownGraphNode node, parent_keys):
+ cdef Py_ssize_t num_parent_keys, pos
+ cdef _KnownGraphNode parent_node
+
+ num_parent_keys = len(parent_keys)
+ # We know how many parents, so we pre allocate the tuple
+ parent_nodes = PyTuple_New(num_parent_keys)
+ for pos from 0 <= pos < num_parent_keys:
+ # Note: it costs us 10ms out of 40ms to lookup all of these
+ # parents, it doesn't seem to be an allocation overhead,
+ # but rather a lookup overhead. There doesn't seem to be
+ # a way around it, and that is one reason why
+ # KnownGraphNode maintains a direct pointer to the parent
+ # node.
+ # We use [] because parent_keys may be a tuple or list
+ parent_node = self._get_or_create_node(parent_keys[pos])
+ # PyTuple_SET_ITEM will steal a reference, so INCREF first
+ Py_INCREF(parent_node)
+ PyTuple_SET_ITEM(parent_nodes, pos, parent_node)
+ PyList_Append(parent_node.children, node)
+ node.parents = parent_nodes
+
def _initialize_nodes(self, parent_map):
"""Populate self._nodes.
@@ -242,7 +265,7 @@
child keys,
"""
cdef PyObject *temp_key, *temp_parent_keys, *temp_node
- cdef Py_ssize_t pos, pos2, num_parent_keys
+ cdef Py_ssize_t pos
cdef _KnownGraphNode node
cdef _KnownGraphNode parent_node
@@ -253,24 +276,8 @@
while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
key = <object>temp_key
parent_keys = <object>temp_parent_keys
- num_parent_keys = len(parent_keys)
node = self._get_or_create_node(key)
- # We know how many parents, so we pre allocate the tuple
- parent_nodes = PyTuple_New(num_parent_keys)
- for pos2 from 0 <= pos2 < num_parent_keys:
- # Note: it costs us 10ms out of 40ms to lookup all of these
- # parents, it doesn't seem to be an allocation overhead,
- # but rather a lookup overhead. There doesn't seem to be
- # a way around it, and that is one reason why
- # KnownGraphNode maintains a direct pointer to the parent
- # node.
- # We use [] because parent_keys may be a tuple or list
- parent_node = self._get_or_create_node(parent_keys[pos2])
- # PyTuple_SET_ITEM will steal a reference, so INCREF first
- Py_INCREF(parent_node)
- PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
- PyList_Append(parent_node.children, node)
- node.parents = parent_nodes
+ self._populate_parents(node, parent_keys)
def _find_tails(self):
cdef PyObject *temp_node
@@ -334,6 +341,76 @@
# anymore
child.seen = 0
+ def add_node(self, key, parent_keys):
+ """Add a new node to the graph.
+
+ If this fills in a ghost, then the gdfos of all children will be
+ updated accordingly.
+
+ :param key: The node being added. If this is a duplicate, this is a
+ no-op.
+ :param parent_keys: The parents of the given node.
+ :return: None (should we return if this was a ghost, etc?)
+ """
+ cdef PyObject *maybe_node
+ cdef _KnownGraphNode node, parent_node, child_node
+ cdef long parent_gdfo, next_gdfo
+
+ maybe_node = PyDict_GetItem(self._nodes, key)
+ if maybe_node != NULL:
+ node = <_KnownGraphNode>maybe_node
+ if node.parents is None:
+ # We are filling in a ghost
+ self._populate_parents(node, parent_keys)
+ # We can't trust cached heads anymore
+ self._known_heads.clear()
+ else: # Ensure that the parent_key list matches
+ existing_parent_keys = []
+ for parent_node in node.parents:
+ existing_parent_keys.append(parent_node.key)
+ # Make sure we use a list for the comparison, in case it was a
+ # tuple, etc
+ parent_keys = list(parent_keys)
+ if existing_parent_keys == parent_keys:
+ # Exact match, nothing more to do
+ return
+ else:
+ raise ValueError('Parent key mismatch, existing node %s'
+ ' has parents of %s not %s'
+ % (key, existing_parent_keys, parent_keys))
+ else:
+ node = _KnownGraphNode(key)
+ PyDict_SetItem(self._nodes, key, node)
+ self._populate_parents(node, parent_keys)
+ parent_gdfo = 0
+ for parent_node in node.parents:
+ if parent_node.gdfo == -1:
+ # This is a newly introduced ghost, so it gets gdfo of 1
+ parent_node.gdfo = 1
+ if parent_gdfo < parent_node.gdfo:
+ parent_gdfo = parent_node.gdfo
+ node.gdfo = parent_gdfo + 1
+ # Now fill the gdfo to all children
+ # Note that this loop is slightly inefficient, in that we may visit the
+ # same child (and its decendents) more than once, however, it is
+ # 'efficient' in that we only walk to nodes that would be updated,
+ # rather than all nodes
+ # We use a deque rather than a simple list stack, to go for BFD rather
+ # than DFD. So that if a longer path is possible, we walk it before we
+ # get to the final child
+ pending = deque([node])
+ pending_popleft = pending.popleft
+ pending_append = pending.append
+ while pending:
+ node = pending_popleft()
+ next_gdfo = node.gdfo + 1
+ for child_node in node.children:
+ if child_node.gdfo < next_gdfo:
+ # This child is being updated, we need to check its
+ # children
+ child_node.gdfo = next_gdfo
+ pending_append(child_node)
+
def heads(self, keys):
"""Return the heads from amongst keys.
=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py 2009-11-30 16:57:55 +0000
+++ b/bzrlib/tests/test__known_graph.py 2009-11-30 17:25:22 +0000
@@ -161,6 +161,13 @@
self.assertGDFO(graph, 'rev4', 5)
graph.add_node('rev4', ['rev3', 'rev2b'])
self.assertGDFO(graph, 'rev4', 5)
+ # This also works if we use a tuple rather than a list
+ graph.add_node('rev4', ('rev3', 'rev2b'))
+
+ def test_add_existing_node_mismatched_parents(self):
+ graph = self.make_known_graph(test_graph.ancestry_1)
+ self.assertRaises(ValueError, graph.add_node, 'rev4',
+ ['rev2b', 'rev3'])
def test_add_node_with_ghost_parent(self):
graph = self.make_known_graph(test_graph.ancestry_1)
@@ -320,6 +327,16 @@
self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c', 'e', 'g']))
self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c', 'f']))
+ def test_filling_in_ghosts_resets_head_cache(self):
+ if not self.do_cache:
+ raise tests.TestNotApplicable('testing the cache behavior')
+ graph = self.make_known_graph(test_graph.with_ghost)
+ self.assertEqual(set(['e', 'g']), graph.heads(['e', 'g']))
+ # 'g' is filled in, and decends from 'e', so the heads result is now
+ # different
+ graph.add_node('g', ['e'])
+ self.assertEqual(set(['g']), graph.heads(['e', 'g']))
+
class TestKnownGraphTopoSort(TestCaseWithKnownGraph):
More information about the bazaar-commits
mailing list