Rev 3378: Bring in some of the changes from graph_update and graph_optimization in http://bzr.arbash-meinel.com/branches/bzr/1.4-dev/find_differences
John Arbash Meinel
john at arbash-meinel.com
Tue Apr 22 19:55:17 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.4-dev/find_differences
------------------------------------------------------------
revno: 3378
revision-id: john at arbash-meinel.com-20080422184821-wd5n1y365ev60lyq
parent: pqm at pqm.ubuntu.com-20080422120059-sony5sthnlewabge
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: find_differences
timestamp: Tue 2008-04-22 13:48:21 -0500
message:
Bring in some of the changes from graph_update and graph_optimization
_BreadthFirstSearcher.find_seen_ancestors takes a list/set instead of a single node.
find_difference() is still broken, but mostly just in ways that it was already broken.
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
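For context, a minimal sketch of what the fixed find_difference() should
return, assuming a bzr 1.4-era bzrlib on sys.path (Graph and
DictParentsProvider are real bzrlib.graph classes; the tiny ancestry dict is
invented for illustration):

    from bzrlib.graph import DictParentsProvider, Graph

    # rev1 is the sole common ancestor of the two tips being compared.
    ancestry = {'rev1': [], 'rev2a': ['rev1'], 'rev2b': ['rev1'],
                'rev3': ['rev2a']}
    graph = Graph(DictParentsProvider(ancestry))
    # Each side of the result excludes every common ancestor.
    left, right = graph.find_difference('rev3', 'rev2b')
    print sorted(left), sorted(right)
    # -> ['rev2a', 'rev3'] ['rev2b']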
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2008-03-15 13:51:09 +0000
+++ b/bzrlib/graph.py 2008-04-22 18:48:21 +0000
@@ -199,10 +199,12 @@
def find_difference(self, left_revision, right_revision):
"""Determine the graph difference between two revisions"""
- border, common, (left, right) = self._find_border_ancestors(
+ border, common, searchers = self._find_border_ancestors(
[left_revision, right_revision])
- return (left.difference(right).difference(common),
- right.difference(left).difference(common))
+ self._search_for_extra_common(common, searchers)
+ left = searchers[0].seen
+ right = searchers[1].seen
+ return (left.difference(right), right.difference(left))
@symbol_versioning.deprecated_method(symbol_versioning.one_one)
def get_parents(self, revisions):
@@ -261,15 +263,14 @@
border_ancestors = set()
def update_common(searcher, revisions):
w_seen_ancestors = searcher.find_seen_ancestors(
- revision)
+ [revision])
stopped = searcher.stop_searching_any(w_seen_ancestors)
common_ancestors.update(w_seen_ancestors)
common_searcher.start_searching(stopped)
while True:
if len(active_searchers) == 0:
- return border_ancestors, common_ancestors, [s.seen for s in
- searchers]
+ return border_ancestors, common_ancestors, searchers
try:
new_common = common_searcher.next()
common_ancestors.update(new_common)
@@ -387,7 +388,7 @@
new_common.add(ancestor)
for searcher in searchers.itervalues():
seen_ancestors =\
- searcher.find_seen_ancestors(ancestor)
+ searcher.find_seen_ancestors([ancestor])
searcher.stop_searching_any(seen_ancestors)
common_walker.start_searching(new_common)
return candidate_heads
@@ -469,6 +470,163 @@
return set([candidate_descendant]) == self.heads(
[candidate_ancestor, candidate_descendant])
+ def _search_for_extra_common(self, common, searchers):
+ """Make sure that unique nodes are genuinely unique.
+
+ After a simple search, we end up with genuine common nodes, but some
+ uncommon nodes might actually be descended from common nodes, and we
+ just didn't search far enough.
+
+ We know that we have searched enough when all common search tips are
+ descended from all unique (uncommon) nodes because we know that a node
+ cannot be an ancestor of its own ancestor.
+
+ :param common: A set of common nodes
+ :param searchers: The searchers returned from _find_border_ancestors
+ :return: None
+ """
+ # Basic algorithm...
+ # A) The passed in searchers should all be on the same tips, thus
+ # they should be considered the "common" searchers.
+ # B) We find the difference between the searchers, these are the
+ # "unique" nodes for each side.
+ # C) We do a quick culling so that we only start searching from the
+ # more interesting unique nodes. (A unique ancestor is more
+ # interesting than any of its children.)
+ # D) We start searching for ancestors common to all unique nodes.
+ # E) We have the common searchers stop searching any ancestors of
+ # nodes found by (D)
+ # F) When there are no more common search tips, we stop
+ assert len(searchers) == 2, (
+ "Algorithm not yet implemented for > 2 searchers")
+ common_searchers = searchers
+ left_searcher = searchers[0]
+ right_searcher = searchers[1]
+ unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
+ unique = self._remove_simple_descendants(unique,
+ self.get_parent_map(unique))
+ # TODO: jam 20071214 Would it be possible to seed these searchers with
+ # the revisions that we have already seen on each side?
+ # Maybe something like:
+ # unique_searchers = []
+ # for left_unique in left.difference(right):
+ # l_searcher = self._make_breadth_first_searcher(left_unique)
+ # l_searcher.seen.update(left.seen)
+ # ... (same for right.difference(left))
+ # This might also be easier to set up for the case of >2
+ # searchers.
+ unique_searchers = [self._make_breadth_first_searcher([r])
+ for r in unique]
+ # Aggregate all of the searchers into a single common searcher; would
+ # it be okay to do this?
+ # common_searcher = self._make_breadth_first_searcher([])
+ # for searcher in searchers:
+ # common_searcher.start_searching(searcher.will_search())
+ # common_searcher.seen.update(searcher.seen)
+ common_ancestors_unique = set()
+
+ while True: # Exit when neither side finds new nodes (checked at the bottom)
+ # XXX: Any nodes here which don't match between searchers indicate
+ # that we have found a genuinely unique node, which would not
+ # have been found by the other searching techniques
+ newly_seen_common = set()
+ for searcher in common_searchers:
+ newly_seen_common.update(searcher.step())
+ newly_seen_unique = set()
+ for searcher in unique_searchers:
+ newly_seen_unique.update(searcher.step())
+ new_common_unique = set()
+ for revision in newly_seen_unique:
+ if revision in common_ancestors_unique:
+ # TODO: Do we need to add it to new_common_unique, since it
+ # seems to have already been found... ?
+ new_common_unique.add(revision)
+ continue
+ for searcher in unique_searchers:
+ if revision not in searcher.seen:
+ break
+ else:
+ # This is a border because it is the first common node we see
+ # after walking for a while.
+ new_common_unique.add(revision)
+ if newly_seen_common:
+ # These are nodes descended from one of the 'common' searchers.
+ # Make sure all searchers are on the same page
+ for searcher in common_searchers:
+ newly_seen_common.update(searcher.find_seen_ancestors(newly_seen_common))
+ for searcher in common_searchers:
+ searcher.start_searching(newly_seen_common)
+ if new_common_unique:
+ # We found some ancestors that are common, jump all the way to
+ # their most ancestral node that we have already seen.
+ for searcher in unique_searchers:
+ new_common_unique.update(searcher.find_seen_ancestors(new_common_unique))
+ # Since these are common, we can grab another set of ancestors
+ # that we have seen
+ for searcher in common_searchers:
+ new_common_unique.update(searcher.find_seen_ancestors(new_common_unique))
+
+ # Now we have a complete set of common nodes which are
+ # ancestors of the unique nodes.
+ # We can tell all of the unique searchers to start at these
+ # nodes, and tell all of the common searchers to *stop*
+ # searching these nodes
+ for searcher in unique_searchers:
+ searcher.start_searching(new_common_unique)
+ for searcher in common_searchers:
+ searcher.stop_searching_any(new_common_unique)
+ common_ancestors_unique.update(new_common_unique)
+ if not newly_seen_common: # No new common nodes, we are done
+ break
+ if not newly_seen_unique: # No new unique nodes, we are done
+ break
+
+
+ def _remove_simple_descendants(self, revisions, parent_map):
+ """remove revisions which are children of other ones in the set
+
+ This doesn't do any graph searching, it just checks the immediate
+ parent_map to find if there are any children which can be removed.
+
+ :param revisions: A set of revision_ids
+ :return: A set of revision_ids with the children removed
+ """
+ simple_ancestors = revisions.copy()
+ # TODO: jam 20071214 we *could* restrict it to searching only the
+ # parent_map of revisions already present in 'revisions', but
+ # considering the general use case, I think this is actually
+ # better.
+
+ # This is the same as the following loop. I don't know that it is any
+ # faster.
+ ## simple_ancestors.difference_update(r for r, p_ids in parent_map.iteritems()
+ ## if p_ids is not None and revisions.intersection(p_ids))
+ ## return simple_ancestors
+
+ # Yet Another Way, invert the parent map (which can be cached)
+ ## descendants = {}
+ ## for revision_id, parent_ids in parent_map.iteritems():
+ ## for p_id in parent_ids:
+ ## descendants.setdefault(p_id, []).append(revision_id)
+ ## for revision in revisions.intersection(descendants):
+ ## simple_ancestors.difference_update(descendants[revision])
+ ## return simple_ancestors
+ for revision, parent_ids in parent_map.iteritems():
+ if parent_ids is None:
+ continue
+ for parent_id in parent_ids:
+ if parent_id in revisions:
+ # This node has a parent present in the set, so we can
+ # remove it
+ simple_ancestors.discard(revision)
+ break
+ return simple_ancestors
+
class HeadsCache(object):
"""A cache of results for graph heads calls."""
@@ -582,6 +740,12 @@
return SearchResult(self._started_keys, excludes, len(included_keys),
included_keys)
+ def step(self):
+ try:
+ return self.next()
+ except StopIteration:
+ return ()
+
def next(self):
"""Return the next ancestors of this revision.
@@ -667,16 +831,18 @@
def __iter__(self):
return self
- def find_seen_ancestors(self, revision):
- """Find ancestors of this revision that have already been seen."""
- searcher = _BreadthFirstSearcher([revision], self._parents_provider)
+ def find_seen_ancestors(self, revisions):
+ """Find ancestors of these revisions that have already been seen."""
+ searcher = _BreadthFirstSearcher(revisions, self._parents_provider)
seen_ancestors = set()
for ancestors in searcher:
+ stop_nodes = set()
for ancestor in ancestors:
if ancestor not in self.seen:
- searcher.stop_searching_any([ancestor])
+ stop_nodes.add(ancestor)
else:
seen_ancestors.add(ancestor)
+ searcher.stop_searching_any(stop_nodes)
return seen_ancestors
def stop_searching_any(self, revisions):
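As a plain-Python illustration of the _remove_simple_descendants idea added
above (an independent re-implementation for this email, not the bzrlib code;
parent_map mirrors the {revision_id: parent_ids} mapping that get_parent_map
returns):

    def remove_simple_descendants(revisions, parent_map):
        # Keep only revisions that have no parent inside the set; any
        # node whose parent is present is a simple descendant.
        simple_ancestors = set(revisions)
        for revision, parent_ids in parent_map.iteritems():
            if parent_ids is None:
                continue
            for parent_id in parent_ids:
                if parent_id in revisions:
                    simple_ancestors.discard(revision)
                    break
        return simple_ancestors

    parent_map = {'rev1': [], 'rev2a': ['rev1'], 'rev3': ['rev2a']}
    print sorted(remove_simple_descendants(
        set(['rev1', 'rev2a', 'rev3']), parent_map))
    # -> ['rev1'], matching the 'chain' test case below.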
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2008-03-10 15:10:47 +0000
+++ b/bzrlib/tests/test_graph.py 2008-04-22 18:48:21 +0000
@@ -164,8 +164,8 @@
# Complex shortcut
# This has a failure mode in that a shortcut will find some nodes in common,
-# but the common searcher won't have time to find that one branch is actually
-# in common. The extra nodes at the top are because we want to avoid
+# but the common searcher won't have time to find that one branch is actually
+# in common. The extra nodes at the beginning are because we want to avoid
# walking off the graph. Specifically, node G should be considered common, but
# is likely to be seen by M long before the common searcher finds it.
#
@@ -181,6 +181,32 @@
# |\
# e f
# | |\
+# | g h
+# |/| |
+# i j |
+# | | |
+# | k |
+# | | |
+# | l |
+# |/|/
+# m n
+complex_shortcut = {'a':[NULL_REVISION], 'b':['a'], 'c':['b'], 'd':['c'],
+ 'e':['d'], 'f':['d'], 'g':['f'], 'h':['f'],
+ 'i':['e', 'g'], 'j':['g'], 'k':['j'],
+ 'l':['k'], 'm':['i', 'l'], 'n':['l', 'h']}
+
+# NULL_REVISION
+# |
+# a
+# |
+# b
+# |
+# c
+# |
+# d
+# |\
+# e f
+# | |\
# i | h
# |\| |
# | g |
@@ -192,7 +218,7 @@
# | l |
# |/|/
# m n
-complex_shortcut = {'d':[NULL_REVISION],
+complex_shortcut2 = {'d':[NULL_REVISION],
'x':['d'], 'y':['x'],
'e':['y'], 'f':['d'], 'g':['f', 'i'], 'h':['f'],
'i':['e'], 'j':['g'], 'k':['j'],
@@ -357,6 +387,31 @@
graph.find_unique_lca('rev2a', 'rev2b',
count_steps=True))
+ def assertRemoveDescendants(self, expected, graph, revisions):
+ parents = graph.get_parent_map(revisions)
+ self.assertEqual(expected,
+ graph._remove_simple_descendants(revisions, parents))
+
+ def test__remove_simple_descendants(self):
+ graph = self.make_graph(ancestry_1)
+ self.assertRemoveDescendants(set(['rev1']), graph,
+ set(['rev1', 'rev2a', 'rev2b', 'rev3', 'rev4']))
+
+ def test__remove_simple_descendants_disjoint(self):
+ graph = self.make_graph(ancestry_1)
+ self.assertRemoveDescendants(set(['rev1', 'rev3']), graph,
+ set(['rev1', 'rev3']))
+
+ def test__remove_simple_descendants_chain(self):
+ graph = self.make_graph(ancestry_1)
+ self.assertRemoveDescendants(set(['rev1']), graph,
+ set(['rev1', 'rev2a', 'rev3']))
+
+ def test__remove_simple_descendants_siblings(self):
+ graph = self.make_graph(ancestry_1)
+ self.assertRemoveDescendants(set(['rev2a', 'rev2b']), graph,
+ set(['rev2a', 'rev2b', 'rev3']))
+
def test_unique_lca_criss_cross(self):
"""Ensure we don't pick non-unique lcas in a criss-cross"""
graph = self.make_graph(criss_cross)
@@ -427,9 +482,9 @@
def test_graph_difference_extended_history(self):
graph = self.make_graph(extended_history_shortcut)
- self.expectFailure('find_difference cannot handle shortcuts',
- self.assertEqual, (set(['e']), set(['f'])),
- graph.find_difference('e', 'f'))
self.assertEqual((set(['e']), set(['f'])),
graph.find_difference('e', 'f'))
self.assertEqual((set(['f']), set(['e'])),
@@ -442,17 +497,22 @@
def test_graph_difference_complex_shortcut(self):
graph = self.make_graph(complex_shortcut)
- self.expectFailure('find_difference cannot handle shortcuts',
- self.assertEqual, (set(['m']), set(['h', 'n'])),
- graph.find_difference('m', 'n'))
+ self.assertEqual((set(['m', 'i', 'e']), set(['n', 'h'])),
+ graph.find_difference('m', 'n'))
+
+ def test_graph_difference_complex_shortcut2(self):
+ graph = self.make_graph(complex_shortcut2)
self.assertEqual((set(['m']), set(['h', 'n'])),
graph.find_difference('m', 'n'))
def test_graph_difference_shortcut_extra_root(self):
graph = self.make_graph(shortcut_extra_root)
- self.expectFailure('find_difference cannot handle shortcuts',
- self.assertEqual, (set(['e']), set(['f', 'g'])),
- graph.find_difference('e', 'f'))
self.assertEqual((set(['e']), set(['f', 'g'])),
graph.find_difference('e', 'f'))
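A design note on the new step() helper: it turns the searcher's iterator
protocol into a call that is always safe to make, returning () once the
searcher is exhausted. A rough sketch of the lock-step advance this enables,
assuming a bzr 1.4-era bzrlib (_make_breadth_first_searcher is the same
private helper the patch itself uses; the ancestry dict is invented):

    from bzrlib.graph import DictParentsProvider, Graph

    ancestry = {'rev1': [], 'rev2a': ['rev1'], 'rev2b': ['rev1']}
    graph = Graph(DictParentsProvider(ancestry))
    searchers = [graph._make_breadth_first_searcher(['rev2a']),
                 graph._make_breadth_first_searcher(['rev2b'])]
    while True:
        newly_seen = set()
        for searcher in searchers:
            # step() returns () for an exhausted searcher, so the loop
            # never needs to catch StopIteration per searcher.
            newly_seen.update(searcher.step())
        if not newly_seen:
            break  # every searcher has run dry
        print sorted(newly_seen)
    # First pass prints the start nodes, the second their parents:
    # ['rev2a', 'rev2b'] then ['rev1']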