Rev 3181: (robertc) Add next_with_ghosts to the api on breadth-first-searchers. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Tue Jan 15 02:01:34 GMT 2008
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 3181
revision-id:pqm at pqm.ubuntu.com-20080115020129-jl22ugxkca1rox94
parent: pqm at pqm.ubuntu.com-20080115003405-jfuumkpctmvl2e4r
parent: robertc at robertcollins.net-20080114231145-bv85r8wufwkfm9ee
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2008-01-15 02:01:29 +0000
message:
(robertc) Add next_with_ghosts to the api on breadth-first-searchers.
(Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
------------------------------------------------------------
revno: 3177.3.3
revision-id:robertc at robertcollins.net-20080114231145-bv85r8wufwkfm9ee
parent: robertc at robertcollins.net-20080114005456-d4a5iief649hmtiy
committer: Robert Collins <robertc at robertcollins.net>
branch nick: breadth-first-ghosts
timestamp: Tue 2008-01-15 10:11:45 +1100
message:
Review feedback.
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
------------------------------------------------------------
revno: 3177.3.2
revision-id:robertc at robertcollins.net-20080114005456-d4a5iief649hmtiy
parent: robertc at robertcollins.net-20080113235717-9a1w22q93j81nd0o
committer: Robert Collins <robertc at robertcollins.net>
branch nick: breadth-first-ghosts
timestamp: Mon 2008-01-14 11:54:56 +1100
message:
Update graph searchers stop_searching_any and start_searching for next_with_ghosts.
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
------------------------------------------------------------
revno: 3177.3.1
revision-id:robertc at robertcollins.net-20080113235717-9a1w22q93j81nd0o
parent: pqm at pqm.ubuntu.com-20080111214408-cpkslxu95eg5c45u
committer: Robert Collins <robertc at robertcollins.net>
branch nick: breadth-first-ghosts
timestamp: Mon 2008-01-14 10:57:17 +1100
message:
* New method ``next_with_ghosts`` on the Graph breadth-first-search objects
which will split out ghosts and present parents into two separate sets,
useful for code which needs to be aware of ghosts (e.g. fetching data
cares about ghosts during revision selection). (Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
=== modified file 'NEWS'
--- a/NEWS 2008-01-14 22:49:24 +0000
+++ b/NEWS 2008-01-15 02:01:29 +0000
@@ -170,6 +170,11 @@
inventories. This is primarily used by the ``revision_trees`` method, as
direct access to inventories is discouraged. (Robert Collins)
+ * New method ``next_with_ghosts`` on the Graph breadth-first-search objects
+ which will split out ghosts and present parents into two separate sets,
+ useful for code which needs to be aware of ghosts (e.g. fetching data
+ cares about ghosts during revision selection). (Robert Collins)
+
* Parent Providers should now implement ``get_parent_map`` returning a
dictionary instead of ``get_parents`` returning a list.
``Graph.get_parents`` is now deprecated. (John Arbash Meinel,
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2008-01-11 00:44:17 +0000
+++ b/bzrlib/graph.py 2008-01-14 23:11:45 +0000
@@ -506,39 +506,93 @@
"""
def __init__(self, revisions, parents_provider):
- self._start = set(revisions)
- self._search_revisions = None
- self.seen = set(revisions)
+ self._iterations = 0
+ self._next_query = set(revisions)
+ self.seen = set()
self._parents_provider = parents_provider
+ self._returning = 'next_with_ghosts'
def __repr__(self):
- if self._search_revisions is not None:
- search = 'searching=%r' % (list(self._search_revisions),)
+ if self._iterations:
+ prefix = "searching"
else:
- search = 'starting=%r' % (list(self._start),)
- return ('_BreadthFirstSearcher(%s,'
- ' seen=%r)' % (search, list(self.seen)))
+ prefix = "starting"
+ search = '%s=%r' % (prefix, list(self._next_query))
+ return ('_BreadthFirstSearcher(iterations=%d, %s,'
+ ' seen=%r)' % (self._iterations, search, list(self.seen)))
def next(self):
"""Return the next ancestors of this revision.
Ancestors are returned in the order they are seen in a breadth-first
- traversal. No ancestor will be returned more than once.
+ traversal. No ancestor will be returned more than once. Ancestors are
+ returned before their parentage is queried, so ghosts and missing
+ revisions (including the start revisions) are included in the result.
+ This can save a round trip in LCA style calculation by allowing
+ convergence to be detected without reading the data for the revision
+ the convergence occurs on.
+
+ :return: A set of revision_ids.
"""
- if self._search_revisions is None:
- self._search_revisions = self._start
+ if self._returning != 'next':
+ # switch to returning the query, not the results.
+ self._returning = 'next'
+ self._iterations += 1
+ self.seen.update(self._next_query)
else:
- new_search_revisions = set()
- parent_map = self._parents_provider.get_parent_map(
- self._search_revisions)
- for parents in parent_map.itervalues():
- new_search_revisions.update(p for p in parents if
- p not in self.seen)
- self._search_revisions = new_search_revisions
- if len(self._search_revisions) == 0:
- raise StopIteration()
- self.seen.update(self._search_revisions)
- return self._search_revisions
+ self._advance()
+ if len(self._next_query) == 0:
+ raise StopIteration()
+ return self._next_query
+
+ def next_with_ghosts(self):
+ """Return the next found ancestors, with ghosts split out.
+
+ Ancestors are returned in the order they are seen in a breadth-first
+ traversal. No ancestor will be returned more than once. Ancestors are
+ returned only after asking for their parents, which allows us to detect
+ which revisions are ghosts and which are not.
+
+ :return: A tuple with (present ancestors, ghost ancestors) sets.
+ """
+ if self._returning != 'next_with_ghosts':
+ # switch to returning the results, not the current query.
+ self._returning = 'next_with_ghosts'
+ self._advance()
+ if len(self._next_query) == 0:
+ raise StopIteration()
+ self._advance()
+ return self._current_present, self._current_ghosts
+
+ def _advance(self):
+ """Advance the search.
+
+ Updates self.seen, self._next_query, self._current_present,
+ self._current_ghosts, self._current_parents and self._iterations.
+ """
+ self._iterations += 1
+ found, ghosts, next, parents = self._do_query(self._next_query)
+ self._current_present = found
+ self._current_ghosts = ghosts
+ self._next_query = next
+ self._current_parents = parents
+
+ def _do_query(self, revisions):
+ """Query for revisions.
+
+ :param revisions: Revisions to query.
+ :return: A tuple: (set(found_revisions), set(ghost_revisions),
+ set(parents_of_found_revisions), dict(found_revisions:parents)).
+ """
+ found_parents = set()
+ parents_of_found = set()
+ parent_map = self._parents_provider.get_parent_map(revisions)
+ for rev_id, parents in parent_map.iteritems():
+ found_parents.add(rev_id)
+ parents_of_found.update(p for p in parents if p not in self.seen)
+ ghost_parents = revisions - found_parents
+ self.seen.update(parents_of_found)
+ return found_parents, ghost_parents, parents_of_found, parent_map
def __iter__(self):
return self
@@ -562,13 +616,56 @@
None of the specified revisions are required to be present in the
search list. In this case, the call is a no-op.
"""
- stopped = self._search_revisions.intersection(revisions)
- self._search_revisions = self._search_revisions.difference(revisions)
+ revisions = frozenset(revisions)
+ if self._returning == 'next':
+ stopped = self._next_query.intersection(revisions)
+ self._next_query = self._next_query.difference(revisions)
+ else:
+ stopped = set()
+ stopped.update(self._current_present.intersection(revisions))
+ stopped.update(self._current_ghosts.intersection(revisions))
+ self._current_present.difference_update(stopped)
+ self._current_ghosts.difference_update(stopped)
+ # stopping 'x' should stop returning parents of 'x', but
+ # not if 'y' always references those same parents
+ stop_rev_references = {}
+ for rev in stopped:
+ for parent_id in self._current_parents[rev]:
+ if parent_id not in stop_rev_references:
+ stop_rev_references[parent_id] = 0
+ stop_rev_references[parent_id] += 1
+ # if only the stopped revisions reference it, the ref count will be
+ # 0 after this loop
+ for parents in self._current_parents.itervalues():
+ for parent_id in parents:
+ try:
+ stop_rev_references[parent_id] -= 1
+ except KeyError:
+ pass
+ stop_parents = set()
+ for rev_id, refs in stop_rev_references.iteritems():
+ if refs == 0:
+ stop_parents.add(rev_id)
+ self._next_query.difference_update(stop_parents)
return stopped
def start_searching(self, revisions):
- if self._search_revisions is None:
- self._start = set(revisions)
+ """Add revisions to the search.
+
+ The parents of revisions will be returned from the next call to next()
+ or next_with_ghosts(). If next_with_ghosts was the most recently used
+ next* call then the return value is the result of looking up the
+ ghost/not ghost status of revisions. (A tuple (present, ghosted)).
+ """
+ revisions = frozenset(revisions)
+ if self._returning == 'next':
+ self._next_query.update(revisions.difference(self.seen))
+ self.seen.update(revisions)
else:
- self._search_revisions.update(revisions.difference(self.seen))
- self.seen.update(revisions)
+ # perform a query on revisions
+ revs, ghosts, query, parents = self._do_query(revisions)
+ self._current_present.update(revs)
+ self._current_ghosts.update(ghosts)
+ self._next_query.update(query)
+ self._current_parents.update(parents)
+ return revs, ghosts
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2008-01-11 00:44:17 +0000
+++ b/bzrlib/tests/test_graph.py 2008-01-14 23:11:45 +0000
@@ -652,6 +652,100 @@
self.assertEqual(set(['h1', 'h2']),
self._run_heads_break_deeper(graph_dict, ['h1', 'h2']))
+ def test_breadth_first_search_start_ghosts(self):
+ parent_graph = {}
+ parents_provider = InstrumentedParentsProvider(
+ _mod_graph.DictParentsProvider(parent_graph))
+ graph = _mod_graph.Graph(parents_provider)
+ # with_ghosts reports the ghosts
+ search = graph._make_breadth_first_searcher(['a-ghost'])
+ self.assertEqual((set(), set(['a-ghost'])), search.next_with_ghosts())
+ self.assertRaises(StopIteration, search.next_with_ghosts)
+ # next includes them
+ search = graph._make_breadth_first_searcher(['a-ghost'])
+ self.assertEqual(set(['a-ghost']), search.next())
+ self.assertRaises(StopIteration, search.next)
+
+ def test_breadth_first_search_deep_ghosts(self):
+ parent_graph = {
+ 'head':['present'],
+ 'present':['child', 'ghost'],
+ 'child':[],
+ }
+ parents_provider = InstrumentedParentsProvider(
+ _mod_graph.DictParentsProvider(parent_graph))
+ graph = _mod_graph.Graph(parents_provider)
+ # with_ghosts reports the ghosts
+ search = graph._make_breadth_first_searcher(['head'])
+ self.assertEqual((set(['head']), set()), search.next_with_ghosts())
+ self.assertEqual((set(['present']), set()), search.next_with_ghosts())
+ self.assertEqual((set(['child']), set(['ghost'])),
+ search.next_with_ghosts())
+ self.assertRaises(StopIteration, search.next_with_ghosts)
+ # next includes them
+ search = graph._make_breadth_first_searcher(['head'])
+ self.assertEqual(set(['head']), search.next())
+ self.assertEqual(set(['present']), search.next())
+ self.assertEqual(set(['child', 'ghost']),
+ search.next())
+ self.assertRaises(StopIteration, search.next)
+
+ def test_breadth_first_search_change_next_to_next_with_ghosts(self):
+ # To make the API robust, we allow calling both next() and
+ # next_with_ghosts() on the same searcher.
+ parent_graph = {
+ 'head':['present'],
+ 'present':['child', 'ghost'],
+ 'child':[],
+ }
+ parents_provider = InstrumentedParentsProvider(
+ _mod_graph.DictParentsProvider(parent_graph))
+ graph = _mod_graph.Graph(parents_provider)
+ # start with next_with_ghosts
+ search = graph._make_breadth_first_searcher(['head'])
+ self.assertEqual((set(['head']), set()), search.next_with_ghosts())
+ self.assertEqual(set(['present']), search.next())
+ self.assertEqual((set(['child']), set(['ghost'])),
+ search.next_with_ghosts())
+ self.assertRaises(StopIteration, search.next)
+ # start with next
+ search = graph._make_breadth_first_searcher(['head'])
+ self.assertEqual(set(['head']), search.next())
+ self.assertEqual((set(['present']), set()), search.next_with_ghosts())
+ self.assertEqual(set(['child', 'ghost']),
+ search.next())
+ self.assertRaises(StopIteration, search.next_with_ghosts)
+
+ def test_breadth_first_change_search(self):
+ # Changing the search should work with both next and next_with_ghosts.
+ parent_graph = {
+ 'head':['present'],
+ 'present':['stopped'],
+ 'other':['other_2'],
+ 'other_2':[],
+ }
+ parents_provider = InstrumentedParentsProvider(
+ _mod_graph.DictParentsProvider(parent_graph))
+ graph = _mod_graph.Graph(parents_provider)
+ search = graph._make_breadth_first_searcher(['head'])
+ self.assertEqual((set(['head']), set()), search.next_with_ghosts())
+ self.assertEqual((set(['present']), set()), search.next_with_ghosts())
+ self.assertEqual(set(['present']),
+ search.stop_searching_any(['present']))
+ self.assertEqual((set(['other']), set(['other_ghost'])),
+ search.start_searching(['other', 'other_ghost']))
+ self.assertEqual((set(['other_2']), set()), search.next_with_ghosts())
+ self.assertRaises(StopIteration, search.next_with_ghosts)
+ # next includes them
+ search = graph._make_breadth_first_searcher(['head'])
+ self.assertEqual(set(['head']), search.next())
+ self.assertEqual(set(['present']), search.next())
+ self.assertEqual(set(['present']),
+ search.stop_searching_any(['present']))
+ search.start_searching(['other', 'other_ghost'])
+ self.assertEqual(set(['other_2']), search.next())
+ self.assertRaises(StopIteration, search.next)
+
class TestCachingParentsProvider(tests.TestCase):
More information about the bazaar-commits
mailing list