Rev 3178: * New method ``next_with_ghosts`` on the Graph breadth-first-search objects in http://people.ubuntu.com/~robertc/baz2.0/breadth-first-ghosts

Robert Collins robertc at robertcollins.net
Sun Jan 13 23:57:33 GMT 2008


At http://people.ubuntu.com/~robertc/baz2.0/breadth-first-ghosts

------------------------------------------------------------
revno: 3178
revision-id:robertc at robertcollins.net-20080113235717-9a1w22q93j81nd0o
parent: pqm at pqm.ubuntu.com-20080111214408-cpkslxu95eg5c45u
committer: Robert Collins <robertc at robertcollins.net>
branch nick: breadth-first-ghosts
timestamp: Mon 2008-01-14 10:57:17 +1100
message:
   * New method ``next_with_ghosts`` on the Graph breadth-first-search objects
     which will split out ghosts and present parents into two separate sets,
     useful for code which needs to be aware of ghosts (e.g. fetching data
     cares about ghosts during revision selection). (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/tests/test_graph.py     test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
=== modified file 'NEWS'
--- a/NEWS	2008-01-11 17:59:20 +0000
+++ b/NEWS	2008-01-13 23:57:17 +0000
@@ -167,6 +167,11 @@
      inventories. This is primarily used by the ``revision_trees`` method, as
      direct access to inventories is discouraged. (Robert Collins)
 
+   * New method ``next_with_ghosts`` on the Graph breadth-first-search objects
+     which will split out ghosts and present parents into two separate sets,
+     useful for code which needs to be aware of ghosts (e.g. fetching data
+     cares about ghosts during revision selection). (Robert Collins)
+
    * Parent Providers should now implement ``get_parent_map`` returning a
      dictionary instead of ``get_parents`` returning a list.
      ``Graph.get_parents`` is now deprecated. (John Arbash Meinel,

=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2008-01-11 00:44:17 +0000
+++ b/bzrlib/graph.py	2008-01-13 23:57:17 +0000
@@ -506,39 +506,83 @@
     """
 
     def __init__(self, revisions, parents_provider):
-        self._start = set(revisions)
-        self._search_revisions = None
-        self.seen = set(revisions)
+        self._iterations = 0
+        self._next_query = set(revisions)
+        self.seen = set()
         self._parents_provider = parents_provider
+        self._returning = 'checked'
 
     def __repr__(self):
-        if self._search_revisions is not None:
-            search = 'searching=%r' % (list(self._search_revisions),)
+        if self._iterations:
+            prefix = "searching"
         else:
-            search = 'starting=%r' % (list(self._start),)
-        return ('_BreadthFirstSearcher(%s,'
-                ' seen=%r)' % (search, list(self.seen)))
+            prefix = "starting"
+        search = '%s=%r' % (prefix, list(self._next_query))
+        return ('_BreadthFirstSearcher(iterations=%d, %s,'
+                ' seen=%r)' % (self._iterations, search, list(self.seen)))
 
     def next(self):
         """Return the next ancestors of this revision.
 
         Ancestors are returned in the order they are seen in a breadth-first
-        traversal.  No ancestor will be returned more than once.
+        traversal.  No ancestor will be returned more than once. Ancestors are
+        returned before their parentage is queried, so ghosts and missing
+        revisions (including the start revisions) are included in the result.
+        This can save a round trip in LCA style calculation by allowing
+        convergence to be detected without reading the data for the revision
+        the convergence occurs on.
+
+        :return: A set of revision_ids.
         """
-        if self._search_revisions is None:
-            self._search_revisions = self._start
+        if self._returning != 'query':
+            # switch to returning the query, not the results.
+            self._returning = 'query'
+            self._iterations += 1
+            self.seen.update(self._next_query)
         else:
-            new_search_revisions = set()
-            parent_map = self._parents_provider.get_parent_map(
-                            self._search_revisions)
-            for parents in parent_map.itervalues():
-                new_search_revisions.update(p for p in parents if
-                                            p not in self.seen)
-            self._search_revisions = new_search_revisions
-        if len(self._search_revisions) == 0:
-            raise StopIteration()
-        self.seen.update(self._search_revisions)
-        return self._search_revisions
+            self._advance()
+        if len(self._next_query) == 0:
+            raise StopIteration()
+        return self._next_query
+
+    def next_with_ghosts(self):
+        """Return the next found ancestors, with ghosts split out.
+        
+        Ancestors are returned in the order they are seen in a breadth-first
+        traversal.  No ancestor will be returned more than once. Ancestors are
+        returned only after asking for their parents, which can
+
+        :return: A tuple with (present ancestors, ghost ancestors) sets.
+        """
+        if self._returning != 'checked':
+            # switch to returning the results, not the current query.
+            self._returning = 'checked'
+            self._advance()
+        if len(self._next_query) == 0:
+            raise StopIteration()
+        self._advance()
+        return self._current_present, self._current_ghosts
+
+    def _advance(self):
+        """Advance the search.
+
+        Updates self.seen, self._next_query, self._current_present,
+        self._current_ghosts.
+        """
+        self._iterations += 1
+        found_parents = set()
+        new_search_revisions = set()
+        parent_map = self._parents_provider.get_parent_map(
+                        self._next_query)
+        for rev_id, parents in parent_map.iteritems():
+            found_parents.add(rev_id)
+            new_search_revisions.update(p for p in parents if
+                                        p not in self.seen)
+        ghost_parents = self._next_query - found_parents
+        self._next_query = new_search_revisions
+        self.seen.update(self._next_query)
+        self._current_present = found_parents
+        self._current_ghosts = ghost_parents
 
     def __iter__(self):
         return self
@@ -562,13 +606,10 @@
         None of the specified revisions are required to be present in the
         search list.  In this case, the call is a no-op.
         """
-        stopped = self._search_revisions.intersection(revisions)
-        self._search_revisions = self._search_revisions.difference(revisions)
+        stopped = self._next_query.intersection(revisions)
+        self._next_query = self._next_query.difference(revisions)
         return stopped
 
     def start_searching(self, revisions):
-        if self._search_revisions is None:
-            self._start = set(revisions)
-        else:
-            self._search_revisions.update(revisions.difference(self.seen))
+        self._next_query.update(revisions.difference(self.seen))
         self.seen.update(revisions)

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2008-01-11 00:44:17 +0000
+++ b/bzrlib/tests/test_graph.py	2008-01-13 23:57:17 +0000
@@ -652,6 +652,70 @@
         self.assertEqual(set(['h1', 'h2']),
             self._run_heads_break_deeper(graph_dict, ['h1', 'h2']))
 
+    def test_breadth_first_search_start_ghosts(self):
+        parent_graph = {}
+        parents_provider = InstrumentedParentsProvider(
+            _mod_graph.DictParentsProvider(parent_graph))
+        graph = _mod_graph.Graph(parents_provider)
+        # with_ghosts reports the ghosts
+        search = graph._make_breadth_first_searcher(['a-ghost'])
+        self.assertEqual((set(), set(['a-ghost'])), search.next_with_ghosts())
+        self.assertRaises(StopIteration, search.next_with_ghosts)
+        # next includes them
+        search = graph._make_breadth_first_searcher(['a-ghost'])
+        self.assertEqual(set(['a-ghost']), search.next())
+        self.assertRaises(StopIteration, search.next)
+
+    def test_breadth_first_search_deep_ghosts(self):
+        parent_graph = {
+            'head':['present'],
+            'present':['child', 'ghost'],
+            'child':[],
+            }
+        parents_provider = InstrumentedParentsProvider(
+            _mod_graph.DictParentsProvider(parent_graph))
+        graph = _mod_graph.Graph(parents_provider)
+        # with_ghosts reports the ghosts
+        search = graph._make_breadth_first_searcher(['head'])
+        self.assertEqual((set(['head']), set()), search.next_with_ghosts())
+        self.assertEqual((set(['present']), set()), search.next_with_ghosts())
+        self.assertEqual((set(['child']), set(['ghost'])),
+            search.next_with_ghosts())
+        self.assertRaises(StopIteration, search.next_with_ghosts)
+        # next includes them
+        search = graph._make_breadth_first_searcher(['head'])
+        self.assertEqual(set(['head']), search.next())
+        self.assertEqual(set(['present']), search.next())
+        self.assertEqual(set(['child', 'ghost']),
+            search.next())
+        self.assertRaises(StopIteration, search.next)
+
+    def test_breadth_first_search_change_next_to_next_with_ghosts(self):
+        # To make the API robust, we allow changing from next() to
+        # next_with_ghosts() and vice verca.
+        parent_graph = {
+            'head':['present'],
+            'present':['child', 'ghost'],
+            'child':[],
+            }
+        parents_provider = InstrumentedParentsProvider(
+            _mod_graph.DictParentsProvider(parent_graph))
+        graph = _mod_graph.Graph(parents_provider)
+        # with_ghosts reports the ghosts
+        search = graph._make_breadth_first_searcher(['head'])
+        self.assertEqual((set(['head']), set()), search.next_with_ghosts())
+        self.assertEqual(set(['present']), search.next())
+        self.assertEqual((set(['child']), set(['ghost'])),
+            search.next_with_ghosts())
+        self.assertRaises(StopIteration, search.next)
+        # next includes them
+        search = graph._make_breadth_first_searcher(['head'])
+        self.assertEqual(set(['head']), search.next())
+        self.assertEqual((set(['present']), set()), search.next_with_ghosts())
+        self.assertEqual(set(['child', 'ghost']),
+            search.next())
+        self.assertRaises(StopIteration, search.next_with_ghosts)
+
 
 class TestCachingParentsProvider(tests.TestCase):
 



More information about the bazaar-commits mailing list