Rev 3093: Get rid of 'ParentsProvider.preload_parents' in favor of changing how in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update

Fri Dec 7 17:32:42 GMT 2007

At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update

------------------------------------------------------------
revno: 3093
revision-id:john at arbash-meinel.com-20071207173216-9gt7vfve0qzi6exr
parent: john at arbash-meinel.com-20071207155805-vv876dm1onsgr3g3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: graph_update
timestamp: Fri 2007-12-07 11:32:16 -0600
message:
  Get rid of 'ParentsProvider.preload_parents' in favor of changing how
  step() works.
  Split out the graph tests as recommended by Andrew Bennetts
modified:
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
  bzrlib/repofmt/knitrepo.py     knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/test_graph.py     test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
-------------- next part --------------
=== modified file 'bzrlib/graph.py'

--- a/bzrlib/graph.py	2007-12-07 15:58:05 +0000
+++ b/bzrlib/graph.py	2007-12-07 17:32:16 +0000
@@ -55,13 +55,6 @@
     def get_parents(self, revisions):
         return [self.ancestry.get(r, None) for r in revisions]
 
-    def preload_parents(self, revision_ids):
-        """Cache the parents of these revisions, we will ask for them soon.
-
-        :return: parents that were not preloaded
-        """
-        return set(revisions).difference(revision_ids)
-
 
 class _StackedParentsProvider(object):
 
@@ -94,18 +87,6 @@
                 break
         return [found.get(r, None) for r in revision_ids]
 
-    def preload_parents(self, revision_ids):
-        """Cache the parents of these revisions, we will ask for them soon.
-
-        :return: parents that are actually present
-        """
-        to_preload = set(revision_ids)
-        for parent_provider in self._parent_providers:
-            to_preload = parent_provider.preload_parents(to_preload)
-            if not to_preload:
-                break
-        return to_preload
-
 
 class CachingParentsProvider(object):
     """A parents provider which will cache the revision => parents in a dict.
@@ -151,18 +132,6 @@
             parents.update(new_parents)
         return [parents[r] for r in revision_ids]
 
-    def preload_parents(self, revision_ids):
-        """Cache the parents of these revisions, we will ask for them soon.
-
-        :return: parents that are actually present
-        """
-        cache = self._cache
-        needed = set(r for r in revision_ids if r not in cache)
-        self._cache.update(zip(needed,
-                               self._real_provider.get_parents(needed)))
-        # We preloaded everything
-        return set()
-
 
 class Graph(object):
     """Provide incremental access to revision graphs.
@@ -182,12 +151,6 @@
             conforming to the behavior of StackedParentsProvider.get_parents
         """
         self.get_parents = parents_provider.get_parents
-        #self.preload_parents = parents_provider.preload_parents
-        self.preload_parents = getattr(parents_provider, 'preload_parents',
-                                       None)
-        if self.preload_parents is None:
-            self.preload_parents = lambda revs: set(revs)
-
         self._parents_provider = parents_provider
 
     def __repr__(self):
@@ -276,8 +239,8 @@
             if len(active_searchers) == 0:
                 return border_ancestors, common_ancestors, [s.seen for s in
                                                             searchers]
-            self.preload_parents(next_to_search)
-            new_common = common_searcher.step()
+            parents = self.preload_parents(next_to_search)
+            new_common = common_searcher.step(parents)
             if new_common:
                 common_ancestors.update(new_common)
                 for searcher in active_searchers:
@@ -286,7 +249,7 @@
             newly_seen = set()
             new_active_searchers = []
             for searcher in active_searchers:
-                new_ancestors = searcher.step()
+                new_ancestors = searcher.step(parents)
                 if new_ancestors:
                     newly_seen.update(new_ancestors)
                     new_active_searchers.append(searcher)
@@ -307,6 +270,15 @@
             for searcher in active_searchers:
                 next_to_search.update(searcher.will_search())
 
+    def preload_parents(self, keys):
+        """Call get_parents() for all of the keys we will need.
+
+        :return: A dictionary mapping key => parents
+        """
+        # TODO: Use izip? the number of keys here will probably always be
+        #       relatively small, it may be faster to just use zip
+        return dict(zip(keys, self.get_parents(keys)))
+
     def heads(self, keys):
         """Return the heads from amongst keys.
 
@@ -335,13 +307,13 @@
         active_searchers = dict(searchers)
         # The first parents we will be accessing are just the candidate_heads,
         # so ask the parents provider to pull them up
-        self.preload_parents(candidate_heads)
+        parents = self.preload_parents(candidate_heads)
 
         # skip over the actual candidate for each searcher, and figure out what
         # the next nodes are going to be.
         next_to_search = set()
         for searcher in active_searchers.itervalues():
-            next_to_search.update(searcher.step())
+            next_to_search.update(searcher.step(parents))
         # The common walker finds nodes that are common to two or more of the
         # input keys, so that we don't access all history when a currently
         # uncommon search point actually meets up with something behind a
@@ -350,10 +322,10 @@
         # accessing all history.
         common_walker = self._make_breadth_first_searcher([])
         while len(active_searchers) > 0:
-            self.preload_parents(next_to_search)
+            parents = self.preload_parents(next_to_search)
             ancestors = set()
             # advance searches
-            new_common = common_walker.step()
+            new_common = common_walker.step(parents)
             for candidate in active_searchers.keys():
                 try:
                     searcher = active_searchers[candidate]
@@ -362,7 +334,7 @@
                     # through this for loop, because it was determined to be
                     # a descendant of another candidate.
                     continue
-                next_ancestors = searcher.step()
+                next_ancestors = searcher.step(parents)
                 if next_ancestors:
                     ancestors.update(next_ancestors)
                 else:
@@ -517,7 +489,7 @@
         self._start = set(revisions)
         self._search_revisions = None
         self.seen = set(revisions)
-        self._parents_provider = parents_provider 
+        self._parents_provider = parents_provider
 
     def __repr__(self):
         return ('_BreadthFirstSearcher(self._search_revisions=%r,'
@@ -525,22 +497,29 @@
 
     def will_search(self):
         """Give the list of nodes that will be searched next."""
-        return self._search_revisions
+        if self._search_revisions is None:
+            return self._start
+        else:
+            return self._search_revisions
 
-    def step(self):
+    def step(self, parents):
         """Step to the next set of ancestors.
 
+        :param parents: A dictionary mapping revision_id => parent_ids
+            It is assumed that all parents will be available. Callers should
+            use "will_search()" to find what revisions this searcher will need,
+            and then load them outside of this function.
         :return: A set of ancestors (in no particular order)
         """
         if self._search_revisions is None:
             self._search_revisions = self._start
         else:
             new_search_revisions = set()
-            for parents in self._parents_provider.get_parents(
-                self._search_revisions):
-                if parents is None:
+            for revision_id in self._search_revisions:
+                parent_ids = parents[revision_id]
+                if parent_ids is None:
                     continue
-                new_search_revisions.update(parents)
+                new_search_revisions.update(parent_ids)
             new_search_revisions.difference_update(self.seen)
             self._search_revisions = new_search_revisions
         self.seen.update(self._search_revisions)
@@ -552,7 +531,10 @@
         Ancestors are returned in the order they are seen in a breadth-first
         traversal.  No ancestor will be returned more than once.
         """
-        next_revisions = self.step()
+        to_search = self.will_search()
+        parent_ids = self._parents_provider.get_parents(to_search)
+        parents = dict(zip(to_search, parent_ids))
+        next_revisions = self.step(parents)
         if not next_revisions:
             raise StopIteration()
         return next_revisions

=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2007-12-06 20:40:25 +0000
+++ b/bzrlib/index.py	2007-12-07 17:32:16 +0000
@@ -1024,10 +1024,6 @@
                 result.append(None)
         return result
 
-    def preload_parents(self, revision_ids):
-        """See StackedParentsProvider.preload_parents"""
-        return set(revision_ids)
-
     def insert_index(self, pos, index):
         """Insert a new index in the list of indices to query.
 

=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2007-12-06 21:36:36 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2007-12-07 17:32:16 +0000
@@ -74,15 +74,6 @@
             parents_list.append(parents)
         return parents_list
 
-    def preload_parents(self, revision_ids):
-        """Cache the parents of these revisions, we will ask for them soon.
-
-        :return: parents that were not preloaded
-        """
-        # Knit records are already cached, so we just need to know what ones
-        # are present
-        return set(r for r in revision_ids if r not in self._knit)
-
 
 class KnitRepository(MetaDirRepository):
     """Knit format repository."""

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-12-06 21:51:33 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-12-07 17:32:16 +0000
@@ -1858,14 +1858,6 @@
     def _make_parents_provider(self):
         return graph.CachingParentsProvider(self)
 
-    def preload_parents(self, revision_ids):
-        """Cache the parents of these revisions, we will ask for them soon.
-
-        :return: parents that were not preloaded
-        """
-        # We don't do any preloading yet
-        return set(revision_ids)
-
     def _refresh_data(self):
         if self._write_lock_count == 1 or (
             self.control_files._lock_count == 1 and

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-12-06 20:40:25 +0000
+++ b/bzrlib/repository.py	2007-12-07 17:32:16 +0000
@@ -1660,14 +1660,6 @@
             parents_list.append(parents)
         return parents_list
 
-    def preload_parents(self, revision_ids):
-        """Cache the parents of these revisions, we will ask for them soon.
-
-        :return: parents that were not preloaded
-        """
-        # The default implementation doesn't preload anything
-        return set(revision_ids)
-
     def _make_parents_provider(self):
         return self
 

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2007-12-06 22:06:37 +0000
+++ b/bzrlib/tests/test_graph.py	2007-12-07 17:32:16 +0000
@@ -165,9 +165,6 @@
         self.calls.extend(nodes)
         return self._real_parents_provider.get_parents(nodes)
 
-    def preload_parents(self, revision_ids):
-        return self._real_parents_provider.preload_parents(revision_ids)
-
 
 class TestGraph(TestCaseWithMemoryTransport):
 
@@ -497,11 +494,8 @@
                     self.fail('key deeper was accessed')
                 result.append(graph_dict[key])
             return result
-        def preload_parents(keys):
-            return set(keys).difference(graph_dict)
         an_obj = stub()
         an_obj.get_parents = get_parents
-        an_obj.preload_parents = preload_parents
         graph = _mod_graph.Graph(an_obj)
         return graph.heads(search)
 
@@ -543,62 +537,43 @@
 
 class TestCachingParentsProvider(tests.TestCase):
 
+    def setUp(self):
+        super(TestCachingParentsProvider, self).setUp()
+        dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
+        self.inst_pp = InstrumentedParentsProvider(dict_pp)
+        self.caching_pp = _mod_graph.CachingParentsProvider(self.inst_pp)
+
     def test_get_parents(self):
-        dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
-        inst_pp = InstrumentedParentsProvider(dict_pp)
-        caching_pp = _mod_graph.CachingParentsProvider(inst_pp)
-
-        self.assertEqual({}, caching_pp._cache)
-        self.assertEqual([('b',)], caching_pp.get_parents(['a']))
-        self.assertEqual(['a'], inst_pp.calls)
-        self.assertEqual([('b',)], caching_pp.get_parents(['a']))
+        """Requesting the same revision should be returned from cache"""
+        self.assertEqual({}, self.caching_pp._cache)
+        self.assertEqual([('b',)], self.caching_pp.get_parents(['a']))
+        self.assertEqual(['a'], self.inst_pp.calls)
+        self.assertEqual([('b',)], self.caching_pp.get_parents(['a']))
         # No new call, as it should have been returned from the cache
-        self.assertEqual(['a'], inst_pp.calls)
-        self.assertEqual({'a':('b',)}, caching_pp._cache)
-
-        self.assertEqual([None], caching_pp.get_parents(['b']))
-        self.assertEqual(['a', 'b'], inst_pp.calls)
-        self.assertEqual([None], caching_pp.get_parents(['b']))
-        self.assertEqual(['a', 'b'], inst_pp.calls)
-        self.assertEqual({'a':('b',), 'b':None}, caching_pp._cache)
-
-        self.assertEqual([('b',), None, None, ('b',)],
-                         caching_pp.get_parents(['a', 'b', 'c', 'a']))
-        # Only 'c' should be requested anew
-        self.assertEqual(['a', 'b', 'c'], inst_pp.calls)
+        self.assertEqual(['a'], self.inst_pp.calls)
+        self.assertEqual({'a':('b',)}, self.caching_pp._cache)
+
+    def test_get_parents_not_present(self):
+        """The cache should also track when a revision doesn't exist"""
+        self.assertEqual([None], self.caching_pp.get_parents(['b']))
+        self.assertEqual(['b'], self.inst_pp.calls)
+        self.assertEqual([None], self.caching_pp.get_parents(['b']))
+        # No new calls
+        self.assertEqual(['b'], self.inst_pp.calls)
+        self.assertEqual({'b':None}, self.caching_pp._cache)
+
+    def test_get_parents_mixed(self):
+        """Anything that can be returned from cache, should be"""
+        self.assertEqual([None], self.caching_pp.get_parents(['b']))
+        self.assertEqual(['b'], self.inst_pp.calls)
+        self.assertEqual([('b',), None],
+                         self.caching_pp.get_parents(['a', 'b']))
+        self.assertEqual(['b', 'a'], self.inst_pp.calls)
 
     def test_get_parents_repeated(self):
-        dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
-        inst_pp = InstrumentedParentsProvider(dict_pp)
-        caching_pp = _mod_graph.CachingParentsProvider(inst_pp)
-
-        self.assertEqual({}, caching_pp._cache)
-        self.assertEqual([('b',), None, ('b',)],
-                         caching_pp.get_parents(['a', 'b', 'a']))
-        # The repeated request for 'a' should be handled from the cache
-        self.assertEqual(['a', 'b'], inst_pp.calls)
-
-    def test_preload_parents(self):
-        dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
-        inst_pp = InstrumentedParentsProvider(dict_pp)
-        caching_pp = _mod_graph.CachingParentsProvider(inst_pp)
-
-        self.assertEqual({}, caching_pp._cache)
-        self.assertEqual(set(), caching_pp.preload_parents(['a', 'b']))
-        self.assertEqual(['a', 'b'], inst_pp.calls)
-        self.assertEqual([('b',)], caching_pp.get_parents(['a']))
-        self.assertEqual([None], caching_pp.get_parents(['b']))
-        # No new call, as it should have been returned from the cache
-        self.assertEqual(['a', 'b'], inst_pp.calls)
-        self.assertEqual({'a':('b',), 'b':None}, caching_pp._cache)
-
-    def test_preload_parents_existing(self):
-        dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
-        inst_pp = InstrumentedParentsProvider(dict_pp)
-        caching_pp = _mod_graph.CachingParentsProvider(inst_pp)
-
-        self.assertEqual({}, caching_pp._cache)
-        self.assertEqual([None], caching_pp.get_parents(['b']))
-        self.assertEqual(['b'], inst_pp.calls)
-        self.assertEqual(set(), caching_pp.preload_parents(['a', 'b']))
-        self.assertEqual(['b', 'a'], inst_pp.calls)
+        """Asking for the same parent 2x will only forward 1 request."""
+        self.assertEqual([None, ('b',), None],
+                         self.caching_pp.get_parents(['b', 'a', 'b']))
+        # Use sorted because we don't care about the order, just that each is
+        # only present 1 time.
+        self.assertEqual(['a', 'b'], sorted(self.inst_pp.calls))