Rev 3093: Get rid of 'ParentsProvider.preload_parents' in favor of changing how in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update
John Arbash Meinel
john at arbash-meinel.com
Fri Dec 7 17:32:42 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update
------------------------------------------------------------
revno: 3093
revision-id:john at arbash-meinel.com-20071207173216-9gt7vfve0qzi6exr
parent: john at arbash-meinel.com-20071207155805-vv876dm1onsgr3g3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: graph_update
timestamp: Fri 2007-12-07 11:32:16 -0600
message:
Get rid of 'ParentsProvider.preload_parents' in favor of changing how
step() works.
Split out the graph tests as recommended by Andrew Bennetts
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/repofmt/knitrepo.py knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2007-12-07 15:58:05 +0000
+++ b/bzrlib/graph.py 2007-12-07 17:32:16 +0000
@@ -55,13 +55,6 @@
def get_parents(self, revisions):
return [self.ancestry.get(r, None) for r in revisions]
- def preload_parents(self, revision_ids):
- """Cache the parents of these revisions, we will ask for them soon.
-
- :return: parents that were not preloaded
- """
- return set(revisions).difference(revision_ids)
-
class _StackedParentsProvider(object):
@@ -94,18 +87,6 @@
break
return [found.get(r, None) for r in revision_ids]
- def preload_parents(self, revision_ids):
- """Cache the parents of these revisions, we will ask for them soon.
-
- :return: parents that are actually present
- """
- to_preload = set(revision_ids)
- for parent_provider in self._parent_providers:
- to_preload = parent_provider.preload_parents(to_preload)
- if not to_preload:
- break
- return to_preload
-
class CachingParentsProvider(object):
"""A parents provider which will cache the revision => parents in a dict.
@@ -151,18 +132,6 @@
parents.update(new_parents)
return [parents[r] for r in revision_ids]
- def preload_parents(self, revision_ids):
- """Cache the parents of these revisions, we will ask for them soon.
-
- :return: parents that are actually present
- """
- cache = self._cache
- needed = set(r for r in revision_ids if r not in cache)
- self._cache.update(zip(needed,
- self._real_provider.get_parents(needed)))
- # We preloaded everything
- return set()
-
class Graph(object):
"""Provide incremental access to revision graphs.
@@ -182,12 +151,6 @@
conforming to the behavior of StackedParentsProvider.get_parents
"""
self.get_parents = parents_provider.get_parents
- #self.preload_parents = parents_provider.preload_parents
- self.preload_parents = getattr(parents_provider, 'preload_parents',
- None)
- if self.preload_parents is None:
- self.preload_parents = lambda revs: set(revs)
-
self._parents_provider = parents_provider
def __repr__(self):
@@ -276,8 +239,8 @@
if len(active_searchers) == 0:
return border_ancestors, common_ancestors, [s.seen for s in
searchers]
- self.preload_parents(next_to_search)
- new_common = common_searcher.step()
+ parents = self.preload_parents(next_to_search)
+ new_common = common_searcher.step(parents)
if new_common:
common_ancestors.update(new_common)
for searcher in active_searchers:
@@ -286,7 +249,7 @@
newly_seen = set()
new_active_searchers = []
for searcher in active_searchers:
- new_ancestors = searcher.step()
+ new_ancestors = searcher.step(parents)
if new_ancestors:
newly_seen.update(new_ancestors)
new_active_searchers.append(searcher)
@@ -307,6 +270,15 @@
for searcher in active_searchers:
next_to_search.update(searcher.will_search())
+ def preload_parents(self, keys):
+ """Call get_parents() for all of the keys we will need.
+
+ :return: A dictionary mapping key => parents
+ """
+ # TODO: Use izip? the number of keys here will probably always be
+ # relatively small, it may be faster to just use zip
+ return dict(zip(keys, self.get_parents(keys)))
+
def heads(self, keys):
"""Return the heads from amongst keys.
@@ -335,13 +307,13 @@
active_searchers = dict(searchers)
# The first parents we will be accessing are just the candidate_heads,
# so ask the parents provider to pull them up
- self.preload_parents(candidate_heads)
+ parents = self.preload_parents(candidate_heads)
# skip over the actual candidate for each searcher, and figure out what
# the next nodes are going to be.
next_to_search = set()
for searcher in active_searchers.itervalues():
- next_to_search.update(searcher.step())
+ next_to_search.update(searcher.step(parents))
# The common walker finds nodes that are common to two or more of the
# input keys, so that we don't access all history when a currently
# uncommon search point actually meets up with something behind a
@@ -350,10 +322,10 @@
# accessing all history.
common_walker = self._make_breadth_first_searcher([])
while len(active_searchers) > 0:
- self.preload_parents(next_to_search)
+ parents = self.preload_parents(next_to_search)
ancestors = set()
# advance searches
- new_common = common_walker.step()
+ new_common = common_walker.step(parents)
for candidate in active_searchers.keys():
try:
searcher = active_searchers[candidate]
@@ -362,7 +334,7 @@
# through this for loop, because it was determined to be
# a descendant of another candidate.
continue
- next_ancestors = searcher.step()
+ next_ancestors = searcher.step(parents)
if next_ancestors:
ancestors.update(next_ancestors)
else:
@@ -517,7 +489,7 @@
self._start = set(revisions)
self._search_revisions = None
self.seen = set(revisions)
- self._parents_provider = parents_provider
+ self._parents_provider = parents_provider
def __repr__(self):
return ('_BreadthFirstSearcher(self._search_revisions=%r,'
@@ -525,22 +497,29 @@
def will_search(self):
"""Give the list of nodes that will be searched next."""
- return self._search_revisions
+ if self._search_revisions is None:
+ return self._start
+ else:
+ return self._search_revisions
- def step(self):
+ def step(self, parents):
"""Step to the next set of ancestors.
+ :param parents: A dictionary mapping revision_id => parent_ids
+ It is assumed that all parents will be available. Callers should
+ use "will_search()" to find what revisions this searcher will need,
+ and then load them outside of this function.
:return: A set of ancestors (in no particular order)
"""
if self._search_revisions is None:
self._search_revisions = self._start
else:
new_search_revisions = set()
- for parents in self._parents_provider.get_parents(
- self._search_revisions):
- if parents is None:
+ for revision_id in self._search_revisions:
+ parent_ids = parents[revision_id]
+ if parent_ids is None:
continue
- new_search_revisions.update(parents)
+ new_search_revisions.update(parent_ids)
new_search_revisions.difference_update(self.seen)
self._search_revisions = new_search_revisions
self.seen.update(self._search_revisions)
@@ -552,7 +531,10 @@
Ancestors are returned in the order they are seen in a breadth-first
traversal. No ancestor will be returned more than once.
"""
- next_revisions = self.step()
+ to_search = self.will_search()
+ parent_ids = self._parents_provider.get_parents(to_search)
+ parents = dict(zip(to_search, parent_ids))
+ next_revisions = self.step(parents)
if not next_revisions:
raise StopIteration()
return next_revisions
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2007-12-06 20:40:25 +0000
+++ b/bzrlib/index.py 2007-12-07 17:32:16 +0000
@@ -1024,10 +1024,6 @@
result.append(None)
return result
- def preload_parents(self, revision_ids):
- """See StackedParentsProvider.preload_parents"""
- return set(revision_ids)
-
def insert_index(self, pos, index):
"""Insert a new index in the list of indices to query.
=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py 2007-12-06 21:36:36 +0000
+++ b/bzrlib/repofmt/knitrepo.py 2007-12-07 17:32:16 +0000
@@ -74,15 +74,6 @@
parents_list.append(parents)
return parents_list
- def preload_parents(self, revision_ids):
- """Cache the parents of these revisions, we will ask for them soon.
-
- :return: parents that were not preloaded
- """
- # Knit records are already cached, so we just need to know what ones
- # are present
- return set(r for r in revision_ids if r not in self._knit)
-
class KnitRepository(MetaDirRepository):
"""Knit format repository."""
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2007-12-06 21:51:33 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2007-12-07 17:32:16 +0000
@@ -1858,14 +1858,6 @@
def _make_parents_provider(self):
return graph.CachingParentsProvider(self)
- def preload_parents(self, revision_ids):
- """Cache the parents of these revisions, we will ask for them soon.
-
- :return: parents that were not preloaded
- """
- # We don't do any preloading yet
- return set(revision_ids)
-
def _refresh_data(self):
if self._write_lock_count == 1 or (
self.control_files._lock_count == 1 and
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-12-06 20:40:25 +0000
+++ b/bzrlib/repository.py 2007-12-07 17:32:16 +0000
@@ -1660,14 +1660,6 @@
parents_list.append(parents)
return parents_list
- def preload_parents(self, revision_ids):
- """Cache the parents of these revisions, we will ask for them soon.
-
- :return: parents that were not preloaded
- """
- # The default implementation doesn't preload anything
- return set(revision_ids)
-
def _make_parents_provider(self):
return self
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2007-12-06 22:06:37 +0000
+++ b/bzrlib/tests/test_graph.py 2007-12-07 17:32:16 +0000
@@ -165,9 +165,6 @@
self.calls.extend(nodes)
return self._real_parents_provider.get_parents(nodes)
- def preload_parents(self, revision_ids):
- return self._real_parents_provider.preload_parents(revision_ids)
-
class TestGraph(TestCaseWithMemoryTransport):
@@ -497,11 +494,8 @@
self.fail('key deeper was accessed')
result.append(graph_dict[key])
return result
- def preload_parents(keys):
- return set(keys).difference(graph_dict)
an_obj = stub()
an_obj.get_parents = get_parents
- an_obj.preload_parents = preload_parents
graph = _mod_graph.Graph(an_obj)
return graph.heads(search)
@@ -543,62 +537,43 @@
class TestCachingParentsProvider(tests.TestCase):
+ def setUp(self):
+ super(TestCachingParentsProvider, self).setUp()
+ dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
+ self.inst_pp = InstrumentedParentsProvider(dict_pp)
+ self.caching_pp = _mod_graph.CachingParentsProvider(self.inst_pp)
+
def test_get_parents(self):
- dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
- inst_pp = InstrumentedParentsProvider(dict_pp)
- caching_pp = _mod_graph.CachingParentsProvider(inst_pp)
-
- self.assertEqual({}, caching_pp._cache)
- self.assertEqual([('b',)], caching_pp.get_parents(['a']))
- self.assertEqual(['a'], inst_pp.calls)
- self.assertEqual([('b',)], caching_pp.get_parents(['a']))
+ """Requesting the same revision should be returned from cache"""
+ self.assertEqual({}, self.caching_pp._cache)
+ self.assertEqual([('b',)], self.caching_pp.get_parents(['a']))
+ self.assertEqual(['a'], self.inst_pp.calls)
+ self.assertEqual([('b',)], self.caching_pp.get_parents(['a']))
# No new call, as it should have been returned from the cache
- self.assertEqual(['a'], inst_pp.calls)
- self.assertEqual({'a':('b',)}, caching_pp._cache)
-
- self.assertEqual([None], caching_pp.get_parents(['b']))
- self.assertEqual(['a', 'b'], inst_pp.calls)
- self.assertEqual([None], caching_pp.get_parents(['b']))
- self.assertEqual(['a', 'b'], inst_pp.calls)
- self.assertEqual({'a':('b',), 'b':None}, caching_pp._cache)
-
- self.assertEqual([('b',), None, None, ('b',)],
- caching_pp.get_parents(['a', 'b', 'c', 'a']))
- # Only 'c' should be requested anew
- self.assertEqual(['a', 'b', 'c'], inst_pp.calls)
+ self.assertEqual(['a'], self.inst_pp.calls)
+ self.assertEqual({'a':('b',)}, self.caching_pp._cache)
+
+ def test_get_parents_not_present(self):
+ """The cache should also track when a revision doesn't exist"""
+ self.assertEqual([None], self.caching_pp.get_parents(['b']))
+ self.assertEqual(['b'], self.inst_pp.calls)
+ self.assertEqual([None], self.caching_pp.get_parents(['b']))
+ # No new calls
+ self.assertEqual(['b'], self.inst_pp.calls)
+ self.assertEqual({'b':None}, self.caching_pp._cache)
+
+ def test_get_parents_mixed(self):
+ """Anything that can be returned from cache, should be"""
+ self.assertEqual([None], self.caching_pp.get_parents(['b']))
+ self.assertEqual(['b'], self.inst_pp.calls)
+ self.assertEqual([('b',), None],
+ self.caching_pp.get_parents(['a', 'b']))
+ self.assertEqual(['b', 'a'], self.inst_pp.calls)
def test_get_parents_repeated(self):
- dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
- inst_pp = InstrumentedParentsProvider(dict_pp)
- caching_pp = _mod_graph.CachingParentsProvider(inst_pp)
-
- self.assertEqual({}, caching_pp._cache)
- self.assertEqual([('b',), None, ('b',)],
- caching_pp.get_parents(['a', 'b', 'a']))
- # The repeated request for 'a' should be handled from the cache
- self.assertEqual(['a', 'b'], inst_pp.calls)
-
- def test_preload_parents(self):
- dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
- inst_pp = InstrumentedParentsProvider(dict_pp)
- caching_pp = _mod_graph.CachingParentsProvider(inst_pp)
-
- self.assertEqual({}, caching_pp._cache)
- self.assertEqual(set(), caching_pp.preload_parents(['a', 'b']))
- self.assertEqual(['a', 'b'], inst_pp.calls)
- self.assertEqual([('b',)], caching_pp.get_parents(['a']))
- self.assertEqual([None], caching_pp.get_parents(['b']))
- # No new call, as it should have been returned from the cache
- self.assertEqual(['a', 'b'], inst_pp.calls)
- self.assertEqual({'a':('b',), 'b':None}, caching_pp._cache)
-
- def test_preload_parents_existing(self):
- dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
- inst_pp = InstrumentedParentsProvider(dict_pp)
- caching_pp = _mod_graph.CachingParentsProvider(inst_pp)
-
- self.assertEqual({}, caching_pp._cache)
- self.assertEqual([None], caching_pp.get_parents(['b']))
- self.assertEqual(['b'], inst_pp.calls)
- self.assertEqual(set(), caching_pp.preload_parents(['a', 'b']))
- self.assertEqual(['b', 'a'], inst_pp.calls)
+ """Asking for the same parent 2x will only forward 1 request."""
+ self.assertEqual([None, ('b',), None],
+ self.caching_pp.get_parents(['b', 'a', 'b']))
+ # Use sorted because we don't care about the order, just that each is
+ # only present 1 time.
+ self.assertEqual(['a', 'b'], sorted(self.inst_pp.calls))
More information about the bazaar-commits
mailing list