Rev 4191: Negatively cache misses during read-locks in RemoteRepository. in http://people.ubuntu.com/~robertc/baz2.0/pending/Remote.negative_parents_cache
Robert Collins
robertc at robertcollins.net
Tue Mar 24 05:12:10 GMT 2009
At http://people.ubuntu.com/~robertc/baz2.0/pending/Remote.negative_parents_cache
------------------------------------------------------------
revno: 4191
revision-id: robertc at robertcollins.net-20090324051156-04bm37t0os1idrrs
parent: pqm at pqm.ubuntu.com-20090324015928-a4eisbr51odi0due
committer: Robert Collins <robertc at robertcollins.net>
branch nick: Remote.negative_parents_cache
timestamp: Tue 2009-03-24 16:11:56 +1100
message:
Negatively cache misses during read-locks in RemoteRepository.
=== modified file 'NEWS'
--- a/NEWS 2009-03-24 01:08:12 +0000
+++ b/NEWS 2009-03-24 05:11:56 +0000
@@ -196,6 +196,10 @@
make visible data inserted into the repository by a smart server
fetch operation. (Robert Collins, Andrew Bennetts)
+* ``RemoteRepository`` will now negatively cache missing revisions during
+ ``get_parent_map`` while read-locked. Write-locks are unaffected.
+ (Robert Collins, Andrew Bennetts)
+
* Removed ``InterRemoteToOther``, ``InterOtherToRemote`` and
``InterPackToRemotePack`` classes, as they are now unnecessary.
(Andrew Bennetts)
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2009-03-23 07:25:27 +0000
+++ b/bzrlib/graph.py 2009-03-24 05:11:56 +0000
@@ -121,8 +121,8 @@
self._get_parent_map = self._real_provider.get_parent_map
else:
self._get_parent_map = get_parent_map
- self._cache = {}
- self._cache_misses = True
+ self._cache = None
+ self.enable_cache(True)
def __repr__(self):
return "%s(%r)" % (self.__class__.__name__, self._real_provider)
@@ -133,38 +133,44 @@
raise AssertionError('Cache enabled when already enabled.')
self._cache = {}
self._cache_misses = cache_misses
+ self._missing_keys = set()
def disable_cache(self):
"""Disable and clear the cache."""
self._cache = None
+ self._cache_misses = None
+ self._missing_keys = set()
def get_cached_map(self):
"""Return any cached get_parent_map values."""
if self._cache is None:
return None
- return dict((k, v) for k, v in self._cache.items()
- if v is not None)
+ return dict(self._cache)
def get_parent_map(self, keys):
"""See _StackedParentsProvider.get_parent_map."""
- # Hack to build up the caching logic.
- ancestry = self._cache
- if ancestry is None:
- # Caching is disabled.
- missing_revisions = set(keys)
- ancestry = {}
+ cache = self._cache
+ if cache is None:
+ cache = self._get_parent_map(keys)
else:
- missing_revisions = set(key for key in keys if key not in ancestry)
- if missing_revisions:
- parent_map = self._get_parent_map(missing_revisions)
- ancestry.update(parent_map)
- if self._cache_misses:
- # None is never a valid parents list, so it can be used to
- # record misses.
- ancestry.update(dict((k, None) for k in missing_revisions
- if k not in parent_map))
- present_keys = [k for k in keys if ancestry.get(k) is not None]
- return dict((k, ancestry[k]) for k in present_keys)
+ # XXX: Full scan of cache, keeping a set of cached keys will scale
+ # better.
+ needed_revisions = set(key for key in keys if key not in cache)
+ # Do not ask for negatively cached keys
+ needed_revisions.difference_update(self._missing_keys)
+ if needed_revisions:
+ parent_map = self._get_parent_map(needed_revisions)
+ cache.update(parent_map)
+ if self._cache_misses:
+ for key in needed_revisions:
+ if key not in parent_map:
+ self._missing_keys.add(key)
+ result = {}
+ for key in keys:
+ value = cache.get(key)
+ if value is not None:
+ result[key] = value
+ return result
class Graph(object):
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2009-03-24 01:08:12 +0000
+++ b/bzrlib/remote.py 2009-03-24 05:11:56 +0000
@@ -828,7 +828,7 @@
if not self._lock_mode:
self._lock_mode = 'r'
self._lock_count = 1
- self._unstacked_provider.enable_cache(cache_misses=False)
+ self._unstacked_provider.enable_cache(cache_misses=True)
if self._real_repository is not None:
self._real_repository.lock_read()
else:
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2009-03-17 06:18:16 +0000
+++ b/bzrlib/tests/test_graph.py 2009-03-24 05:11:56 +0000
@@ -1385,6 +1385,12 @@
class TestCachingParentsProvider(tests.TestCase):
+ """These tests run with:
+
+ self.inst_pp, a recording parents provider with a graph of a->b, and b is a
+ ghost.
+ self.caching_pp, a CachingParentsProvider layered on inst_pp.
+ """
def setUp(self):
super(TestCachingParentsProvider, self).setUp()
@@ -1409,7 +1415,6 @@
self.assertEqual({}, self.caching_pp.get_parent_map(['b']))
# No new calls
self.assertEqual(['b'], self.inst_pp.calls)
- self.assertEqual({'b':None}, self.caching_pp._cache)
def test_get_parent_map_mixed(self):
"""Anything that can be returned from cache, should be"""
=== modified file 'bzrlib/tests/test_remote.py'
--- a/bzrlib/tests/test_remote.py 2009-03-24 01:08:12 +0000
+++ b/bzrlib/tests/test_remote.py 2009-03-24 05:11:56 +0000
@@ -1623,6 +1623,31 @@
errors.UnexpectedSmartServerResponse,
repo.get_parent_map, ['a-revision-id'])
+ def test_get_parent_map_negative_caches_missing_keys(self):
+ self.setup_smart_server_with_call_log()
+ repo = self.make_repository('foo')
+ self.assertIsInstance(repo, RemoteRepository)
+ repo.lock_read()
+ self.addCleanup(repo.unlock)
+ self.reset_smart_call_log()
+ graph = repo.get_graph()
+ self.assertEqual({},
+ graph.get_parent_map(['some-missing', 'other-missing']))
+ self.assertLength(1, self.hpss_calls)
+ # No call if we repeat this
+ self.reset_smart_call_log()
+ graph = repo.get_graph()
+ self.assertEqual({},
+ graph.get_parent_map(['some-missing', 'other-missing']))
+ self.assertLength(0, self.hpss_calls)
+ # Asking for more unknown keys makes a request.
+ self.reset_smart_call_log()
+ graph = repo.get_graph()
+ self.assertEqual({},
+ graph.get_parent_map(['some-missing', 'other-missing',
+ 'more-missing']))
+ self.assertLength(1, self.hpss_calls)
+
class TestGetParentMapAllowsNew(tests.TestCaseWithTransport):
More information about the bazaar-commits
mailing list