Rev 4191: Negatively cache misses during read-locks in RemoteRepository. in http://people.ubuntu.com/~robertc/baz2.0/pending/Remote.negative_parents_cache

Robert Collins robertc at robertcollins.net
Tue Mar 24 05:12:10 GMT 2009


At http://people.ubuntu.com/~robertc/baz2.0/pending/Remote.negative_parents_cache

------------------------------------------------------------
revno: 4191
revision-id: robertc at robertcollins.net-20090324051156-04bm37t0os1idrrs
parent: pqm at pqm.ubuntu.com-20090324015928-a4eisbr51odi0due
committer: Robert Collins <robertc at robertcollins.net>
branch nick: Remote.negative_parents_cache
timestamp: Tue 2009-03-24 16:11:56 +1100
message:
  Negatively cache misses during read-locks in RemoteRepository.
=== modified file 'NEWS'
--- a/NEWS	2009-03-24 01:08:12 +0000
+++ b/NEWS	2009-03-24 05:11:56 +0000
@@ -196,6 +196,10 @@
   make visible data inserted into the repository by a smart server
   fetch operation. (Robert Collins, Andrew Bennetts)
 
+* ``RemoteRepository`` will now negatively cache missing revisions during
+  ``get_parent_map`` while read-locked. Write-locks are unaffected.
+   (Robert Collins, Andrew Bennetts)
+
 * Removed ``InterRemoteToOther``, ``InterOtherToRemote`` and
   ``InterPackToRemotePack`` classes, as they are now unnecessary.
   (Andrew Bennetts)

=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2009-03-23 07:25:27 +0000
+++ b/bzrlib/graph.py	2009-03-24 05:11:56 +0000
@@ -121,8 +121,8 @@
             self._get_parent_map = self._real_provider.get_parent_map
         else:
             self._get_parent_map = get_parent_map
-        self._cache = {}
-        self._cache_misses = True
+        self._cache = None
+        self.enable_cache(True)
 
     def __repr__(self):
         return "%s(%r)" % (self.__class__.__name__, self._real_provider)
@@ -133,38 +133,44 @@
             raise AssertionError('Cache enabled when already enabled.')
         self._cache = {}
         self._cache_misses = cache_misses
+        self._missing_keys = set()
 
     def disable_cache(self):
         """Disable and clear the cache."""
         self._cache = None
+        self._cache_misses = None
+        self._missing_keys = set()
 
     def get_cached_map(self):
         """Return any cached get_parent_map values."""
         if self._cache is None:
             return None
-        return dict((k, v) for k, v in self._cache.items()
-                    if v is not None)
+        return dict(self._cache)
 
     def get_parent_map(self, keys):
         """See _StackedParentsProvider.get_parent_map."""
-        # Hack to build up the caching logic.
-        ancestry = self._cache
-        if ancestry is None:
-            # Caching is disabled.
-            missing_revisions = set(keys)
-            ancestry = {}
+        cache = self._cache
+        if cache is None:
+            cache = self._get_parent_map(keys)
         else:
-            missing_revisions = set(key for key in keys if key not in ancestry)
-        if missing_revisions:
-            parent_map = self._get_parent_map(missing_revisions)
-            ancestry.update(parent_map)
-            if self._cache_misses:
-                # None is never a valid parents list, so it can be used to
-                # record misses.
-                ancestry.update(dict((k, None) for k in missing_revisions
-                                     if k not in parent_map))
-        present_keys = [k for k in keys if ancestry.get(k) is not None]
-        return dict((k, ancestry[k]) for k in present_keys)
+            # XXX: Full scan of cache, keeping a set of cached keys will scale
+            # better.
+            needed_revisions = set(key for key in keys if key not in cache)
+            # Do not ask for negatively cached keys
+            needed_revisions.difference_update(self._missing_keys)
+            if needed_revisions:
+                parent_map = self._get_parent_map(needed_revisions)
+                cache.update(parent_map)
+                if self._cache_misses:
+                    for key in needed_revisions:
+                        if key not in parent_map:
+                            self._missing_keys.add(key)
+        result = {}
+        for key in keys:
+            value = cache.get(key)
+            if value is not None:
+                result[key] = value
+        return result
 
 
 class Graph(object):

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2009-03-24 01:08:12 +0000
+++ b/bzrlib/remote.py	2009-03-24 05:11:56 +0000
@@ -828,7 +828,7 @@
         if not self._lock_mode:
             self._lock_mode = 'r'
             self._lock_count = 1
-            self._unstacked_provider.enable_cache(cache_misses=False)
+            self._unstacked_provider.enable_cache(cache_misses=True)
             if self._real_repository is not None:
                 self._real_repository.lock_read()
         else:

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2009-03-17 06:18:16 +0000
+++ b/bzrlib/tests/test_graph.py	2009-03-24 05:11:56 +0000
@@ -1385,6 +1385,12 @@
 
 
 class TestCachingParentsProvider(tests.TestCase):
+    """These tests run with:
+
+    self.inst_pp, a recording parents provider with a graph of a->b, and b is a
+    ghost.
+    self.caching_pp, a CachingParentsProvider layered on inst_pp.
+    """
 
     def setUp(self):
         super(TestCachingParentsProvider, self).setUp()
@@ -1409,7 +1415,6 @@
         self.assertEqual({}, self.caching_pp.get_parent_map(['b']))
         # No new calls
         self.assertEqual(['b'], self.inst_pp.calls)
-        self.assertEqual({'b':None}, self.caching_pp._cache)
 
     def test_get_parent_map_mixed(self):
         """Anything that can be returned from cache, should be"""

=== modified file 'bzrlib/tests/test_remote.py'
--- a/bzrlib/tests/test_remote.py	2009-03-24 01:08:12 +0000
+++ b/bzrlib/tests/test_remote.py	2009-03-24 05:11:56 +0000
@@ -1623,6 +1623,31 @@
             errors.UnexpectedSmartServerResponse,
             repo.get_parent_map, ['a-revision-id'])
 
+    def test_get_parent_map_negative_caches_missing_keys(self):
+        self.setup_smart_server_with_call_log()
+        repo = self.make_repository('foo')
+        self.assertIsInstance(repo, RemoteRepository)
+        repo.lock_read()
+        self.addCleanup(repo.unlock)
+        self.reset_smart_call_log()
+        graph = repo.get_graph()
+        self.assertEqual({},
+            graph.get_parent_map(['some-missing', 'other-missing']))
+        self.assertLength(1, self.hpss_calls)
+        # No call if we repeat this
+        self.reset_smart_call_log()
+        graph = repo.get_graph()
+        self.assertEqual({},
+            graph.get_parent_map(['some-missing', 'other-missing']))
+        self.assertLength(0, self.hpss_calls)
+        # Asking for more unknown keys makes a request.
+        self.reset_smart_call_log()
+        graph = repo.get_graph()
+        self.assertEqual({},
+            graph.get_parent_map(['some-missing', 'other-missing',
+                'more-missing']))
+        self.assertLength(1, self.hpss_calls)
+
 
 class TestGetParentMapAllowsNew(tests.TestCaseWithTransport):
 




More information about the bazaar-commits mailing list