Rev 3197: Do not chop Repository.get_parent_map results at exactly 64K; finish the current search iteration instead, to avoid causing search skew with the client, which leads to many small requests. Branch: http://people.ubuntu.com/~robertc/baz2.0/search-results

Robert Collins robertc at robertcollins.net
Fri Jan 18 05:18:29 GMT 2008


At http://people.ubuntu.com/~robertc/baz2.0/search-results

------------------------------------------------------------
revno: 3197
revision-id:robertc at robertcollins.net-20080118051823-oswu043b6huldks3
parent: robertc at robertcollins.net-20080118040529-7uc3xpwd6yovctoz
committer: Robert Collins <robertc at robertcollins.net>
branch nick: Repository.get_parent_map
timestamp: Fri 2008-01-18 16:18:23 +1100
message:
  Do not chop Repository.get_parent_map results at exactly 64K; finish the current search iteration instead, to avoid causing search skew with the client, which leads to many small requests.
modified:
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2008-01-18 03:43:19 +0000
+++ b/bzrlib/remote.py	2008-01-18 05:18:23 +0000
@@ -768,7 +768,7 @@
         missing_revisions = set(key for key in keys if key not in ancestry)
         if missing_revisions:
             self._parents_map.update(
-                self._get_parent_map(missing_revisions, current_search))
+                self._get_parent_map(missing_revisions))
         return dict((k, ancestry[k]) for k in keys if k in ancestry)
 
     def _response_is_unknown_method(self, response, verb):
@@ -789,7 +789,7 @@
            return True
         return False
 
-    def _get_parent_map(self, keys, current_search):
+    def _get_parent_map(self, keys):
         """Helper for get_parent_map that performs the RPC."""
         keys = set(keys)
         if NULL_REVISION in keys:
@@ -799,10 +799,25 @@
                 return found_parents
         else:
             found_parents = {}
-        if current_search is not None:
-            recipe = current_search.get_result().get_recipe()
-        else:
-            recipe = (set(), set(), 0)
+        # TODO(Needs analysis): We could assume that the keys being requested
+        # from get_parent_map are in a breadth first search, so typically they
+        # will all be depth N from some common parent, and we don't have to
+        # have the server iterate from the root parent, but rather from the
+        # keys we're searching; and just tell the server the keyspace we
+        # already have; but this may be more traffic again.
+
+        # Transform self._parents_map into a search request recipe.
+        # TODO: Manage this incrementally to avoid covering the same path
+        # repeatedly. (The server will have to on each request, but the less
+        # work done the better).
+        start_set = set(self._parents_map)
+        result_parents = set()
+        for parents in self._parents_map.itervalues():
+            result_parents.update(parents)
+        stop_keys = result_parents.difference(start_set)
+        included_keys = start_set.intersection(result_parents)
+        start_set.difference_update(included_keys)
+        recipe = (start_set, stop_keys, len(self._parents_map))
         body = self._serialise_search_recipe(recipe)
         path = self.bzrdir._path_for_remote_call(self._client)
         for key in keys:



More information about the bazaar-commits mailing list