Rev 3195: * New smart method ``Repository.get_parent_map`` for getting revision in http://people.ubuntu.com/~robertc/baz2.0/search-results

Robert Collins robertc at robertcollins.net
Fri Jan 18 03:43:35 GMT 2008


At http://people.ubuntu.com/~robertc/baz2.0/search-results

------------------------------------------------------------
revno: 3195
revision-id:robertc at robertcollins.net-20080118034319-tp2b9j8u4h1uxspx
parent: robertc at robertcollins.net-20080118034035-ay3xbm04pypsago2
committer: Robert Collins <robertc at robertcollins.net>
branch nick: Repository.get_parent_map
timestamp: Fri 2008-01-18 14:43:19 +1100
message:
   * New smart method ``Repository.get_parent_map`` for getting revision
     parent data. This returns additional parent information topologically
     adjacent to the requested data to reduce round trip latency impacts.
     (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/smart/repository.py     repository.py-20061128022038-vr5wy5bubyb8xttk-1
  bzrlib/tests/test_remote.py    test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
=== modified file 'NEWS'
--- a/NEWS	2008-01-18 02:36:57 +0000
+++ b/NEWS	2008-01-18 03:43:19 +0000
@@ -25,6 +25,11 @@
     * ``merge --preview`` produces a diff of the changes merge would make,
       but does not actually perform the merge.  (Aaron Bentley)
 
+    * New smart method ``Repository.get_parent_map`` for getting revision
+      parent data. This returns additional parent information topologically
+      adjacent to the requested data to reduce round trip latency impacts.
+      (Robert Collins)
+
     * New smart method, ``Repository.stream_revisions_chunked``, for fetching
       revision data that streams revision data via a chunked encoding.  This
       avoids buffering large amounts of revision data on the server and on the

=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2008-01-18 02:36:57 +0000
+++ b/bzrlib/graph.py	2008-01-18 03:43:19 +0000
@@ -544,8 +544,16 @@
             # exclude keys for them. However, while we could have a second
             # look-ahead result buffer and shuffle things around, this method
             # is typically only called once per search - when memoising the
-            # results of the search.
-            found, ghosts, next, parents = self._do_query(self._next_query)
+            # results of the search. 
+            # XXX: TODO: NOTE: Note that we cannot supply the current search to
+            # the call to get_parent_map in this case because it leads to
+            # infinite recursion. (remote.g_p_m -> get_result ->
+            # remote.get_parent_map) - the revisions needed to determine what
+            # keys to exclude to recreate current state are precisely those
+            # being queried. This is likely a problem that will need to be
+            # addressed (perhaps by extending the search recipe to have two
+            # exclude lists - exclude_from and exclude_at). - RBC 20080118
+            found, ghosts, next, parents = self._do_query(self._next_query, False)
             # pretend we didn't query: perhaps we should tweak _do_query to be
             # entirely stateless?
             self.seen.difference_update(next)
@@ -618,7 +626,7 @@
         # repeated when ghosts are filled.
         self._stopped_keys.update(ghosts)
 
-    def _do_query(self, revisions):
+    def _do_query(self, revisions, provide_self=True):
         """Query for revisions.
 
         Adds revisions to the seen set.
@@ -632,7 +640,12 @@
         # revisions may contain nodes that point to other nodes in revisions:
         # we want to filter them out.
         self.seen.update(revisions)
-        parent_map = self._parents_provider.get_parent_map(revisions, self)
+        if provide_self:
+            current_search = self
+        else:
+            current_search = None
+        parent_map = self._parents_provider.get_parent_map(revisions,
+            current_search)
         for rev_id, parents in parent_map.iteritems():
             found_parents.add(rev_id)
             parents_of_found.update(p for p in parents if p not in self.seen)

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2008-01-18 02:36:57 +0000
+++ b/bzrlib/remote.py	2008-01-18 03:43:19 +0000
@@ -767,7 +767,8 @@
         ancestry = self._parents_map
         missing_revisions = set(key for key in keys if key not in ancestry)
         if missing_revisions:
-            self._parents_map.update(self._get_parent_map(missing_revisions))
+            self._parents_map.update(
+                self._get_parent_map(missing_revisions, current_search))
         return dict((k, ancestry[k]) for k in keys if k in ancestry)
 
     def _response_is_unknown_method(self, response, verb):
@@ -788,7 +789,7 @@
            return True
         return False
 
-    def _get_parent_map(self, keys):
+    def _get_parent_map(self, keys, current_search):
         """Helper for get_parent_map that performs the RPC."""
         keys = set(keys)
         if NULL_REVISION in keys:
@@ -798,12 +799,18 @@
                 return found_parents
         else:
             found_parents = {}
+        if current_search is not None:
+            recipe = current_search.get_result().get_recipe()
+        else:
+            recipe = (set(), set(), 0)
+        body = self._serialise_search_recipe(recipe)
         path = self.bzrdir._path_for_remote_call(self._client)
         for key in keys:
             assert type(key) is str
         verb = 'Repository.get_parent_map'
-        response = self._client.call_expecting_body(
-            verb, path, *keys)
+        args = (path,) + tuple(keys)
+        response = self._client.call_with_body_bytes_expecting_body(
+            verb, args, self._serialise_search_recipe(recipe))
         if self._response_is_unknown_method(response, verb):
             # Server that does not support this method, get the whole graph.
             response = self._client.call_expecting_body(
@@ -968,11 +975,7 @@
     def get_data_stream_for_search(self, search):
         REQUEST_NAME = 'Repository.stream_revisions_chunked'
         path = self.bzrdir._path_for_remote_call(self._client)
-        recipe = search.get_recipe()
-        start_keys = ' '.join(recipe[0])
-        stop_keys = ' '.join(recipe[1])
-        count = str(recipe[2])
-        body = '\n'.join((start_keys, stop_keys, count))
+        body = self._serialise_search_recipe(search.get_recipe())
         response, protocol = self._client.call_with_body_bytes_expecting_body(
             REQUEST_NAME, (path,), body)
 
@@ -1033,6 +1036,17 @@
     def _make_parents_provider(self):
         return self
 
+    def _serialise_search_recipe(self, recipe):
+        """Serialise a graph search recipe.
+
+        :param recipe: A search recipe (start, stop, count).
+        :return: Serialised bytes.
+        """
+        start_keys = ' '.join(recipe[0])
+        stop_keys = ' '.join(recipe[1])
+        count = str(recipe[2])
+        return '\n'.join((start_keys, stop_keys, count))
+
 
 class RemoteBranchLockableFiles(LockableFiles):
     """A 'LockableFiles' implementation that talks to a smart server.

=== modified file 'bzrlib/smart/repository.py'
--- a/bzrlib/smart/repository.py	2008-01-17 07:47:52 +0000
+++ b/bzrlib/smart/repository.py	2008-01-18 03:43:19 +0000
@@ -58,33 +58,77 @@
         # is expected)
         return None
 
+    def recreate_search(self, repository, recipe_bytes):
+        lines = recipe_bytes.split('\n')
+        start_keys = set(lines[0].split(' '))
+        exclude_keys = set(lines[1].split(' '))
+        revision_count = int(lines[2])
+        repository.lock_read()
+        try:
+            search = repository.get_graph()._make_breadth_first_searcher(
+                start_keys)
+            while True:
+                try:
+                    next_revs = search.next()
+                except StopIteration:
+                    break
+                search.stop_searching_any(exclude_keys.intersection(next_revs))
+            search_result = search.get_result()
+            if search_result.get_recipe()[2] != revision_count:
+                # we got back a different amount of data than expected, this
+                # gets reported as NoSuchRevision, because fewer revisions
+                # indicates missing revisions, and more should never happen as
+                # the excludes list considers ghosts and ensures that ghost
+                # filling races are not a problem.
+                return (None, FailedSmartServerResponse(('NoSuchRevision',)))
+            return (search, None)
+        finally:
+            repository.unlock()
+
 
 class SmartServerRepositoryGetParentMap(SmartServerRepositoryRequest):
+    """Bzr 1.2+ - get parent data for revisions during a graph search."""
     
     def do_repository_request(self, repository, *revision_ids):
-        repository.lock_read()
-        try:
-            return self._do_repository_request(repository, revision_ids)
-        finally:
-            repository.unlock()
-
-    def _do_repository_request(self, repository, revision_ids):
         """Get parent details for some revisions.
         
         All the parents for revision_ids are returned. Additionally up to 64KB
         of additional parent data found by performing a breadth first search
-        from revision_ids is returned.
+        from revision_ids is returned. The verb takes a body containing the
+        current search state, see do_body for details.
 
         :param repository: The repository to query in.
         :param revision_ids: The utf8 encoded revision_id to answer for.
+        """
+        self._revision_ids = revision_ids
+        return None # Signal that we want a body.
+
+    def do_body(self, body_bytes):
+        """Process the current search state and perform the parent lookup.
+
         :return: A smart server response where the body contains an utf8
             encoded flattened list of the parents of the revisions, (the same
             format as Repository.get_revision_graph).
         """
+        repository = self._repository
+        repository.lock_read()
+        try:
+            return self._do_repository_request(body_bytes)
+        finally:
+            repository.unlock()
+
+    def _do_repository_request(self, body_bytes):
+        repository = self._repository
+        revision_ids = set(self._revision_ids)
+        search, error = self.recreate_search(repository, body_bytes)
+        if error is not None:
+            return error
+        # TODO: might be nice to start up the search again; but that's not
+        # written or tested yet.
+        queried_revs = set(search.get_result().get_keys())
         lines = []
         repo_graph = repository.get_graph()
         result = {}
-        queried_revs = set()
         size_so_far = 0
         next_revs = revision_ids
         first_loop_done = False
@@ -342,31 +386,15 @@
     """Bzr 1.1+ streaming pull."""
 
     def do_body(self, body_bytes):
-        lines = body_bytes.split('\n')
-        start_keys = set(lines[0].split(' '))
-        exclude_keys = set(lines[1].split(' '))
-        revision_count = int(lines[2])
         repository = self._repository
         repository.lock_read()
         try:
-            search = repository.get_graph()._make_breadth_first_searcher(
-                start_keys)
-            while True:
-                try:
-                    next_revs = search.next()
-                except StopIteration:
-                    break
-                search.stop_searching_any(exclude_keys.intersection(next_revs))
-            search_result = search.get_result()
-            if search_result.get_recipe()[2] != revision_count:
-                # we got back a different amount of data than expected, this
-                # gets reported as NoSuchRevision, because less revisions
-                # indicates missing revisions, and more should never happen as
-                # the excludes list considers ghosts and ensures that ghost
-                # filling races are not a problem.
-                return FailedSmartServerResponse(('NoSuchRevision',))
-            stream = repository.get_data_stream_for_search(search_result)
+            search, error = self.recreate_search(repository, body_bytes)
+            if error is not None:
+                return error
+            stream = repository.get_data_stream_for_search(search.get_result())
         except Exception:
+            # On non-error, unlocking is done by the body stream handler.
             repository.unlock()
             raise
         return SuccessfulSmartServerResponse(('ok',),

=== modified file 'bzrlib/tests/test_remote.py'
--- a/bzrlib/tests/test_remote.py	2008-01-17 07:47:52 +0000
+++ b/bzrlib/tests/test_remote.py	2008-01-18 03:43:19 +0000
@@ -599,8 +599,8 @@
         parents = graph.get_parent_map([r1])
         self.assertEqual({r1: (NULL_REVISION,)}, parents)
         self.assertEqual(
-            [('call_expecting_body', 'Repository.get_parent_map',
-             ('quack/', r2))],
+            [('call_with_body_bytes_expecting_body',
+              'Repository.get_parent_map', ('quack/', r2), '\n\n0')],
             client._calls)
         repo.unlock()
         # now we call again, and it should use the second response.
@@ -609,10 +609,10 @@
         parents = graph.get_parent_map([r1])
         self.assertEqual({r1: (NULL_REVISION,)}, parents)
         self.assertEqual(
-            [('call_expecting_body', 'Repository.get_parent_map',
-              ('quack/', r2)),
-             ('call_expecting_body', 'Repository.get_parent_map',
-              ('quack/', r1))
+            [('call_with_body_bytes_expecting_body',
+              'Repository.get_parent_map', ('quack/', r2), '\n\n0'),
+             ('call_with_body_bytes_expecting_body',
+              'Repository.get_parent_map', ('quack/', r1), '\n\n0'),
             ],
             client._calls)
         repo.unlock()



More information about the bazaar-commits mailing list