Rev 3194: Change the smart server verb for Repository.stream_revisions_chunked to use SearchResults as the request mechanism for downloads. in http://people.ubuntu.com/~robertc/baz2.0/search-results

Robert Collins robertc at robertcollins.net
Thu Jan 17 07:48:05 GMT 2008


At http://people.ubuntu.com/~robertc/baz2.0/search-results

------------------------------------------------------------
revno: 3194
revision-id:robertc at robertcollins.net-20080117074752-1d1gyckmu7ej1i4u
parent: robertc at robertcollins.net-20080117053053-2vx6ff0yr2bo9f2v
committer: Robert Collins <robertc at robertcollins.net>
branch nick: stream-revisions-for-search
timestamp: Thu 2008-01-17 18:47:52 +1100
message:
  Change the smart server verb for Repository.stream_revisions_chunked to use SearchResults as the request mechanism for downloads.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/smart/client.py         client.py-20061116014825-2k6ada6xgulslami-1
  bzrlib/smart/repository.py     repository.py-20061128022038-vr5wy5bubyb8xttk-1
  bzrlib/smart/request.py        request.py-20061108095550-gunadhxmzkdjfeek-1
  bzrlib/tests/test_remote.py    test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
  bzrlib/tests/test_smart.py     test_smart.py-20061122024551-ol0l0o0oofsu9b3t-2
=== modified file 'NEWS'
--- a/NEWS	2008-01-17 05:30:53 +0000
+++ b/NEWS	2008-01-17 07:47:52 +0000
@@ -25,7 +25,8 @@
    * New smart method, ``Repository.stream_revisions_chunked``, for fetching
      revision data that streams revision data via a chunked encoding.  This
      avoids buffering large amounts of revision data on the server and on the
-     client.  (Andrew Bennetts, #178353)
+     client, and sends less data to the server to request the revisions.
+     (Andrew Bennetts, Robert Collins, #178353)
 
    * diff '--using' allows an external diff tool to be used for files.
      (Aaron Bentley)

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2008-01-17 05:30:53 +0000
+++ b/bzrlib/remote.py	2008-01-17 07:47:52 +0000
@@ -966,14 +966,22 @@
         return self._real_repository.has_signature_for_revision_id(revision_id)
 
     def get_data_stream_for_search(self, search):
-        revision_ids = search.get_keys()
         REQUEST_NAME = 'Repository.stream_revisions_chunked'
         path = self.bzrdir._path_for_remote_call(self._client)
-        response, protocol = self._client.call_expecting_body(
-            REQUEST_NAME, path, *revision_ids)
+        recipe = search.get_recipe()
+        start_keys = ' '.join(recipe[0])
+        stop_keys = ' '.join(recipe[1])
+        count = str(recipe[2])
+        body = '\n'.join((start_keys, stop_keys, count))
+        response, protocol = self._client.call_with_body_bytes_expecting_body(
+            REQUEST_NAME, (path,), body)
 
         if response == ('ok',):
             return self._deserialise_stream(protocol)
+        if response == ('NoSuchRevision', ):
+            # We cannot easily identify the revision that is missing in this
+            # situation without doing much more network IO. For now, bail.
+            raise NoSuchRevision(self, "unknown")
         elif (response == ('error', "Generic bzr smart protocol error: "
                 "bad request '%s'" % REQUEST_NAME) or
               response == ('error', "Generic bzr smart protocol error: "

=== modified file 'bzrlib/smart/client.py'
--- a/bzrlib/smart/client.py	2008-01-14 22:45:15 +0000
+++ b/bzrlib/smart/client.py	2008-01-17 07:47:52 +0000
@@ -65,6 +65,20 @@
         smart_protocol.call_with_body_bytes((method, ) + args, body)
         return smart_protocol.read_response_tuple()
 
+    def call_with_body_bytes_expecting_body(self, method, args, body):
+        """Call a method on the remote server with body bytes."""
+        if type(method) is not str:
+            raise TypeError('method must be a byte string, not %r' % (method,))
+        for arg in args:
+            if type(arg) is not str:
+                raise TypeError('args must be byte strings, not %r' % (args,))
+        if type(body) is not str:
+            raise TypeError('body must be byte string, not %r' % (body,))
+        request = self.get_smart_medium().get_request()
+        smart_protocol = protocol.SmartClientRequestProtocolTwo(request)
+        smart_protocol.call_with_body_bytes((method, ) + args, body)
+        return smart_protocol.read_response_tuple(expect_body=True), smart_protocol
+
     def remote_path_from_transport(self, transport):
         """Convert transport into a path suitable for using in a request.
         

=== modified file 'bzrlib/smart/repository.py'
--- a/bzrlib/smart/repository.py	2008-01-17 05:30:53 +0000
+++ b/bzrlib/smart/repository.py	2008-01-17 07:47:52 +0000
@@ -48,8 +48,15 @@
         """
         transport = self._backing_transport.clone(path)
         bzrdir = BzrDir.open_from_transport(transport)
-        repository = bzrdir.open_repository()
-        return self.do_repository_request(repository, *args)
+        # Save the repository for use with do_body.
+        self._repository = bzrdir.open_repository()
+        return self.do_repository_request(self._repository, *args)
+
+    def do_repository_request(self, repository, *args):
+        """Override to provide an implementation for a verb."""
+        # No-op for verbs that take bodies (None as a result indicates a body
+        # is expected)
+        return None
 
 
 class SmartServerRepositoryGetParentMap(SmartServerRepositoryRequest):
@@ -307,6 +314,7 @@
 
 
 class SmartServerRepositoryStreamKnitDataForRevisions(SmartServerRepositoryRequest):
+    """Bzr <= 1.1 streaming pull, buffers all data on server."""
 
     def do_repository_request(self, repository, *revision_ids):
         repository.lock_read()
@@ -331,12 +339,33 @@
 
 
 class SmartServerRepositoryStreamRevisionsChunked(SmartServerRepositoryRequest):
+    """Bzr 1.1+ streaming pull."""
 
-    def do_repository_request(self, repository, *revision_ids):
+    def do_body(self, body_bytes):
+        lines = body_bytes.split('\n')
+        start_keys = set(lines[0].split(' '))
+        exclude_keys = set(lines[1].split(' '))
+        revision_count = int(lines[2])
+        repository = self._repository
         repository.lock_read()
         try:
-            stream = repository.get_data_stream_for_search(
-                repository.revision_ids_to_search_result(set(revision_ids)))
+            search = repository.get_graph()._make_breadth_first_searcher(
+                start_keys)
+            while True:
+                try:
+                    next_revs = search.next()
+                except StopIteration:
+                    break
+                search.stop_searching_any(exclude_keys.intersection(next_revs))
+            search_result = search.get_result()
+            if search_result.get_recipe()[2] != revision_count:
+                # we got back a different amount of data than expected, this
+                # gets reported as NoSuchRevision, because less revisions
+                # indicates missing revisions, and more should never happen as
+                # the excludes list considers ghosts and ensures that ghost
+                # filling races are not a problem.
+                return FailedSmartServerResponse(('NoSuchRevision',))
+            stream = repository.get_data_stream_for_search(search_result)
         except Exception:
             repository.unlock()
             raise
@@ -350,6 +379,8 @@
             for name_tuple, bytes in stream:
                 yield pack.bytes_record(bytes, [name_tuple])
         except errors.RevisionNotPresent, e:
+            # This shouldn't be able to happen, but as we don't buffer
+            # everything it can in theory happen.
             yield FailedSmartServerResponse(('NoSuchRevision', e.revision_id))
         repository.unlock()
         pack.end()

=== modified file 'bzrlib/smart/request.py'
--- a/bzrlib/smart/request.py	2008-01-14 04:46:08 +0000
+++ b/bzrlib/smart/request.py	2008-01-17 07:47:52 +0000
@@ -66,6 +66,8 @@
 
     def do_body(self, body_bytes):
         """Called if the client sends a body with the request.
+
+        The do() method is still called, and must have returned None.
         
         Must return a SmartServerResponse.
         """
@@ -330,10 +332,6 @@
     'bzrlib.smart.repository',
     'SmartServerRepositoryStreamRevisionsChunked')
 request_handlers.register_lazy(
-    'Repository.chunked_stream_knit_data_for_revisions',
-    'bzrlib.smart.repository',
-    'SmartServerRepositoryStreamKnitDataForRevisions')
-request_handlers.register_lazy(
     'Repository.get_revision_graph', 'bzrlib.smart.repository', 'SmartServerRepositoryGetRevisionGraph')
 request_handlers.register_lazy(
     'Repository.has_revision', 'bzrlib.smart.repository', 'SmartServerRequestHasRevision')

=== modified file 'bzrlib/tests/test_remote.py'
--- a/bzrlib/tests/test_remote.py	2008-01-17 05:30:53 +0000
+++ b/bzrlib/tests/test_remote.py	2008-01-17 07:47:52 +0000
@@ -152,6 +152,13 @@
         self.expecting_body = True
         return result[0], FakeProtocol(result[1], self)
 
+    def call_with_body_bytes_expecting_body(self, method, args, body):
+        self._calls.append(('call_with_body_bytes_expecting_body', method,
+            args, body))
+        result = self.responses.pop(0)
+        self.expecting_body = True
+        return result[0], FakeProtocol(result[1], self)
+
 
 class FakeMedium(object):
 

=== modified file 'bzrlib/tests/test_smart.py'
--- a/bzrlib/tests/test_smart.py	2008-01-14 04:46:08 +0000
+++ b/bzrlib/tests/test_smart.py	2008-01-17 07:47:52 +0000
@@ -829,11 +829,13 @@
         tree.add('')
         rev_id1_utf8 = u'\xc8'.encode('utf-8')
         rev_id2_utf8 = u'\xc9'.encode('utf-8')
-        r1 = tree.commit('1st commit', rev_id=rev_id1_utf8)
-        r1 = tree.commit('2nd commit', rev_id=rev_id2_utf8)
+        tree.commit('1st commit', rev_id=rev_id1_utf8)
+        tree.commit('2nd commit', rev_id=rev_id2_utf8)
         tree.unlock()
 
-        response = request.execute(backing.local_abspath(''), rev_id2_utf8)
+        response = request.execute(backing.local_abspath(''))
+        self.assertEqual(None, response)
+        response = request.do_body("%s\n%s\n1" % (rev_id2_utf8, rev_id1_utf8))
         self.assertEqual(('ok',), response.args)
         from cStringIO import StringIO
         parser = pack.ContainerPushParser()
@@ -853,7 +855,10 @@
             backing)
         repo = self.make_repository('.')
         rev_id1_utf8 = u'\xc8'.encode('utf-8')
-        response = request.execute(backing.local_abspath(''), rev_id1_utf8)
+        response = request.execute(backing.local_abspath(''))
+        self.assertEqual(None, response)
+        response = request.do_body("%s\n\n1" % (rev_id1_utf8,))
+        self.assertEqual(('ok',), response.args)
         # There's no error initially.
         self.assertTrue(response.is_successful())
         self.assertEqual(('ok',), response.args)
@@ -865,6 +870,19 @@
             last_chunk,
             FailedSmartServerResponse(('NoSuchRevision', rev_id1_utf8)))
 
+    def test_no_such_revision_error(self):
+        backing = self.get_transport()
+        request = smart.repository.SmartServerRepositoryStreamRevisionsChunked(
+            backing)
+        repo = self.make_repository('.')
+        rev_id1_utf8 = u'\xc8'.encode('utf-8')
+        response = request.execute(backing.local_abspath(''))
+        self.assertEqual(None, response)
+        response = request.do_body("%s\n\n1" % (rev_id1_utf8,))
+        self.assertEqual(
+            FailedSmartServerResponse(('NoSuchRevision', )),
+            response)
+
 
 class TestSmartServerIsReadonly(tests.TestCaseWithTransport):
 
@@ -935,10 +953,6 @@
             smart.request.request_handlers.get('Repository.lock_write'),
             smart.repository.SmartServerRepositoryLockWrite)
         self.assertEqual(
-            smart.request.request_handlers.get(
-                'Repository.chunked_stream_knit_data_for_revisions'),
-            smart.repository.SmartServerRepositoryStreamKnitDataForRevisions)
-        self.assertEqual(
             smart.request.request_handlers.get('Repository.tarball'),
             smart.repository.SmartServerRepositoryTarball)
         self.assertEqual(



More information about the bazaar-commits mailing list