Rev 2743: Merge readv latency support. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Mon Aug 27 02:02:53 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2743
revision-id: robertc at robertcollins.net-20070827010243-rwfajn1hq1bh83az
parent: robertc at robertcollins.net-20070826233047-j65jjkr08zzy6zn5
parent: robertc at robertcollins.net-20070826221051-46uq33p3oqkscdd0
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2007-08-27 11:02:43 +1000
message:
  Merge readv latency support.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/tests/test_transport_implementations.py test_transport_implementations.py-20051227111451-f97c5c7d5c49fce7
  bzrlib/transport/__init__.py   transport.py-20050711165921-4978aa7ce1285ad5
  bzrlib/transport/decorator.py  decorator.py-20060402223305-e913a0f25319ab42
  bzrlib/transport/fakevfat.py   fakevfat.py-20060407072414-d59939fa1d6c79d9
  bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
  bzrlib/transport/remote.py     ssh.py-20060608202016-c25gvf1ob7ypbus6-1
  bzrlib/transport/sftp.py       sftp.py-20051019050329-ab48ce71b7e32dfe
    ------------------------------------------------------------
    revno: 2592.1.25.2.7.1.28.1.6.1.3.1.9.2.1.3.74.2.1
    revision-id: robertc at robertcollins.net-20070826221051-46uq33p3oqkscdd0
    parent: pqm at pqm.ubuntu.com-20070823005013-ada9x55rc31yiwou
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: transport-get-file
    timestamp: Mon 2007-08-27 08:10:51 +1000
    message:
      * New parameter on ``bzrlib.transport.Transport.readv``
        ``adjust_for_latency`` which changes readv from returning strictly the
        requested data to instead return larger ranges and in forward read order
        to reduce the effect of network latency. (Robert Collins)
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/tests/test_transport_implementations.py test_transport_implementations.py-20051227111451-f97c5c7d5c49fce7
      bzrlib/transport/__init__.py   transport.py-20050711165921-4978aa7ce1285ad5
      bzrlib/transport/decorator.py  decorator.py-20060402223305-e913a0f25319ab42
      bzrlib/transport/fakevfat.py   fakevfat.py-20060407072414-d59939fa1d6c79d9
      bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
      bzrlib/transport/remote.py     ssh.py-20060608202016-c25gvf1ob7ypbus6-1
      bzrlib/transport/sftp.py       sftp.py-20051019050329-ab48ce71b7e32dfe
=== modified file 'NEWS'
--- a/NEWS	2007-08-26 23:30:47 +0000
+++ b/NEWS	2007-08-27 01:02:43 +0000
@@ -80,6 +80,11 @@
       incremental addition of data to a file without requiring that all the
       data be buffered in memory. (Robert Collins)
 
+    * New parameter on ``bzrlib.transport.Transport.readv``
+      ``adjust_for_latency`` which changes readv from returning strictly the
+      requested data to instead return larger ranges and in forward read order
+      to reduce the effect of network latency. (Robert Collins)
+
 
 bzr 0.90 2007-08-??
 ===================

=== modified file 'bzrlib/tests/test_transport_implementations.py'
--- a/bzrlib/tests/test_transport_implementations.py	2007-08-22 06:23:52 +0000
+++ b/bzrlib/tests/test_transport_implementations.py	2007-08-27 01:02:43 +0000
@@ -1485,6 +1485,70 @@
         self.assertEqual(d[2], (0, '0'))
         self.assertEqual(d[3], (3, '34'))
 
+    def test_readv_with_adjust_for_latency(self):
+        transport = self.get_transport()
+        # the adjust for latency flag expands the data region returned
+        # according to a per-transport heuristic, so testing is a little
+        # tricky as we need more data than the largest combining that our
+        # transports do. To accommodate this we generate random data and cross
+        # reference the returned data with the random data. To avoid doing
+        # multiple large random byte look ups we do several tests on the same
+        # backing data.
+        content = osutils.rand_bytes(200*1024)
+        if transport.is_readonly():
+            file('a', 'w').write(content)
+        else:
+            transport.put_bytes('a', content)
+        def check_result_data(result_vector):
+            for item in result_vector:
+                data_len = len(item[1])
+                self.assertEqual(content[item[0]:item[0] + data_len], item[1])
+
+        # start corner case
+        result = list(transport.readv('a', ((0, 30),),
+            adjust_for_latency=True))
+        # we expect 1 result, from 0, to something > 30
+        self.assertEqual(1, len(result))
+        self.assertEqual(0, result[0][0])
+        self.assertTrue(len(result[0][1]) >= 30)
+        check_result_data(result)
+        # end of file corner case
+        result = list(transport.readv('a', ((204700, 100),),
+            adjust_for_latency=True))
+        # we expect 1 result, from 204800- its length, to the end
+        self.assertEqual(1, len(result))
+        data_len = len(result[0][1])
+        self.assertEqual(204800-data_len, result[0][0])
+        self.assertTrue(data_len >= 100)
+        check_result_data(result)
+        # out of order ranges are made in order
+        result = list(transport.readv('a', ((204700, 100), (0, 50)),
+            adjust_for_latency=True))
+        # we expect 2 results, in order, start and end.
+        self.assertEqual(2, len(result))
+        # start
+        data_len = len(result[0][1])
+        self.assertEqual(0, result[0][0])
+        self.assertTrue(data_len >= 30)
+        # end
+        data_len = len(result[1][1])
+        self.assertEqual(204800-data_len, result[1][0])
+        self.assertTrue(data_len >= 100)
+        check_result_data(result)
+        # close ranges get combined (even if out of order)
+        for request_vector in [((400,50), (800, 234)), ((800, 234), (400,50))]:
+            result = list(transport.readv('a', request_vector,
+                adjust_for_latency=True))
+            self.assertEqual(1, len(result))
+            data_len = len(result[0][1])
+            # minimum length is from 400 to 1034, i.e. 634
+            self.assertTrue(data_len >= 634)
+            # must contain the region 400 to 1034
+            self.assertTrue(result[0][0] <= 400)
+            self.assertTrue(result[0][0] + data_len >= 1034)
+            check_result_data(result)
+        
+
     def test_get_with_open_write_stream_sees_all_content(self):
         t = self.get_transport()
         if t.is_readonly():

=== modified file 'bzrlib/transport/__init__.py'
--- a/bzrlib/transport/__init__.py	2007-08-22 08:09:05 +0000
+++ b/bzrlib/transport/__init__.py	2007-08-27 01:02:43 +0000
@@ -635,7 +635,56 @@
         """
         raise errors.NoSmartMedium(self)
 
-    def readv(self, relpath, offsets):
+    def readv(self, relpath, offsets, adjust_for_latency=False):
+        """Get parts of the file at the given relative path.
+
+        :param relpath: The path to read data from.
+        :param offsets: A list of (offset, size) tuples.
+        :param adjust_for_latency: Adjust the requested offsets to accommodate
+            transport latency. This may re-order the offsets, expand them to
+            grab adjacent data when there is likely a high cost to requesting
+            data relative to delivering it.
+        :return: A list or generator of (offset, data) tuples
+        """
+        if adjust_for_latency:
+            offsets = sorted(offsets)
+            # short circuit empty requests
+            if len(offsets) == 0:
+                def empty_yielder():
+                    # Quick thunk to stop this function becoming a generator
+                    # itself, rather we return a generator that has nothing to
+                    # yield.
+                    if False:
+                        yield None
+                return empty_yielder()
+            # expand by page size at either end
+            expansion = self.recommended_page_size() / 2
+            new_offsets = []
+            for offset, length in offsets:
+                new_offset = offset - expansion
+                new_length = length + expansion
+                if new_offset < 0:
+                    # don't ask for anything < 0
+                    new_length -= new_offset
+                    new_offset = 0
+                new_offsets.append((new_offset, new_length))
+            # combine the expanded offsets
+            offsets = []
+            current_offset, current_length = new_offsets[0]
+            current_finish = current_length + current_offset
+            for offset, length in new_offsets[1:]:
+                if offset > current_finish:
+                    offsets.append((current_offset, current_length))
+                    current_offset = offset
+                    current_length = length
+                    continue
+                finish = offset + length
+                if finish > current_finish:
+                    current_finish = finish
+            offsets.append((current_offset, current_length))
+        return self._readv(relpath, offsets)
+
+    def _readv(self, relpath, offsets):
         """Get parts of the file at the given relative path.
 
         :offsets: A list of (offset, size) tuples.

=== modified file 'bzrlib/transport/decorator.py'
--- a/bzrlib/transport/decorator.py	2007-08-22 08:09:05 +0000
+++ b/bzrlib/transport/decorator.py	2007-08-27 01:02:43 +0000
@@ -138,6 +138,10 @@
         """See Transport.list_dir()."""
         return self._decorated.list_dir(relpath)
 
+    def _readv(self, relpath, offsets):
+        """See Transport._readv."""
+        return self._decorated._readv(relpath, offsets)
+
     def recommended_page_size(self):
         """See Transport.recommended_page_size()."""
         return self._decorated.recommended_page_size()

=== modified file 'bzrlib/transport/fakevfat.py'
--- a/bzrlib/transport/fakevfat.py	2007-08-15 06:53:07 +0000
+++ b/bzrlib/transport/fakevfat.py	2007-08-26 22:10:51 +0000
@@ -92,7 +92,7 @@
     def has(self, relpath):
         return self._decorated.has(self._squash_name(relpath))
 
-    def readv(self, relpath, offsets):
+    def _readv(self, relpath, offsets):
         return self._decorated.readv(self._squash_name(relpath), offsets)
 
     def put_file(self, relpath, f, mode=None):

=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py	2007-08-05 01:47:30 +0000
+++ b/bzrlib/transport/http/__init__.py	2007-08-26 22:10:51 +0000
@@ -252,7 +252,7 @@
     # to avoid downloading the whole file.
     _max_readv_combined = 0
 
-    def readv(self, relpath, offsets):
+    def _readv(self, relpath, offsets):
         """Get parts of the file at the given relative path.
 
         :param offsets: A list of (offset, size) tuples.

=== modified file 'bzrlib/transport/remote.py'
--- a/bzrlib/transport/remote.py	2007-08-22 08:09:05 +0000
+++ b/bzrlib/transport/remote.py	2007-08-27 01:02:43 +0000
@@ -290,7 +290,7 @@
         # the external path for RemoteTransports is the base
         return self.base
 
-    def readv(self, relpath, offsets):
+    def _readv(self, relpath, offsets):
         if not offsets:
             return
 

=== modified file 'bzrlib/transport/sftp.py'
--- a/bzrlib/transport/sftp.py	2007-08-22 08:09:05 +0000
+++ b/bzrlib/transport/sftp.py	2007-08-27 01:02:43 +0000
@@ -247,7 +247,7 @@
             self._translate_io_exception(e, path, ': error retrieving',
                 failure_exc=errors.ReadError)
 
-    def readv(self, relpath, offsets):
+    def _readv(self, relpath, offsets):
         """See Transport.readv()"""
         # We overload the default readv() because we want to use a file
         # that does not have prefetch enabled.



More information about the bazaar-commits mailing list