Rev 2743: Merge readv latency support. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Mon Aug 27 02:02:53 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 2743
revision-id: robertc at robertcollins.net-20070827010243-rwfajn1hq1bh83az
parent: robertc at robertcollins.net-20070826233047-j65jjkr08zzy6zn5
parent: robertc at robertcollins.net-20070826221051-46uq33p3oqkscdd0
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2007-08-27 11:02:43 +1000
message:
Merge readv latency support.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/tests/test_transport_implementations.py test_transport_implementations.py-20051227111451-f97c5c7d5c49fce7
bzrlib/transport/__init__.py transport.py-20050711165921-4978aa7ce1285ad5
bzrlib/transport/decorator.py decorator.py-20060402223305-e913a0f25319ab42
bzrlib/transport/fakevfat.py fakevfat.py-20060407072414-d59939fa1d6c79d9
bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
bzrlib/transport/remote.py ssh.py-20060608202016-c25gvf1ob7ypbus6-1
bzrlib/transport/sftp.py sftp.py-20051019050329-ab48ce71b7e32dfe
------------------------------------------------------------
revno: 2592.1.25.2.7.1.28.1.6.1.3.1.9.2.1.3.74.2.1
revision-id: robertc at robertcollins.net-20070826221051-46uq33p3oqkscdd0
parent: pqm at pqm.ubuntu.com-20070823005013-ada9x55rc31yiwou
committer: Robert Collins <robertc at robertcollins.net>
branch nick: transport-get-file
timestamp: Mon 2007-08-27 08:10:51 +1000
message:
* New parameter on ``bzrlib.transport.Transport.readv``
``adjust_for_latency`` which changes readv from returning strictly the
requested data to instead returning larger ranges, in forward read order,
to reduce the effect of network latency. (Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/tests/test_transport_implementations.py test_transport_implementations.py-20051227111451-f97c5c7d5c49fce7
bzrlib/transport/__init__.py transport.py-20050711165921-4978aa7ce1285ad5
bzrlib/transport/decorator.py decorator.py-20060402223305-e913a0f25319ab42
bzrlib/transport/fakevfat.py fakevfat.py-20060407072414-d59939fa1d6c79d9
bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
bzrlib/transport/remote.py ssh.py-20060608202016-c25gvf1ob7ypbus6-1
bzrlib/transport/sftp.py sftp.py-20051019050329-ab48ce71b7e32dfe
=== modified file 'NEWS'
--- a/NEWS 2007-08-26 23:30:47 +0000
+++ b/NEWS 2007-08-27 01:02:43 +0000
@@ -80,6 +80,11 @@
incremental addition of data to a file without requiring that all the
data be buffered in memory. (Robert Collins)
+ * New parameter on ``bzrlib.transport.Transport.readv``
+ ``adjust_for_latency`` which changes readv from returning strictly the
+ requested data to instead returning larger ranges, in forward read order,
+ to reduce the effect of network latency. (Robert Collins)
+
bzr 0.90 2007-08-??
===================
=== modified file 'bzrlib/tests/test_transport_implementations.py'
--- a/bzrlib/tests/test_transport_implementations.py 2007-08-22 06:23:52 +0000
+++ b/bzrlib/tests/test_transport_implementations.py 2007-08-27 01:02:43 +0000
@@ -1485,6 +1485,70 @@
self.assertEqual(d[2], (0, '0'))
self.assertEqual(d[3], (3, '34'))
+ def test_readv_with_adjust_for_latency(self):
+ transport = self.get_transport()
+ # the adjust for latency flag expands the data region returned
+ # according to a per-transport heuristic, so testing is a little
+ # tricky as we need more data than the largest combining that our
+ # transports do. To accommodate this we generate random data and cross
+ # reference the returned data with the random data. To avoid doing
+ # multiple large random byte look ups we do several tests on the same
+ # backing data.
+ content = osutils.rand_bytes(200*1024)
+ if transport.is_readonly():
+ file('a', 'w').write(content)
+ else:
+ transport.put_bytes('a', content)
+ def check_result_data(result_vector):
+ for item in result_vector:
+ data_len = len(item[1])
+ self.assertEqual(content[item[0]:item[0] + data_len], item[1])
+
+ # start corner case
+ result = list(transport.readv('a', ((0, 30),),
+ adjust_for_latency=True))
+ # we expect 1 result, from 0, to something > 30
+ self.assertEqual(1, len(result))
+ self.assertEqual(0, result[0][0])
+ self.assertTrue(len(result[0][1]) >= 30)
+ check_result_data(result)
+ # end of file corner case
+ result = list(transport.readv('a', ((204700, 100),),
+ adjust_for_latency=True))
+ # we expect 1 result, from 204800- its length, to the end
+ self.assertEqual(1, len(result))
+ data_len = len(result[0][1])
+ self.assertEqual(204800-data_len, result[0][0])
+ self.assertTrue(data_len >= 100)
+ check_result_data(result)
+ # out of order ranges are made in order
+ result = list(transport.readv('a', ((204700, 100), (0, 50)),
+ adjust_for_latency=True))
+ # we expect 2 results, in order, start and end.
+ self.assertEqual(2, len(result))
+ # start
+ data_len = len(result[0][1])
+ self.assertEqual(0, result[0][0])
+ self.assertTrue(data_len >= 30)
+ # end
+ data_len = len(result[1][1])
+ self.assertEqual(204800-data_len, result[1][0])
+ self.assertTrue(data_len >= 100)
+ check_result_data(result)
+ # close ranges get combined (even if out of order)
+ for request_vector in [((400,50), (800, 234)), ((800, 234), (400,50))]:
+ result = list(transport.readv('a', request_vector,
+ adjust_for_latency=True))
+ self.assertEqual(1, len(result))
+ data_len = len(result[0][1])
+ # minimum length is from 400 to 1034 - 634
+ self.assertTrue(data_len >= 634)
+ # must contain the region 400 to 1034
+ self.assertTrue(result[0][0] <= 400)
+ self.assertTrue(result[0][0] + data_len >= 1034)
+ check_result_data(result)
+
+
def test_get_with_open_write_stream_sees_all_content(self):
t = self.get_transport()
if t.is_readonly():
=== modified file 'bzrlib/transport/__init__.py'
--- a/bzrlib/transport/__init__.py 2007-08-22 08:09:05 +0000
+++ b/bzrlib/transport/__init__.py 2007-08-27 01:02:43 +0000
@@ -635,7 +635,56 @@
"""
raise errors.NoSmartMedium(self)
- def readv(self, relpath, offsets):
+ def readv(self, relpath, offsets, adjust_for_latency=False):
+ """Get parts of the file at the given relative path.
+
+ :param relpath: The path to read data from.
+ :param offsets: A list of (offset, size) tuples.
+ :param adjust_for_latency: Adjust the requested offsets to accommodate
+ transport latency. This may re-order the offsets, expand them to
+ grab adjacent data when there is likely a high cost to requesting
+ data relative to delivering it.
+ :return: A list or generator of (offset, data) tuples
+ """
+ if adjust_for_latency:
+ offsets = sorted(offsets)
+ # short circuit empty requests
+ if len(offsets) == 0:
+ def empty_yielder():
+ # Quick thunk to stop this function becoming a generator
+ # itself, rather we return a generator that has nothing to
+ # yield.
+ if False:
+ yield None
+ return empty_yielder()
+ # expand by page size at either end
+ expansion = self.recommended_page_size() / 2
+ new_offsets = []
+ for offset, length in offsets:
+ new_offset = offset - expansion
+ new_length = length + expansion
+ if new_offset < 0:
+ # don't ask for anything < 0
+ new_length -= new_offset
+ new_offset = 0
+ new_offsets.append((new_offset, new_length))
+ # combine the expanded offsets
+ offsets = []
+ current_offset, current_length = new_offsets[0]
+ current_finish = current_length + current_offset
+ for offset, length in new_offsets[1:]:
+ if offset > current_finish:
+ offsets.append((current_offset, current_length))
+ current_offset = offset
+ current_length = length
+ continue
+ finish = offset + length
+ if finish > current_finish:
+ current_finish = finish
+ offsets.append((current_offset, current_length))
+ return self._readv(relpath, offsets)
+
+ def _readv(self, relpath, offsets):
"""Get parts of the file at the given relative path.
:offsets: A list of (offset, size) tuples.
=== modified file 'bzrlib/transport/decorator.py'
--- a/bzrlib/transport/decorator.py 2007-08-22 08:09:05 +0000
+++ b/bzrlib/transport/decorator.py 2007-08-27 01:02:43 +0000
@@ -138,6 +138,10 @@
"""See Transport.list_dir()."""
return self._decorated.list_dir(relpath)
+ def _readv(self, relpath, offsets):
+ """See Transport._readv."""
+ return self._decorated._readv(relpath, offsets)
+
def recommended_page_size(self):
"""See Transport.recommended_page_size()."""
return self._decorated.recommended_page_size()
=== modified file 'bzrlib/transport/fakevfat.py'
--- a/bzrlib/transport/fakevfat.py 2007-08-15 06:53:07 +0000
+++ b/bzrlib/transport/fakevfat.py 2007-08-26 22:10:51 +0000
@@ -92,7 +92,7 @@
def has(self, relpath):
return self._decorated.has(self._squash_name(relpath))
- def readv(self, relpath, offsets):
+ def _readv(self, relpath, offsets):
return self._decorated.readv(self._squash_name(relpath), offsets)
def put_file(self, relpath, f, mode=None):
=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py 2007-08-05 01:47:30 +0000
+++ b/bzrlib/transport/http/__init__.py 2007-08-26 22:10:51 +0000
@@ -252,7 +252,7 @@
# to avoid downloading the whole file.
_max_readv_combined = 0
- def readv(self, relpath, offsets):
+ def _readv(self, relpath, offsets):
"""Get parts of the file at the given relative path.
:param offsets: A list of (offset, size) tuples.
=== modified file 'bzrlib/transport/remote.py'
--- a/bzrlib/transport/remote.py 2007-08-22 08:09:05 +0000
+++ b/bzrlib/transport/remote.py 2007-08-27 01:02:43 +0000
@@ -290,7 +290,7 @@
# the external path for RemoteTransports is the base
return self.base
- def readv(self, relpath, offsets):
+ def _readv(self, relpath, offsets):
if not offsets:
return
=== modified file 'bzrlib/transport/sftp.py'
--- a/bzrlib/transport/sftp.py 2007-08-22 08:09:05 +0000
+++ b/bzrlib/transport/sftp.py 2007-08-27 01:02:43 +0000
@@ -247,7 +247,7 @@
self._translate_io_exception(e, path, ': error retrieving',
failure_exc=errors.ReadError)
- def readv(self, relpath, offsets):
+ def _readv(self, relpath, offsets):
"""See Transport.readv()"""
# We overload the default readv() because we want to use a file
# that does not have prefetch enabled.
More information about the bazaar-commits
mailing list