Rev 3027: Fix bug #165061: Change the http readv code to require fewer ranges. in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/extra_range_collapse_165061
John Arbash Meinel
john at arbash-meinel.com
Mon Nov 26 21:37:27 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/extra_range_collapse_165061
------------------------------------------------------------
revno: 3027
revision-id:john at arbash-meinel.com-20071126213640-2asaghs4qr1zs3vg
parent: pqm at pqm.ubuntu.com-20071126151329-pnmm49obwettpwcd
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: extra_range_collapse_165061
timestamp: Mon 2007-11-26 15:36:40 -0600
message:
Fix bug #165061: Change the http readv code to require fewer ranges.
This is mostly a workaround.
Basically, we just pressure the collapsing code to try harder (by repeatedly
increasing the fudge factor) until the request fits within a semi-arbitrary
maximum number of ranges.
This should improve performance for packs when we are getting a large subset
of the total pack.
modified:
bzrlib/tests/test_http.py testhttp.py-20051018020158-b2eef6e867c514d9
bzrlib/transport/__init__.py transport.py-20050711165921-4978aa7ce1285ad5
bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
-------------- next part --------------
=== modified file 'bzrlib/tests/test_http.py'
--- a/bzrlib/tests/test_http.py 2007-11-04 15:29:17 +0000
+++ b/bzrlib/tests/test_http.py 2007-11-26 21:36:40 +0000
@@ -653,6 +653,23 @@
self.assertListRaises((errors.InvalidRange, errors.ShortReadvError,),
t.readv, 'a', [(12,2)])
+ def test_get_coalesced_offsets_too_many_ranges(self):
+ server = self.get_readonly_server()
+ t = self._transport(server.get_url())
+ t._bytes_to_read_before_seek = 1
+ offsets = ((0, 1), (1, 1), (5, 2), (29, 1))
+ self.assertEqual([_CoalescedOffset(0, 2, [(0, 1), (1, 1)]),
+ _CoalescedOffset(5, 2, [(0, 2)]),
+ _CoalescedOffset(29, 1, [(0, 1)]),
+ ], t._get_coalesced_offsets(offsets))
+ t._max_readv_ranges = 2
+ # Restricting the max number of ranges will cause the fudge factor to
+ # increase until it can fit everything into no more than that many
+ # ranges.
+ self.assertEqual([_CoalescedOffset(0, 7, [(0, 1), (1, 1), (5, 2)]),
+ _CoalescedOffset(29, 1, [(0, 1)]),
+ ], t._get_coalesced_offsets(offsets))
+
class TestSingleRangeRequestServer(TestRangeRequestServer):
"""Test readv against a server which accept only single range requests"""
=== modified file 'bzrlib/transport/__init__.py'
--- a/bzrlib/transport/__init__.py 2007-11-19 13:44:25 +0000
+++ b/bzrlib/transport/__init__.py 2007-11-26 21:36:40 +0000
@@ -234,6 +234,10 @@
return cmp((self.start, self.length, self.ranges),
(other.start, other.length, other.ranges))
+ def __repr__(self):
+ return '%s(%r, %r, %r)' % (self.__class__.__name__,
+ self.start, self.length, self.ranges)
+
class LateReadError(object):
"""A helper for transports which pretends to be a readable file.
=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py 2007-08-26 22:10:51 +0000
+++ b/bzrlib/transport/http/__init__.py 2007-11-26 21:36:40 +0000
@@ -252,18 +252,36 @@
# to avoid downloading the whole file.
_max_readv_combined = 0
+ # Keep collapsing until we can fit all of the request into this many
+ # ranges. By default Apache has a limit of ~400. So we go underneath that
+ # amount to be safe.
+ _max_readv_ranges = 200
+
+ def _get_coalesced_offsets(self, offsets):
+ sorted_offsets = sorted(list(offsets))
+ fudge = self._bytes_to_read_before_seek
+ coalesced = list(self._coalesce_offsets(sorted_offsets,
+ limit=self._max_readv_combine,
+ fudge_factor=fudge))
+ while len(coalesced) > self._max_readv_ranges:
+ new_fudge = fudge * 2 + 1
+ mutter('http would request too many ranges (%d > %d),'
+ 'increasing fudge factor from %d to %d',
+ len(coalesced), self._max_readv_ranges, fudge, new_fudge)
+ fudge = new_fudge
+ coalesced = list(self._coalesce_offsets(sorted_offsets,
+ limit=self._max_readv_combine,
+ fudge_factor=fudge))
+ return coalesced
+
def _readv(self, relpath, offsets):
"""Get parts of the file at the given relative path.
:param offsets: A list of (offset, size) tuples.
:param return: A list or generator of (offset, data) tuples
"""
- sorted_offsets = sorted(list(offsets))
- fudge = self._bytes_to_read_before_seek
- coalesced = self._coalesce_offsets(sorted_offsets,
- limit=self._max_readv_combine,
- fudge_factor=fudge)
- coalesced = list(coalesced)
+ coalesced = self._get_coalesced_offsets(offsets)
+
mutter('http readv of %s offsets => %s collapsed %s',
relpath, len(offsets), len(coalesced))
More information about the bazaar-commits
mailing list