Rev 3027: Fix bug #165061: Change the http readv code to require fewer ranges. in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/extra_range_collapse_165061

John Arbash Meinel john at arbash-meinel.com
Mon Nov 26 21:37:27 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/extra_range_collapse_165061

------------------------------------------------------------
revno: 3027
revision-id:john at arbash-meinel.com-20071126213640-2asaghs4qr1zs3vg
parent: pqm at pqm.ubuntu.com-20071126151329-pnmm49obwettpwcd
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: extra_range_collapse_165061
timestamp: Mon 2007-11-26 15:36:40 -0600
message:
  Fix bug #165061: Change the http readv code to require fewer ranges.
  This is mostly a workaround.
  Basically, we just pressure the collapsing code to try harder until it hits a
  semi-arbitrary maximum number of ranges.
  This should improve performance for packs when we are getting a large subset
  of the total pack.
modified:
  bzrlib/tests/test_http.py      testhttp.py-20051018020158-b2eef6e867c514d9
  bzrlib/transport/__init__.py   transport.py-20050711165921-4978aa7ce1285ad5
  bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
-------------- next part --------------
=== modified file 'bzrlib/tests/test_http.py'
--- a/bzrlib/tests/test_http.py	2007-11-04 15:29:17 +0000
+++ b/bzrlib/tests/test_http.py	2007-11-26 21:36:40 +0000
@@ -653,6 +653,29 @@
         self.assertListRaises((errors.InvalidRange, errors.ShortReadvError,),
                               t.readv, 'a', [(12,2)])
 
+    def test_get_coalesced_offsets_too_many_ranges(self):
+        """Lowering _max_readv_ranges makes coalescing merge more ranges."""
+        transport = self._transport(self.get_readonly_server().get_url())
+        transport._bytes_to_read_before_seek = 1
+        offsets = ((0, 1), (1, 1), (5, 2), (29, 1))
+        # Before the range limit is lowered only the adjacent offsets join.
+        unrestricted = [
+            _CoalescedOffset(0, 2, [(0, 1), (1, 1)]),
+            _CoalescedOffset(5, 2, [(0, 2)]),
+            _CoalescedOffset(29, 1, [(0, 1)]),
+            ]
+        self.assertEqual(unrestricted,
+                         transport._get_coalesced_offsets(offsets))
+        # Allowing only two ranges forces the fudge factor up until the first
+        # three offsets share a single range.
+        transport._max_readv_ranges = 2
+        restricted = [
+            _CoalescedOffset(0, 7, [(0, 1), (1, 1), (5, 2)]),
+            _CoalescedOffset(29, 1, [(0, 1)]),
+            ]
+        self.assertEqual(restricted,
+                         transport._get_coalesced_offsets(offsets))
+
 
 class TestSingleRangeRequestServer(TestRangeRequestServer):
     """Test readv against a server which accept only single range requests"""

=== modified file 'bzrlib/transport/__init__.py'
--- a/bzrlib/transport/__init__.py	2007-11-19 13:44:25 +0000
+++ b/bzrlib/transport/__init__.py	2007-11-26 21:36:40 +0000
@@ -234,6 +234,11 @@
         return cmp((self.start, self.length, self.ranges),
                    (other.start, other.length, other.ranges))
 
+    def __repr__(self):
+        """Show the class name with the repr of start, length and ranges."""
+        details = (self.start, self.length, self.ranges)
+        return '%s(%r, %r, %r)' % ((self.__class__.__name__,) + details)
+
 
 class LateReadError(object):
     """A helper for transports which pretends to be a readable file.

=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py	2007-08-26 22:10:51 +0000
+++ b/bzrlib/transport/http/__init__.py	2007-11-26 21:36:40 +0000
@@ -252,18 +252,58 @@
     # to avoid downloading the whole file.
-    _max_readv_combined = 0
+    _max_readv_combine = 0
 
+    # Keep collapsing until we can fit all of the request into this many
+    # ranges. By default Apache has a limit of ~400. So we go underneath that
+    # amount to be safe.
+    _max_readv_ranges = 200
+
+    def _get_coalesced_offsets(self, offsets):
+        """Coalesce offsets, trying to stay within _max_readv_ranges ranges.
+
+        The offsets are sorted and coalesced with a fudge factor that starts
+        at _bytes_to_read_before_seek. While the result holds more than
+        _max_readv_ranges ranges, the fudge factor is raised (fudge * 2 + 1)
+        and the coalescing is redone.
+
+        :param offsets: An iterable of (offset, size) tuples.
+        :return: A list of the ranges yielded by _coalesce_offsets. It can
+            still be longer than _max_readv_ranges when raising the fudge
+            factor further is not expected to merge anything more.
+        """
+        sorted_offsets = sorted(offsets)
+        if sorted_offsets:
+            # Distance from the lowest start to the highest end requested.
+            span = (max([start + size for start, size in sorted_offsets])
+                    - sorted_offsets[0][0])
+        else:
+            span = 0
+        fudge = self._bytes_to_read_before_seek
+        coalesced = list(self._coalesce_offsets(sorted_offsets,
+                                                limit=self._max_readv_combine,
+                                                fudge_factor=fudge))
+        # NOTE(review): assumes _coalesce_offsets joins two ranges whenever
+        # the gap between them is <= fudge_factor -- confirm. If so, a fudge
+        # factor >= span cannot merge anything more, so stop there rather
+        # than loop forever (e.g. when _max_readv_ranges < 1).
+        while len(coalesced) > self._max_readv_ranges and fudge < span:
+            new_fudge = fudge * 2 + 1
+            mutter('http would request too many ranges (%d > %d),'
+                   ' increasing fudge factor from %d to %d',
+                   len(coalesced), self._max_readv_ranges, fudge, new_fudge)
+            fudge = new_fudge
+            coalesced = list(self._coalesce_offsets(sorted_offsets,
+                            limit=self._max_readv_combine,
+                            fudge_factor=fudge))
+        return coalesced
+
     def _readv(self, relpath, offsets):
         """Get parts of the file at the given relative path.
 
         :param offsets: A list of (offset, size) tuples.
         :param return: A list or generator of (offset, data) tuples
         """
-        sorted_offsets = sorted(list(offsets))
-        fudge = self._bytes_to_read_before_seek
-        coalesced = self._coalesce_offsets(sorted_offsets,
-                                           limit=self._max_readv_combine,
-                                           fudge_factor=fudge)
-        coalesced = list(coalesced)
+        coalesced = self._get_coalesced_offsets(offsets)
+
         mutter('http readv of %s  offsets => %s collapsed %s',
                 relpath, len(offsets), len(coalesced))
 



More information about the bazaar-commits mailing list