Rev 2521: First step to fix #115209 use _coalesce_offsets like other transports. in file:///v/home/vila/src/bugs/115209/
Vincent Ladeuil
v.ladeuil+lp at free.fr
Tue Jun 12 15:49:01 BST 2007
At file:///v/home/vila/src/bugs/115209/
------------------------------------------------------------
revno: 2521
revision-id: v.ladeuil+lp at free.fr-20070612144857-1swhz5c3qo8n76l8
parent: pqm at pqm.ubuntu.com-20070608134340-flu6dlpzyo7izrrs
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: 115209
timestamp: Tue 2007-06-12 16:48:57 +0200
message:
First step to fix #115209 use _coalesce_offsets like other transports.
* bzrlib/transport/http/__init__.py:
(HttpTransportBase._bytes_to_read_before_seek): Setting it to 512
to minimize additional bytes transferred but still reducing number
of ranges.
(HttpTransportBase.readv): _coalesce_offsets does a better job
than offset_to_ranges/ Reduce code duplication too.
(HttpTransportBase.offsets_to_ranges): Deprecated.
(HttpTransportBase._attempted_range_header): Made private. Adapted
to offsets.
(HttpTransportBase._range_header): Made private. Adapted to
offsets.
* bzrlib/transport/http/_urllib.py:
(HttpTransport_urllib._get): s/ranges/offsets/
* bzrlib/transport/http/_pycurl.py:
(PyCurlTransport._get, PyCurlTransport._get_ranged): s/ranges/offsets/
* bzrlib/tests/__init__.py:
(Transport._coalesce_offsets): Fix minor typo.
* bzrlib/tests/test_http.py:
(TestOffsets): Deleted. We rely on Transport._coalesce_offsets
only now.
(TestRangeHeader.check_header): Convert ranges to offets and use
Transport._coalesce_offsets.
(TestRangeHeader.test_range_header_single): Use tuples not lists !
(TestRanges._file_contents): Convert ranges to offets and use
Transport._coalesce_offsets. And fix indentation too.
(TestRanges._file_tail): Fix indentation.
(TestRanges.test_range_header): Use _file_contents so that we can
still use ranges.
modified:
bzrlib/tests/test_http.py testhttp.py-20051018020158-b2eef6e867c514d9
bzrlib/transport/__init__.py transport.py-20050711165921-4978aa7ce1285ad5
bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
bzrlib/transport/http/_pycurl.py pycurlhttp.py-20060110060940-4e2a705911af77a6
bzrlib/transport/http/_urllib.py _urlgrabber.py-20060113083826-0bbf7d992fbf090c
-------------- next part --------------
=== modified file 'bzrlib/tests/test_http.py'
--- a/bzrlib/tests/test_http.py 2007-05-06 06:59:57 +0000
+++ b/bzrlib/tests/test_http.py 2007-06-12 14:48:57 +0000
@@ -65,6 +65,7 @@
WallRequestHandler,
)
from bzrlib.transport import (
+ _CoalescedOffset,
do_catching_redirections,
get_transport,
Transport,
@@ -361,30 +362,6 @@
self.assertIsInstance(t, HttpTransport_urllib)
-class TestOffsets(TestCase):
- """Test offsets_to_ranges method"""
-
- def test_offsets_to_ranges_simple(self):
- to_range = HttpTransportBase.offsets_to_ranges
- ranges = to_range([(10, 1)])
- self.assertEqual([[10, 10]], ranges)
-
- ranges = to_range([(0, 1), (1, 1)])
- self.assertEqual([[0, 1]], ranges)
-
- ranges = to_range([(1, 1), (0, 1)])
- self.assertEqual([[0, 1]], ranges)
-
- def test_offset_to_ranges_overlapped(self):
- to_range = HttpTransportBase.offsets_to_ranges
-
- ranges = to_range([(10, 1), (20, 2), (22, 5)])
- self.assertEqual([[10, 10], [20, 26]], ranges)
-
- ranges = to_range([(10, 1), (11, 2), (22, 5)])
- self.assertEqual([[10, 12], [22, 26]], ranges)
-
-
class TestPost(object):
def _test_post_body_is_received(self, scheme):
@@ -427,12 +404,15 @@
"""Test range_header method"""
def check_header(self, value, ranges=[], tail=0):
- range_header = HttpTransportBase.range_header
- self.assertEqual(value, range_header(ranges, tail))
+ offsets = [ (start, end - start + 1) for start, end in ranges]
+ coalesce = Transport._coalesce_offsets
+ coalesced = list(coalesce(offsets, limit=0, fudge_factor=0))
+ range_header = HttpTransportBase._range_header
+ self.assertEqual(value, range_header(coalesced, tail))
def test_range_header_single(self):
- self.check_header('0-9', ranges=[[0,9]])
- self.check_header('100-109', ranges=[[100,109]])
+ self.check_header('0-9', ranges=[(0,9)])
+ self.check_header('100-109', ranges=[(100,109)])
def test_range_header_tail(self):
self.check_header('-10', tail=10)
@@ -926,18 +906,21 @@
server = self.get_readonly_server()
self.transport = self._transport(server.get_url())
- def _file_contents(self, relpath, ranges, tail_amount=0):
- code, data = self.transport._get(relpath, ranges)
- self.assertTrue(code in (200, 206),'_get returns: %d' % code)
- for start, end in ranges:
- data.seek(start)
- yield data.read(end - start + 1)
+ def _file_contents(self, relpath, ranges):
+ offsets = [ (start, end - start + 1) for start, end in ranges]
+ coalesce = self.transport._coalesce_offsets
+ coalesced = list(coalesce(offsets, limit=0, fudge_factor=0))
+ code, data = self.transport._get(relpath, coalesced)
+ self.assertTrue(code in (200, 206),'_get returns: %d' % code)
+ for start, end in ranges:
+ data.seek(start)
+ yield data.read(end - start + 1)
def _file_tail(self, relpath, tail_amount):
- code, data = self.transport._get(relpath, [], tail_amount)
- self.assertTrue(code in (200, 206),'_get returns: %d' % code)
- data.seek(-tail_amount + 1, 2)
- return data.read(tail_amount)
+ code, data = self.transport._get(relpath, [], tail_amount)
+ self.assertTrue(code in (200, 206),'_get returns: %d' % code)
+ data.seek(-tail_amount + 1, 2)
+ return data.read(tail_amount)
def test_range_header(self):
# Valid ranges
@@ -946,11 +929,11 @@
# Tail
self.assertEqual('789', self._file_tail('a', 3))
# Syntactically invalid range
- self.assertRaises(errors.InvalidRange,
- self.transport._get, 'a', [(4, 3)])
+ self.assertListRaises(errors.InvalidRange,
+ self._file_contents, 'a', [(4, 3)])
# Semantically invalid range
- self.assertRaises(errors.InvalidRange,
- self.transport._get, 'a', [(42, 128)])
+ self.assertListRaises(errors.InvalidRange,
+ self._file_contents, 'a', [(42, 128)])
class TestRanges_urllib(TestRanges, TestCaseWithWebserver):
=== modified file 'bzrlib/transport/__init__.py'
--- a/bzrlib/transport/__init__.py 2007-05-08 20:00:50 +0000
+++ b/bzrlib/transport/__init__.py 2007-06-12 14:48:57 +0000
@@ -575,7 +575,7 @@
:param fudge_factor: All transports have some level of 'it is
better to read some more data and throw it away rather
than seek', so collapse if we are 'close enough'
- :return: yield _CoalescedOffset objects, which have members for wher
+ :return: yield _CoalescedOffset objects, which have members for where
to start, how much to read, and how to split those
chunks back up
"""
=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py 2007-05-06 06:59:57 +0000
+++ b/bzrlib/transport/http/__init__.py 2007-06-12 14:48:57 +0000
@@ -28,8 +28,13 @@
from bzrlib import errors, ui
from bzrlib.smart import medium
+from bzrlib.symbol_versioning import (
+ deprecated_method,
+ zero_seventeen,
+ )
from bzrlib.trace import mutter
from bzrlib.transport import (
+ _CoalescedOffset,
Transport,
)
@@ -232,7 +237,7 @@
:param relpath: Path relative to transport base URL
:param ranges: None to get the whole file;
- or [(start,end)+], a list of tuples to fetch parts of a file.
+ or a list of _CoalescedOffset to fetch parts of a file.
:param tail_amount: The amount to get from the end of the file.
:returns: (http_code, result_file)
@@ -279,30 +284,47 @@
# unsuccessful
raise exc_info[0], exc_info[1], exc_info[2]
+ # Having to round trip to the server means waiting for a response, so it is
+ # better to download extra bytes. We have two constraints to satisfy when
+ # choosing the right value: a request with too much ranges will make some
+ # servers issue a '400: Bad request' error (when we have too much ranges,
+ # we exceed the apache header max size (8190 by default) for example) ;
+ # coalescing too much ranges will increasing the data transferred and
+ # degrade performances.
+ _bytes_to_read_before_seek = 512
+ # No limit on the offset number combined into one, we are trying to avoid
+ # downloading the whole file.
+ _max_readv_combined = 0
+
def readv(self, relpath, offsets):
"""Get parts of the file at the given relative path.
:param offsets: A list of (offset, size) tuples.
:param return: A list or generator of (offset, data) tuples
"""
- ranges = self.offsets_to_ranges(offsets)
- mutter('http readv of %s collapsed %s offsets => %s',
- relpath, len(offsets), ranges)
+ sorted_offsets = sorted(list(offsets))
+ fudge = self._bytes_to_read_before_seek
+ coalesced = self._coalesce_offsets(sorted_offsets,
+ limit=self._max_readv_combine,
+ fudge_factor=fudge)
+ coalesced = list(coalesced)
+ mutter('http readv of %s offsets => %s collapsed %s',
+ relpath, len(offsets), len(coalesced))
try_again = True
while try_again:
try_again = False
try:
- code, f = self._get(relpath, ranges)
+ code, f = self._get(relpath, coalesced)
except (errors.InvalidRange, errors.ShortReadvError), e:
- try_again, code, f = self._retry_get(relpath, ranges,
+ try_again, code, f = self._retry_get(relpath, coalesced,
sys.exc_info())
for start, size in offsets:
try_again = True
while try_again:
try_again = False
- f.seek(start, (start < 0) and 2 or 0)
+ f.seek(start, ((start < 0) and 2) or 0)
start = f.tell()
try:
data = f.read(size)
@@ -313,12 +335,13 @@
# Note that we replace 'f' here and that it
# may need cleaning one day before being
# thrown that way.
- try_again, code, f = self._retry_get(relpath, ranges,
+ try_again, code, f = self._retry_get(relpath, coalesced,
sys.exc_info())
# After one or more tries, we get the data.
yield start, data
@staticmethod
+ @deprecated_method(zero_seventeen)
def offsets_to_ranges(offsets):
"""Turn a list of offsets and sizes into a list of byte ranges.
@@ -453,37 +476,40 @@
else:
return self.__class__(self.abspath(offset), self)
- def attempted_range_header(self, ranges, tail_amount):
+ def _attempted_range_header(self, offsets, tail_amount):
"""Prepare a HTTP Range header at a level the server should accept"""
if self._range_hint == 'multi':
# Nothing to do here
- return self.range_header(ranges, tail_amount)
+ return self._range_header(offsets, tail_amount)
elif self._range_hint == 'single':
# Combine all the requested ranges into a single
# encompassing one
- if len(ranges) > 0:
- start, ignored = ranges[0]
- ignored, end = ranges[-1]
+ if len(offsets) > 0:
if tail_amount not in (0, None):
- # Nothing we can do here to combine ranges
- # with tail_amount, just returns None. The
- # whole file should be downloaded.
+ # Nothing we can do here to combine ranges with tail_amount
+ # in a single range, just returns None. The whole file
+ # should be downloaded.
return None
else:
- return self.range_header([(start, end)], 0)
+ start = offsets[0].start
+ last = offsets[-1]
+ end = last.start + last.length - 1
+ whole = self._coalesce_offsets([(start, end - start + 1)],
+ limit=0, fudge_factor=0)
+ return self._range_header(list(whole), 0)
else:
# Only tail_amount, requested, leave range_header
# do its work
- return self.range_header(ranges, tail_amount)
+ return self._range_header(offsets, tail_amount)
else:
return None
@staticmethod
- def range_header(ranges, tail_amount):
+ def _range_header(ranges, tail_amount):
"""Turn a list of bytes ranges into a HTTP Range header value.
- :param ranges: A list of byte ranges, (start, end).
+ :param ranges: A list of _CoalescedOffset
:param tail_amount: The amount to get from the end of the file.
:return: HTTP range header string.
@@ -492,8 +518,9 @@
provided.
"""
strings = []
- for start, end in ranges:
- strings.append('%d-%d' % (start, end))
+ for offset in ranges:
+ strings.append('%d-%d' % (offset.start,
+ offset.start + offset.length - 1))
if tail_amount:
strings.append('-%d' % tail_amount)
=== modified file 'bzrlib/transport/http/_pycurl.py'
--- a/bzrlib/transport/http/_pycurl.py 2007-05-06 06:59:57 +0000
+++ b/bzrlib/transport/http/_pycurl.py 2007-06-12 14:48:57 +0000
@@ -136,10 +136,10 @@
else:
self._raise_curl_http_error(curl)
- def _get(self, relpath, ranges, tail_amount=0):
+ def _get(self, relpath, offsets, tail_amount=0):
# This just switches based on the type of request
- if ranges is not None or tail_amount not in (0, None):
- return self._get_ranged(relpath, ranges, tail_amount=tail_amount)
+ if offsets is not None or tail_amount not in (0, None):
+ return self._get_ranged(relpath, offsets, tail_amount=tail_amount)
else:
return self._get_full(relpath)
@@ -189,12 +189,12 @@
return code, data
- def _get_ranged(self, relpath, ranges, tail_amount):
+ def _get_ranged(self, relpath, offsets, tail_amount):
"""Make a request for just part of the file."""
curl = self._curl
abspath, data, header = self._setup_get_request(curl, relpath)
- range_header = self.attempted_range_header(ranges, tail_amount)
+ range_header = self._attempted_range_header(offsets, tail_amount)
if range_header is None:
# Forget ranges, the server can't handle them
return self._get_full(relpath)
=== modified file 'bzrlib/transport/http/_urllib.py'
--- a/bzrlib/transport/http/_urllib.py 2007-04-22 16:32:04 +0000
+++ b/bzrlib/transport/http/_urllib.py 2007-06-12 14:48:57 +0000
@@ -113,13 +113,13 @@
return response
- def _get(self, relpath, ranges, tail_amount=0):
+ def _get(self, relpath, offsets, tail_amount=0):
"""See HttpTransport._get"""
abspath = self._real_abspath(relpath)
headers = {}
- if ranges or tail_amount:
- range_header = self.attempted_range_header(ranges, tail_amount)
+ if offsets or tail_amount:
+ range_header = self._attempted_range_header(offsets, tail_amount)
if range_header is not None:
bytes = 'bytes=' + range_header
headers = {'Range': bytes}
More information about the bazaar-commits
mailing list