Rev 3161: (vila) Fix #179368 by keeping the current range hint on in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Thu Jan 3 18:09:12 GMT 2008
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 3161
revision-id:pqm at pqm.ubuntu.com-20080103180901-w987y1ftqoh02qbm
parent: pqm at pqm.ubuntu.com-20080103170805-t22jan69n4n5kkub
parent: v.ladeuil+lp at free.fr-20080103162917-soe75ph78vaojyk3
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2008-01-03 18:09:01 +0000
message:
(vila) Fix #179368 by keeping the current range hint on
ShortReadvErrors
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/tests/test_http.py testhttp.py-20051018020158-b2eef6e867c514d9
bzrlib/tests/test_http_response.py test_http_response.py-20060628233143-950b2a482a32505d
bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
bzrlib/transport/http/_urllib2_wrappers.py _urllib2_wrappers.py-20060913231729-ha9ugi48ktx481ao-1
bzrlib/transport/http/response.py _response.py-20060613154423-a2ci7hd4iw5c7fnt-1
------------------------------------------------------------
revno: 3159.1.1
revision-id:v.ladeuil+lp at free.fr-20080103162917-soe75ph78vaojyk3
parent: pqm at pqm.ubuntu.com-20080103160106-v6qfwv4ta8sorx1q
parent: v.ladeuil+lp at free.fr-20080103162632-6a0mxvbq22hd1pvo
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: trunk
timestamp: Thu 2008-01-03 17:29:17 +0100
message:
Fix #179368 by keeping the current range hint on ShortReadvErrors
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/tests/test_http.py testhttp.py-20051018020158-b2eef6e867c514d9
bzrlib/tests/test_http_response.py test_http_response.py-20060628233143-950b2a482a32505d
bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
bzrlib/transport/http/_urllib2_wrappers.py _urllib2_wrappers.py-20060913231729-ha9ugi48ktx481ao-1
bzrlib/transport/http/response.py _response.py-20060613154423-a2ci7hd4iw5c7fnt-1
------------------------------------------------------------
revno: 3146.3.4
revision-id:v.ladeuil+lp at free.fr-20080103162632-6a0mxvbq22hd1pvo
parent: v.ladeuil+lp at free.fr-20080103151258-idpzfox078f80vhe
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: 179368
timestamp: Thu 2008-01-03 17:26:32 +0100
message:
Review feedback, simpler loops.
modified:
bzrlib/transport/http/_urllib2_wrappers.py _urllib2_wrappers.py-20060913231729-ha9ugi48ktx481ao-1
bzrlib/transport/http/response.py _response.py-20060613154423-a2ci7hd4iw5c7fnt-1
------------------------------------------------------------
revno: 3146.3.3
revision-id:v.ladeuil+lp at free.fr-20080103151258-idpzfox078f80vhe
parent: v.ladeuil+lp at free.fr-20080102141355-k20yfjo6i1dasuny
parent: pqm at pqm.ubuntu.com-20080103103822-fj2udnviy9ilfsst
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: 179368
timestamp: Thu 2008-01-03 16:12:58 +0100
message:
merge bzr.dev
added:
doc/developers/lca-merge.txt lcamerge.txt-20080103061803-9isydn4ivgwrvorw-1
doc/en/tutorials/using_bazaar_with_launchpad.txt using_bazaar_with_lp-20071211073140-7msh8uf9a9h4y9hb-1
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/bugtracker.py bugtracker.py-20070410073305-vu1vu1qosjurg8kb-1
bzrlib/builtins.py builtins.py-20050830033751-fc01482b9ca23183
bzrlib/diff.py diff.py-20050309040759-26944fbbf2ebbf36
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/help_topics/__init__.py help_topics.py-20060920210027-rnim90q9e0bwxvy4-1
bzrlib/merge.py merge.py-20050513021216-953b65a438527106
bzrlib/option.py option.py-20051014052914-661fb36e76e7362f
bzrlib/osutils.py osutils.py-20050309040759-eeaff12fbf77ac86
bzrlib/repofmt/knitrepo.py knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/blackbox/test_annotate.py testannotate.py-20051013044000-457f44801bfa9d39
bzrlib/tests/blackbox/test_log.py test_log.py-20060112090212-78f6ea560c868e24
bzrlib/tests/blackbox/test_merge.py test_merge.py-20060323225809-9bc0459c19917f41
bzrlib/tests/branch_implementations/test_parent.py test_parent.py-20050830052751-5e62766623c32222
bzrlib/tests/http_server.py httpserver.py-20061012142527-m1yxdj1xazsf8d7s-1
bzrlib/tests/repository_implementations/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
bzrlib/tests/test_diff.py testdiff.py-20050727164403-d1a3496ebb12e339
bzrlib/tests/test_http.py testhttp.py-20051018020158-b2eef6e867c514d9
bzrlib/tests/test_merge.py testmerge.py-20050905070950-c1b5aa49ff911024
bzrlib/tests/test_osutils.py test_osutils.py-20051201224856-e48ee24c12182989
bzrlib/tests/test_urlutils.py test_urlutils.py-20060502192900-46b1f9579987cf9c
bzrlib/trace.py trace.py-20050309040759-c8ed824bdcd4748a
bzrlib/tree.py tree.py-20050309040759-9d5f2496be663e77
bzrlib/urlutils.py urlutils.py-20060502195429-e8a161ecf8fac004
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
doc/developers/index.txt index.txt-20070508041241-qznziunkg0nffhiw-1
doc/en/user-guide/bug_trackers.txt bug_trackers.txt-20070713223459-khxdlcudraii95uv-1
doc/index.txt index.txt-20070813101924-07gd9i9d2jt124bf-1
------------------------------------------------------------
revno: 3146.3.2
revision-id:v.ladeuil+lp at free.fr-20080102141355-k20yfjo6i1dasuny
parent: v.ladeuil+lp at free.fr-20071230124033-gk9iv67553aq5kw2
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: 179368
timestamp: Wed 2008-01-02 15:13:55 +0100
message:
Fix #179368 by keeping the current range hint on ShortReadvErrors.
* response.py:
(RangeFile._checked_read): Avoid huge buffering when huge seeks
are required.
* _urllib2_wrappers.py:
(Response.finish): Check for end-of-file or we'll loop if the
server lied about Content-Length.
* __init__.py:
(HttpTransportBase._readv): When a ShortReadvError occurs, try
again, staying in multiple range mode and degrades only if the
error occurs again for the same offset.
* test_http_response.py:
(TestRangeFileMultipleRanges): Add a test to exercise the buffer
overflow protection code.
* test_http.py:
(TestMultipleRangeWithoutContentLengthServer): Emulate lighttpd
behavior regarding bug #179368.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/tests/test_http.py testhttp.py-20051018020158-b2eef6e867c514d9
bzrlib/tests/test_http_response.py test_http_response.py-20060628233143-950b2a482a32505d
bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
bzrlib/transport/http/_urllib2_wrappers.py _urllib2_wrappers.py-20060913231729-ha9ugi48ktx481ao-1
bzrlib/transport/http/response.py _response.py-20060613154423-a2ci7hd4iw5c7fnt-1
------------------------------------------------------------
revno: 3146.3.1
revision-id:v.ladeuil+lp at free.fr-20071230124033-gk9iv67553aq5kw2
parent: pqm at pqm.ubuntu.com-20071228175832-9kboqtkemnuzzlab
parent: v.ladeuil+lp at free.fr-20071228165130-iv5p12lfc2fmbb7u
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: 179368
timestamp: Sun 2007-12-30 13:40:33 +0100
message:
merge fixes for #175524 first
added:
bzrlib/tests/test_http_implementations.py test_http_implementa-20071218210003-65nh81gglcfvurw6-1
renamed:
bzrlib/tests/HTTPTestUtil.py => bzrlib/tests/http_utils.py HTTPTestUtil.py-20050914180604-247d3aafb7a43343
bzrlib/tests/HttpServer.py => bzrlib/tests/http_server.py httpserver.py-20061012142527-m1yxdj1xazsf8d7s-1
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/tests/__init__.py selftest.py-20050531073622-8d0e3c8845c97a64
bzrlib/tests/blackbox/test_ignore.py test_ignore.py-20060703063225-4tm8dc2pa7wwg2t3-1
bzrlib/tests/blackbox/test_selftest.py test_selftest.py-20060123024542-01c5f1bbcb596d78
bzrlib/tests/blackbox/test_too_much.py blackbox.py-20050620052131-a7370d756399f615
bzrlib/tests/branch_implementations/test_branch.py testbranch.py-20050711070244-121d632bc37d7253
bzrlib/tests/branch_implementations/test_http.py test_http.py-20060731224648-2eef7ae5yja95rya-1
bzrlib/tests/test_bzrdir.py test_bzrdir.py-20060131065654-deba40eef51cf220
bzrlib/tests/test_fetch.py testfetch.py-20050825090644-f73e07e7dfb1765a
bzrlib/tests/test_http.py testhttp.py-20051018020158-b2eef6e867c514d9
bzrlib/tests/test_selftest.py test_selftest.py-20051202044319-c110a115d8c0456a
bzrlib/tests/test_sftp_transport.py testsftp.py-20051027032739-247570325fec7e7e
bzrlib/tests/test_smart_transport.py test_ssh_transport.py-20060608202016-c25gvf1ob7ypbus6-2
bzrlib/tests/test_transport.py testtransport.py-20050718175618-e5cdb99f4555ddce
bzrlib/tests/test_transport_implementations.py test_transport_implementations.py-20051227111451-f97c5c7d5c49fce7
bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
bzrlib/transport/http/_pycurl.py pycurlhttp.py-20060110060940-4e2a705911af77a6
bzrlib/transport/http/_urllib.py _urlgrabber.py-20060113083826-0bbf7d992fbf090c
bzrlib/transport/http/_urllib2_wrappers.py _urllib2_wrappers.py-20060913231729-ha9ugi48ktx481ao-1
bzrlib/tests/http_utils.py HTTPTestUtil.py-20050914180604-247d3aafb7a43343
bzrlib/tests/http_server.py httpserver.py-20061012142527-m1yxdj1xazsf8d7s-1
=== modified file 'NEWS'
--- a/NEWS 2008-01-03 17:08:05 +0000
+++ b/NEWS 2008-01-03 18:09:01 +0000
@@ -64,6 +64,9 @@
BUGFIXES:
+ * Better handle short reads when processing multiple range requests.
+ (Vincent Ladeuil, #179368)
+
* build_tree acceleration uses the correct path when a file has been moved.
(Aaron Bentley)
=== modified file 'bzrlib/tests/test_http.py'
--- a/bzrlib/tests/test_http.py 2008-01-03 08:44:12 +0000
+++ b/bzrlib/tests/test_http.py 2008-01-03 15:12:58 +0000
@@ -904,6 +904,88 @@
_req_handler_class = MultipleRangeWithoutContentLengthRequestHandler
+
+class TruncatedMultipleRangeRequestHandler(
+ http_server.TestingHTTPRequestHandler):
+ """Reply to multiple range requests truncating the last ones.
+
+ This server generates responses whose Content-Length describes all the
+ ranges, but fail to include the last ones leading to client short reads.
+ This has been observed randomly with lighttpd (bug #179368).
+ """
+
+ _truncated_ranges = 2
+
+ def get_multiple_ranges(self, file, file_size, ranges):
+ self.send_response(206)
+ self.send_header('Accept-Ranges', 'bytes')
+ boundary = 'tagada'
+ self.send_header('Content-Type',
+ 'multipart/byteranges; boundary=%s' % boundary)
+ boundary_line = '--%s\r\n' % boundary
+ # Calculate the Content-Length
+ content_length = 0
+ for (start, end) in ranges:
+ content_length += len(boundary_line)
+ content_length += self._header_line_length(
+ 'Content-type', 'application/octet-stream')
+ content_length += self._header_line_length(
+ 'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
+ content_length += len('\r\n') # end headers
+ content_length += end - start # + 1
+ content_length += len(boundary_line)
+ self.send_header('Content-length', content_length)
+ self.end_headers()
+
+ # Send the multipart body
+ cur = 0
+ for (start, end) in ranges:
+ self.wfile.write(boundary_line)
+ self.send_header('Content-type', 'application/octet-stream')
+ self.send_header('Content-Range', 'bytes %d-%d/%d'
+ % (start, end, file_size))
+ self.end_headers()
+ if cur + self._truncated_ranges >= len(ranges):
+ # Abruptly ends the response and close the connection
+ self.close_connection = 1
+ return
+ self.send_range_content(file, start, end - start + 1)
+ cur += 1
+ # No final boundary
+ self.wfile.write(boundary_line)
+
+
+class TestTruncatedMultipleRangeServer(TestSpecificRequestHandler):
+
+ _req_handler_class = TruncatedMultipleRangeRequestHandler
+
+ def setUp(self):
+ super(TestTruncatedMultipleRangeServer, self).setUp()
+ self.build_tree_contents([('a', '0123456789')],)
+
+ def test_readv_with_short_reads(self):
+ server = self.get_readonly_server()
+ t = self._transport(server.get_url())
+ # Force separate ranges for each offset
+ t._bytes_to_read_before_seek = 0
+ ireadv = iter(t.readv('a', ((0, 1), (2, 1), (4, 2), (9, 1))))
+ self.assertEqual((0, '0'), ireadv.next())
+ self.assertEqual((2, '2'), ireadv.next())
+ if not self._testing_pycurl():
+ # Only one request have been issued so far (except for pycurl that
+ # try to read the whole response at once)
+ self.assertEqual(1, server.GET_request_nb)
+ self.assertEqual((4, '45'), ireadv.next())
+ self.assertEqual((9, '9'), ireadv.next())
+ # Both implementations issue 3 requests but:
+ # - urllib does two multiple (4 ranges, then 2 ranges) then a single
+ # range,
+ # - pycurl does two multiple (4 ranges, 4 ranges) then a single range
+ self.assertEqual(3, server.GET_request_nb)
+ # Finally the client have tried a single range request and stays in
+ # that mode
+ self.assertEqual('single', t._range_hint)
+
class LimitedRangeRequestHandler(http_server.TestingHTTPRequestHandler):
"""Errors out when range specifiers exceed the limit"""
=== modified file 'bzrlib/tests/test_http_response.py'
--- a/bzrlib/tests/test_http_response.py 2007-12-13 15:40:42 +0000
+++ b/bzrlib/tests/test_http_response.py 2008-01-02 14:13:55 +0000
@@ -249,7 +249,7 @@
f._pos = 0 # Force an invalid pos
self.assertRaises(errors.InvalidRange, f.read, 2)
-class TestRangeFilMultipleRanges(tests.TestCase, TestRangeFileMixin):
+class TestRangeFileMultipleRanges(tests.TestCase, TestRangeFileMixin):
"""Test a RangeFile for multiple ranges.
The RangeFile used for the tests contains three ranges:
@@ -263,7 +263,7 @@
"""
def setUp(self):
- super(TestRangeFilMultipleRanges, self).setUp()
+ super(TestRangeFileMultipleRanges, self).setUp()
boundary = 'separation'
@@ -361,6 +361,14 @@
f.seek(126) # skip the two first ranges
self.assertEquals('AB', f.read(2))
+ def test_checked_read_dont_overflow_buffers(self):
+ f = self._file
+ start = self.first_range_start
+ # We force a very low value to exercise all code paths in _checked_read
+ f._discarded_buf_size = 8
+ f.seek(126) # skip the two first ranges
+ self.assertEquals('AB', f.read(2))
+
def test_seek_twice_between_ranges(self):
f = self._file
start = self.first_range_start
=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py 2007-12-20 14:14:21 +0000
+++ b/bzrlib/transport/http/__init__.py 2008-01-02 14:13:55 +0000
@@ -217,6 +217,7 @@
offsets = list(offsets)
try_again = True
+ retried_offset = None
while try_again:
try_again = False
@@ -271,10 +272,20 @@
except (errors.ShortReadvError, errors.InvalidRange,
errors.InvalidHttpRange), e:
- self._degrade_range_hint(relpath, coalesced, sys.exc_info())
+ mutter('Exception %r: %s during http._readv',e, e)
+ if (not isinstance(e, errors.ShortReadvError)
+ or retried_offset == cur_offset_and_size):
+ # We don't degrade the range hint for ShortReadvError since
+ # they do not indicate a problem with the server ability to
+ # handle ranges. Except when we fail to get back a required
+ # offset twice in a row. In that case, falling back to
+ # single range or whole file should help or end up in a
+ # fatal exception.
+ self._degrade_range_hint(relpath, coalesced, sys.exc_info())
# Some offsets may have been already processed, so we retry
# only the unsuccessful ones.
offsets = [cur_offset_and_size] + [o for o in iter_offsets]
+ retried_offset = cur_offset_and_size
try_again = True
def _coalesce_readv(self, relpath, coalesced):
=== modified file 'bzrlib/transport/http/_urllib2_wrappers.py'
--- a/bzrlib/transport/http/_urllib2_wrappers.py 2007-12-28 16:51:30 +0000
+++ b/bzrlib/transport/http/_urllib2_wrappers.py 2008-01-03 16:26:32 +0000
@@ -75,6 +75,12 @@
# Some responses have bodies in which we have no interest
_body_ignored_responses = [301,302, 303, 307, 401, 403, 404]
+ # in finish() below, we may have to discard several MB in the worst
+ # case. To avoid buffering that much, we read and discard by chunks
+ # instead. The underlying file is either a socket or a StringIO, so reading
+ # 8k chunks should be fine.
+ _discarded_buf_size = 8192
+
def begin(self):
"""Begin to read the response from the server.
@@ -113,12 +119,6 @@
# below we keep the socket with the server opened.
self.will_close = False
- # in finish() below, we may have to discard several MB in the worst
- # case. To avoid buffering that much, we read and discard by chunks
- # instead. The underlying file is either a socket or a StringIO, so reading
- # 8k chunks should be fine.
- _discarded_buf_size = 8192
-
def finish(self):
"""Finish reading the body.
@@ -134,16 +134,13 @@
if not self.isclosed():
# Make sure nothing was left to be read on the socket
pending = 0
- while self.length and self.length > self._discarded_buf_size:
- data = self.read(self._discarded_buf_size)
- pending += len(data)
- if self.length:
- data = self.read(self.length)
+ data = True
+ while data and self.length:
+ # read() will update self.length
+ data = self.read(min(self.length, self._discarded_buf_size))
pending += len(data)
if pending:
- trace.mutter(
- "%s bytes left on the HTTP socket",
- pending)
+ trace.mutter("%s bytes left on the HTTP socket", pending)
self.close()
return pending
=== modified file 'bzrlib/transport/http/response.py'
--- a/bzrlib/transport/http/response.py 2007-12-10 10:41:24 +0000
+++ b/bzrlib/transport/http/response.py 2008-01-03 16:26:32 +0000
@@ -55,6 +55,12 @@
should happen with monotonically increasing offsets.
"""
+ # in _checked_read() below, we may have to discard several MB in the worst
+ # case. To avoid buffering that much, we read and discard by chunks
+ # instead. The underlying file is either a socket or a StringIO, so reading
+ # 8k chunks should be fine.
+ _discarded_buf_size = 8192
+
def __init__(self, path, infile):
"""Constructor.
@@ -145,23 +151,27 @@
self.set_range(start, size)
def _checked_read(self, size):
- """Read the file checking for short reads"""
+ """Read the file checking for short reads.
+
+ The data read is discarded along the way.
+ """
pos = self._pos
- data = self._file.read(size)
- data_len = len(data)
- if size > 0 and data_len != size:
- raise errors.ShortReadvError(self._path, pos, size, data_len)
- self._pos += data_len
- return data
+ remaining = size
+ while remaining > 0:
+ data = self._file.read(min(remaining, self._discarded_buf_size))
+ remaining -= len(data)
+ if not data:
+ raise errors.ShortReadvError(self._path, pos, size,
+ size - remaining)
+ self._pos += size
def _seek_to_next_range(self):
# We will cross range boundaries
if self._boundary is None:
# If we don't have a boundary, we can't find another range
- raise errors.InvalidRange(
- self._path, self._pos,
- "Range (%s, %s) exhausted"
- % (self._start, self._size))
+ raise errors.InvalidRange(self._path, self._pos,
+ "Range (%s, %s) exhausted"
+ % (self._start, self._size))
self.read_boundary()
self.read_range_definition()
More information about the bazaar-commits
mailing list