Rev 6467: (vila) Avoid invalid range access errors on whole files when using http in file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/
Patch Queue Manager
pqm at pqm.ubuntu.com
Tue Feb 14 17:49:29 UTC 2012
At file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 6467 [merge]
revision-id: pqm at pqm.ubuntu.com-20120214174928-2ybbnystvv1mhgvz
parent: pqm at pqm.ubuntu.com-20120207134616-4f26nutrmui7s5xp
parent: v.ladeuil+lp at free.fr-20120214145525-f5pzb8tiorf2oac7
committer: Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2012-02-14 17:49:28 +0000
message:
(vila) Avoid invalid range access errors on whole files when using http
transport (Vincent Ladeuil)
modified:
bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
bzrlib/transport/http/response.py _response.py-20060613154423-a2ci7hd4iw5c7fnt-1
doc/en/release-notes/bzr-2.6.txt bzr2.6.txt-20120116134316-8w1xxom1c7vcu1t5-1
=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py 2012-01-27 19:05:33 +0000
+++ b/bzrlib/transport/http/__init__.py 2012-02-14 14:55:25 +0000
@@ -21,6 +21,7 @@
from __future__ import absolute_import
+import os
import re
import urlparse
import sys
@@ -236,7 +237,7 @@
# Split the received chunk
for offset, size in cur_coal.ranges:
start = cur_coal.start + offset
- rfile.seek(start, 0)
+ rfile.seek(start, os.SEEK_SET)
data = rfile.read(size)
data_len = len(data)
if data_len != size:
=== modified file 'bzrlib/transport/http/response.py'
--- a/bzrlib/transport/http/response.py 2012-01-23 20:10:03 +0000
+++ b/bzrlib/transport/http/response.py 2012-02-14 14:55:25 +0000
@@ -23,6 +23,7 @@
from __future__ import absolute_import
+import os
import httplib
from cStringIO import StringIO
import rfc822
@@ -33,20 +34,64 @@
)
+class ResponseFile(object):
+ """A wrapper around the http socket containing the result of a GET request.
+
+ Only read() and seek() (forward) are supported.
+ """
+ def __init__(self, path, infile):
+ """Constructor.
+
+ :param path: File url, for error reports.
+
+ :param infile: File-like socket set at body start.
+ """
+ self._path = path
+ self._file = infile
+ self._pos = 0
+
+ def close(self):
+ """Close this file.
+
+ Dummy implementation for consistency with the 'file' API.
+ """
+
+ def read(self, size=-1):
+ """Read size bytes from the current position in the file.
+
+ :param size: The number of bytes to read. Leave unspecified or pass
+ -1 to read to EOF.
+ """
+ data = self._file.read(size)
+ self._pos += len(data)
+ return data
+
+ def seek(self, offset, whence=os.SEEK_SET):
+ if whence == os.SEEK_SET:
+ if offset < self._pos:
+ raise AssertionError(
+ "Can't seek backwards, pos: %s, offset: %s"
+ % (self._pos, offset))
+ to_discard = offset - self._pos
+ elif whence == os.SEEK_CUR:
+ to_discard = offset
+ else:
+ raise AssertionError("Can't seek backwards")
+ if to_discard:
+ # Just discard the unwanted bytes
+ self.read(to_discard)
+
# A RangeFile expects the following grammar (simplified to outline the
# assumptions we rely upon).
-# file: whole_file
-# | single_range
+# file: single_range
# | multiple_range
-# whole_file: [content_length_header] data
-
# single_range: content_range_header data
# multiple_range: boundary_header boundary (content_range_header data boundary)+
-class RangeFile(object):
+class RangeFile(ResponseFile):
"""File-like object that allow access to partial available data.
All accesses should happen sequentially since the acquisition occurs during
@@ -71,10 +116,10 @@
"""Constructor.
:param path: File url, for error reports.
+
:param infile: File-like socket set at body start.
"""
- self._path = path
- self._file = infile
+ super(RangeFile, self).__init__(path, infile)
self._boundary = None
# When using multi parts response, this will be set with the headers
# associated with the range currently read.
@@ -82,12 +127,6 @@
# Default to the whole file of unspecified size
self.set_range(0, -1)
- def close(self):
- """Close this file.
-
- Dummy implementation for consistency with the 'file' API.
- """
-
def set_range(self, start, size):
"""Change the range mapping"""
self._start = start
@@ -304,16 +343,11 @@
:return: A file-like object that can seek()+read() the
ranges indicated by the headers.
"""
- rfile = RangeFile(url, data)
if code == 200:
# A whole file
- size = msg.getheader('content-length', None)
- if size is None:
- size = -1
- else:
- size = int(size)
- rfile.set_range(0, size)
+ rfile = ResponseFile(url, data)
elif code == 206:
+ rfile = RangeFile(url, data)
content_type = msg.getheader('content-type', None)
if content_type is None:
# When there is no content-type header we treat the response as
=== modified file 'doc/en/release-notes/bzr-2.6.txt'
--- a/doc/en/release-notes/bzr-2.6.txt 2012-02-03 10:28:47 +0000
+++ b/doc/en/release-notes/bzr-2.6.txt 2012-02-14 17:49:28 +0000
@@ -26,10 +26,13 @@
.. Improvements to existing commands, especially improved performance
or memory usage, or better results.
- * Access to HTTPS URLs now uses the urrllib implementation by default.
- For the old pycurl-based implementation, specify ``https+pycurl://`` as
- the URL scheme when accessing a HTTPS location.
- (Jelmer Vernooij, #125055)
+* Access to HTTPS URLs now uses the urllib implementation by default.
+ For the old pycurl-based implementation, specify ``https+pycurl://`` as
+ the URL scheme when accessing a HTTPS location.
+ (Jelmer Vernooij, #125055)
+
+* Avoid 'Invalid range access' errors when whole files are retrieved with
+ transport.http.get(). (Vincent Ladeuil, #924746)
Bug Fixes
*********
More information about the bazaar-commits
mailing list