Rev 6467: (vila) Avoid invalid range access errors on whole files when using http in file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/

Tue Feb 14 17:49:29 UTC 2012

At file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 6467 [merge]
revision-id: pqm at pqm.ubuntu.com-20120214174928-2ybbnystvv1mhgvz
parent: pqm at pqm.ubuntu.com-20120207134616-4f26nutrmui7s5xp
parent: v.ladeuil+lp at free.fr-20120214145525-f5pzb8tiorf2oac7
committer: Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2012-02-14 17:49:28 +0000
message:
  (vila) Avoid invalid range access errors on whole files when using http
   transport (Vincent Ladeuil)
modified:
  bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
  bzrlib/transport/http/response.py _response.py-20060613154423-a2ci7hd4iw5c7fnt-1
  doc/en/release-notes/bzr-2.6.txt bzr2.6.txt-20120116134316-8w1xxom1c7vcu1t5-1
=== modified file 'bzrlib/transport/http/__init__.py'

--- a/bzrlib/transport/http/__init__.py	2012-01-27 19:05:33 +0000
+++ b/bzrlib/transport/http/__init__.py	2012-02-14 14:55:25 +0000
@@ -21,6 +21,7 @@
 
 from __future__ import absolute_import
 
+import os
 import re
 import urlparse
 import sys
@@ -236,7 +237,7 @@
                     # Split the received chunk
                     for offset, size in cur_coal.ranges:
                         start = cur_coal.start + offset
-                        rfile.seek(start, 0)
+                        rfile.seek(start, os.SEEK_SET)
                         data = rfile.read(size)
                         data_len = len(data)
                         if data_len != size:

=== modified file 'bzrlib/transport/http/response.py'
--- a/bzrlib/transport/http/response.py	2012-01-23 20:10:03 +0000
+++ b/bzrlib/transport/http/response.py	2012-02-14 14:55:25 +0000
@@ -23,6 +23,7 @@
 
 from __future__ import absolute_import
 
+import os
 import httplib
 from cStringIO import StringIO
 import rfc822
@@ -33,20 +34,64 @@
     )
 
 
+class ResponseFile(object):
+    """A wrapper around the http socket containing the result of a GET request.
+
+    Only read() and seek() (forward) are supported.
+    """
+    def __init__(self, path, infile):
+        """Constructor.
+
+        :param path: File url, for error reports.
+
+        :param infile: File-like socket set at body start.
+        """
+        self._path = path
+        self._file = infile
+        self._pos = 0  # Number of bytes read from infile so far
+
+    def close(self):
+        """Close this file.
+
+        Dummy implementation for consistency with the 'file' API.
+        """
+
+    def read(self, size=-1):
+        """Read size bytes from the current position in the file.
+
+        :param size:  The number of bytes to read.  Leave unspecified or pass
+            -1 to read to EOF.
+        """
+        data = self._file.read(size)
+        self._pos += len(data)
+        return data
+
+    def seek(self, offset, whence=os.SEEK_SET):
+        if whence == os.SEEK_SET:
+            if offset < self._pos:
+                raise AssertionError(
+                    "Can't seek backwards, pos: %s, offset: %s"
+                    % (self._pos, offset))
+            to_discard = offset - self._pos
+        elif whence == os.SEEK_CUR:
+            to_discard = offset
+        else:
+            raise AssertionError("Can't seek backwards")
+        if to_discard:
+            # Forward-only: skip ahead by reading and dropping the bytes
+            self.read(to_discard)
+
 # A RangeFile expects the following grammar (simplified to outline the
 # assumptions we rely upon).
 
-# file: whole_file
-#     | single_range
+# file: single_range
 #     | multiple_range
 
-# whole_file: [content_length_header] data
-
 # single_range: content_range_header data
 
 # multiple_range: boundary_header boundary (content_range_header data boundary)+
 
-class RangeFile(object):
+class RangeFile(ResponseFile):
     """File-like object that allow access to partial available data.
 
     All accesses should happen sequentially since the acquisition occurs during
@@ -71,10 +116,10 @@
         """Constructor.
 
         :param path: File url, for error reports.
+
         :param infile: File-like socket set at body start.
         """
-        self._path = path
-        self._file = infile
+        super(RangeFile, self).__init__(path, infile)
         self._boundary = None
         # When using multi parts response, this will be set with the headers
         # associated with the range currently read.
@@ -82,12 +127,6 @@
         # Default to the whole file of unspecified size
         self.set_range(0, -1)
 
-    def close(self):
-        """Close this file.
-
-        Dummy implementation for consistency with the 'file' API.
-        """
-
     def set_range(self, start, size):
         """Change the range mapping"""
         self._start = start
@@ -304,16 +343,11 @@
     :return: A file-like object that can seek()+read() the
              ranges indicated by the headers.
     """
-    rfile = RangeFile(url, data)
     if code == 200:
         # A whole file
-        size = msg.getheader('content-length', None)
-        if size is None:
-            size = -1
-        else:
-            size = int(size)
-        rfile.set_range(0, size)
+        rfile = ResponseFile(url, data)
     elif code == 206:
+        rfile = RangeFile(url, data)
         content_type = msg.getheader('content-type', None)
         if content_type is None:
             # When there is no content-type header we treat the response as

=== modified file 'doc/en/release-notes/bzr-2.6.txt'
--- a/doc/en/release-notes/bzr-2.6.txt	2012-02-03 10:28:47 +0000
+++ b/doc/en/release-notes/bzr-2.6.txt	2012-02-14 17:49:28 +0000
@@ -26,10 +26,13 @@
 .. Improvements to existing commands, especially improved performance 
    or memory usage, or better results.
 
- * Access to HTTPS URLs now uses the urrllib implementation by default.
-   For the old pycurl-based implementation, specify ``https+pycurl://`` as
-   the URL scheme when accessing a HTTPS location.
-   (Jelmer Vernooij, #125055)
+* Access to HTTPS URLs now uses the urllib implementation by default.
+  For the old pycurl-based implementation, specify ``https+pycurl://`` as
+  the URL scheme when accessing a HTTPS location.
+  (Jelmer Vernooij, #125055)
+
+* Avoid 'Invalid range access' errors when whole files are retrieved with
+  transport.http.get(). (Vincent Ladeuil, #924746)
 
 Bug Fixes
 *********