Rev 3077: Take spiv review comments into account. in file:///v/home/vila/src/bzr/bugs/173010/

Mon Dec 10 10:41:29 GMT 2007

At file:///v/home/vila/src/bzr/bugs/173010/

------------------------------------------------------------
revno: 3077
revision-id:v.ladeuil+lp at free.fr-20071210104124-0brt3h7ed1kiug0v
parent: v.ladeuil+lp at free.fr-20071208231518-sj2ui57xyd4mkjra
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: 173010
timestamp: Mon 2007-12-10 11:41:24 +0100
message:
  Take spiv review comments into account.
  
  * bzrlib/transport/http/response.py:
  (RangeFile._seek_to_next_range): Factored out since this is now
  used by both seek and read.
  (RangeFile.read): Trigger next range recognition when needed.
  (RangeFile.seek): Don't seek over the range boundary if not
  required to.
  
  * bzrlib/transport/http/__init__.py:
  (HttpTransportBase._coalesce_readv.get_and_yield): Add a
  prophylactic assertionError.
  
  * bzrlib/tests/test_transport_implementations.py:
  Fix typos.
  
  * bzrlib/tests/test_http_response.py:
  (TestRangeFileMixin.test_read_zero,
  TestRangeFileMixin.test_seek_at_range_end,
  TestRangeFileMixin.test_read_at_range_end,
  TestRangeFileSizeUnknown.test_read_at_range_end,
  TestRangeFilMultipleRanges.test_seek_at_range_end,
  TestRangeFilMultipleRanges.test_read_at_range_end): More tests
  covering read(0).
modified:
  bzrlib/tests/test_http_response.py test_http_response.py-20060628233143-950b2a482a32505d
  bzrlib/tests/test_transport.py testtransport.py-20050718175618-e5cdb99f4555ddce
  bzrlib/tests/test_transport_implementations.py test_transport_implementations.py-20051227111451-f97c5c7d5c49fce7
  bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
  bzrlib/transport/http/response.py _response.py-20060613154423-a2ci7hd4iw5c7fnt-1
-------------- next part --------------
=== modified file 'bzrlib/tests/test_http_response.py'

--- a/bzrlib/tests/test_http_response.py	2007-12-08 14:31:01 +0000
+++ b/bzrlib/tests/test_http_response.py	2007-12-10 10:41:24 +0000
@@ -16,9 +16,8 @@
 
 """Tests from HTTP response parsing.
 
-
-We test two main things in this module the RangeFile class and the
-handle_response method.
+The handle_response method read the response body of a GET request an returns
+the corresponding RangeFile.
 
 There are four different kinds of RangeFile:
 - a whole file whose size is unknown, seen as a simple byte stream,
@@ -31,12 +30,11 @@
 - seek can only be forward (its really a socket underneath),
 - read can't cross ranges,
 - successive ranges are taken into account transparently,
+
 - the expected pattern of use is either seek(offset)+read(size) or a single
-  read with no size specified
-
-The handle_response method read the response body of a GET request an returns
-the corresponding RangeFile.
-
+  read with no size specified. For multiple range files, multiple read() will
+  return the corresponding ranges, trying to read further will raise
+  InvalidHttpResponse.
 """
 
 from cStringIO import StringIO
@@ -54,7 +52,7 @@
 
     # A simple string used to represent a file part (also called a range), in
     # which offsets are easy to calculate for test writers. It's used as a
-    # building block with slight variations but basically 'a' if the first char
+    # building block with slight variations but basically 'a' is the first char
     # of the range and 'z' is the last.
     alpha = 'abcdefghijklmnopqrstuvwxyz'
 
@@ -77,8 +75,32 @@
         cur += 4
         self.assertEquals('klmn', f.read(4))
         cur += len('klmn')
+        # read(0) in the middle of a range
+        self.assertEquals('', f.read(0))
+        # seek in place
+        here = f.tell()
+        f.seek(0, 1)
+        self.assertEquals(here, f.tell())
         self.assertEquals(cur, f.tell())
 
+    def test_read_zero(self):
+        f = self._file
+        start = self.first_range_start
+        self.assertEquals('', f.read(0))
+        f.seek(10, 1)
+        self.assertEquals('', f.read(0))
+
+    def test_seek_at_range_end(self):
+        f = self._file
+        f.seek(26, 1)
+
+    def test_read_at_range_end(self):
+        """Test read behaviour at range end."""
+        f = self._file
+        self.assertEquals(self.alpha, f.read())
+        self.assertEquals('', f.read(0))
+        self.assertRaises(errors.InvalidRange, f.read, 1)
+
     def test_unbounded_read_after_seek(self):
         f = self._file
         f.seek(24, 1)
@@ -143,6 +165,12 @@
         """
         self.assertRaises(errors.InvalidRange, self._file.seek, -1, 2)
 
+    def test_read_at_range_end(self):
+        """Test read behaviour at range end."""
+        f = self._file
+        self.assertEquals(self.alpha, f.read())
+        self.assertEquals('', f.read(0))
+        self.assertEquals('', f.read(1))
 
 class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
     """Test a RangeFile for a whole file whose size is known."""
@@ -173,7 +201,17 @@
         self.assertRaises(errors.InvalidRange, f.read, 2)
 
 class TestRangeFilMultipleRanges(tests.TestCase, TestRangeFileMixin):
-    """Test a RangeFile for multiple ranges."""
+    """Test a RangeFile for multiple ranges.
+
+    The RangeFile used for the tests contains three ranges:
+
+    - at offset 25: alpha
+    - at offset 100: alpha
+    - at offset 126: alpha.upper()
+
+    The two last ranges are contiguous. This only rarely occurs (should not in
+    fact) in real uses but may lead to hard to track bugs.
+    """
 
     def setUp(self):
         super(TestRangeFilMultipleRanges, self).setUp()
@@ -248,7 +286,7 @@
     def test_seek_from_end(self):
         """See TestRangeFileMixin.test_seek_from_end."""
         # The actual implementation will seek from end for the first range only
-        # and then fail. Since seeking from end is intented to be used for a
+        # and then fail. Since seeking from end is intended to be used for a
         # single range only anyway, this test just document the actual
         # behaviour.
         f = self._file
@@ -268,7 +306,7 @@
         f.seek(100)
         f.seek(125)
 
-    def test_seek_above_ranges(self):
+    def test_seek_across_ranges(self):
         f = self._file
         start = self.first_range_start
         f.seek(126) # skip the two first ranges
@@ -281,6 +319,20 @@
         # Now the file is positioned at the second range start (100)
         self.assertRaises(errors.InvalidRange, f.seek, start + 41)
 
+    def test_seek_at_range_end(self):
+        """Test seek behavior at range end."""
+        f = self._file
+        f.seek(25 + 25)
+        f.seek(100 + 25)
+        f.seek(126 + 25)
+
+    def test_read_at_range_end(self):
+        f = self._file
+        self.assertEquals(self.alpha, f.read())
+        self.assertEquals(self.alpha, f.read())
+        self.assertEquals(self.alpha.upper(), f.read())
+        self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
+
 
 class TestRangeFileVarious(tests.TestCase):
     """Tests RangeFile aspects not covered elsewhere."""

=== modified file 'bzrlib/tests/test_transport.py'
--- a/bzrlib/tests/test_transport.py	2007-12-08 23:15:18 +0000
+++ b/bzrlib/tests/test_transport.py	2007-12-10 10:41:24 +0000
@@ -180,7 +180,7 @@
         self.check([(0, 20, [(0, 10), (10, 10)])],
                    [(0, 10), (10, 10)])
 
-    # XXX: scary, http.readv() can't handle that
+    # XXX: scary, http.readv() can't handle that --vila20071209
     def test_coalesce_overlapped(self):
         self.check([(0, 15, [(0, 10), (5, 10)])],
                    [(0, 10), (5, 10)])

=== modified file 'bzrlib/tests/test_transport_implementations.py'
--- a/bzrlib/tests/test_transport_implementations.py	2007-12-04 08:26:24 +0000
+++ b/bzrlib/tests/test_transport_implementations.py	2007-12-10 10:41:24 +0000
@@ -187,14 +187,14 @@
         self.build_tree(files, transport=t, line_endings='binary')
         self.check_transport_contents('contents of a\n', t, 'a')
         content_f = t.get_multi(files)
-        # Use itertools.imap() instead of use zip() or map(), since they fully
+        # Use itertools.izip() instead of use zip() or map(), since they fully
         # evaluate their inputs, the transport requests should be issued and
         # handled sequentially (we don't want to force transport to buffer).
         for content, f in itertools.izip(contents, content_f):
             self.assertEqual(content, f.read())
 
         content_f = t.get_multi(iter(files))
-        # Use itertools.imap() for the same reason
+        # Use itertools.izip() for the same reason
         for content, f in itertools.izip(contents, content_f):
             self.assertEqual(content, f.read())
 
@@ -983,7 +983,7 @@
         self.check_transport_contents('c this file\n', t, 'b')
 
         # TODO: Try to write a test for atomicity
-        # TODO: Test moving into a non-existant subdirectory
+        # TODO: Test moving into a non-existent subdirectory
         # TODO: Test Transport.move_multi
 
     def test_copy(self):
@@ -1286,7 +1286,7 @@
         self.assertEqual('', t.relpath(t.base))
         # base ends with /
         self.assertEqual('', t.relpath(t.base[:-1]))
-        # subdirs which dont exist should still give relpaths.
+        # subdirs which don't exist should still give relpaths.
         self.assertEqual('foo', t.relpath(t.base + 'foo'))
         # trailing slash should be the same.
         self.assertEqual('foo', t.relpath(t.base + 'foo/'))
@@ -1574,7 +1574,7 @@
                 adjust_for_latency=True, upper_limit=content_size))
             self.assertEqual(1, len(result))
             data_len = len(result[0][1])
-            # minimmum length is from 400 to 1034 - 634
+            # minimum length is from 400 to 1034 - 634
             self.assertTrue(data_len >= 634)
             # must contain the region 400 to 1034
             self.assertTrue(result[0][0] <= 400)

=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py	2007-12-08 23:15:18 +0000
+++ b/bzrlib/transport/http/__init__.py	2007-12-10 10:41:24 +0000
@@ -204,7 +204,7 @@
         :param return: A list or generator of (offset, data) tuples
         """
 
-        # offsets may be a genarator, we will iterate it several times, so
+        # offsets may be a generator, we will iterate it several times, so
         # build a list
         offsets = list(offsets)
 
@@ -227,7 +227,7 @@
             # Cache the data read, but only until it's been used
             data_map = {}
             # We will iterate on the data received from the GET requests and
-            # serve the corresponding offsets repecting the initial order. We
+            # serve the corresponding offsets respecting the initial order. We
             # need an offset iterator for that.
             iter_offsets = iter(offsets)
             cur_offset_and_size = iter_offsets.next()
@@ -275,7 +275,7 @@
         def get_and_yield(relpath, coalesced):
             if coalesced:
                 # Note that the _get below may raise
-                # errors.InvalidHttpRange. It's the caller's responsability to
+                # errors.InvalidHttpRange. It's the caller's responsibility to
                 # decide how to retry since it may provide different coalesced
                 # offsets.
                 code, rfile = self._get(relpath, coalesced)
@@ -290,8 +290,11 @@
             total = len(coalesced)
             if self._range_hint == 'multi':
                 max_ranges = self._max_get_ranges
-            else: # self._range_hint == 'single'
+            elif self._range_hint == 'single':
                 max_ranges = total
+            else:
+                raise AssertionError("Unknown _range_hint %r"
+                                     % (self._range_hint,))
             # TODO: Some web servers may ignore the range requests and return
             # the whole file, we may want to detect that and avoid further
             # requests.

=== modified file 'bzrlib/transport/http/response.py'
--- a/bzrlib/transport/http/response.py	2007-12-08 23:15:18 +0000
+++ b/bzrlib/transport/http/response.py	2007-12-10 10:41:24 +0000
@@ -30,7 +30,7 @@
     )
 
 
-# A RangeFile epxects the following grammar (simplified to outline the
+# A RangeFile expects the following grammar (simplified to outline the
 # assumptions we rely upon).
 
 # file: whole_file
@@ -80,7 +80,7 @@
     def set_boundary(self, boundary):
         """Define the boundary used in a multi parts message.
         
-        The file should be at the beggining of the body, the first range
+        The file should be at the beginning of the body, the first range
         definition is read and taken into account.
         """
         self._boundary = boundary
@@ -92,7 +92,7 @@
         """Read the boundary headers defining a new range"""
         boundary_line = '\r\n'
         while boundary_line == '\r\n':
-            # RFC2616 19.2 Additional CRLFs may preceed the first boundary
+            # RFC2616 19.2 Additional CRLFs may precede the first boundary
             # string entity.
             # To be on the safe side we allow it before any boundary line
             boundary_line = self._file.readline()
@@ -154,6 +154,17 @@
         self._pos += data_len
         return data
 
+    def _seek_to_next_range(self):
+        # We will cross range boundaries
+        if self._boundary is None:
+            # If we don't have a boundary, we can't find another range
+            raise errors.InvalidRange(
+                self._path, self._pos,
+                "Range (%s, %s) exhausted"
+                % (self._start, self._size))
+        self.read_boundary()
+        self.read_range_definition()
+
     def read(self, size=-1):
         """Read size bytes from the current position in the file.
 
@@ -162,10 +173,17 @@
         the final boundary line of a multipart response or for any range
         request not entirely consumed by the client (due to offset coalescing)
         """
-        if self._pos < self._start:
-            raise errors.InvalidRange(self._path, self._pos,
-                                      "Can't read before range (%s, %s)"
-                                      % (self._start, self._size))
+        if (self._size > 0
+            and self._pos == self._start + self._size):
+            if size == 0:
+                return ''
+            else:
+                self._seek_to_next_range()
+        elif self._pos < self._start:
+            raise errors.InvalidRange(
+                self._path, self._pos,
+                "Can't read %s bytes before range (%s, %s)"
+                % (size, self._start, self._size))
         if self._size > 0:
             if size > 0 and self._pos + size > self._start + self._size:
                 raise errors.InvalidRange(
@@ -176,11 +194,13 @@
         if self._size > 0:
             # Don't read past the range definition
             limited = self._start + self._size - self._pos
-            if size > 0:
+            if size >= 0:
                 limited = min(limited, size)
             data = self._file.read(limited)
         else:
-            # Size of file unknown
+            # Size of file unknown, the user may have specified a size or not,
+            # we delegate that to the filesocket object (-1 means read until
+            # EOF)
             data = self._file.read(size)
         # Update _pos respecting the data effectively read
         self._pos += len(data)
@@ -210,19 +230,13 @@
 
         if self._size > 0:
             cur_limit = self._start + self._size
-            while final_pos >= cur_limit:
+            while final_pos > cur_limit:
                 # We will cross range boundaries
                 remain = cur_limit - self._pos
                 if remain > 0:
                     # Finish reading the current range
                     self._checked_read(remain)
-                if self._boundary is None:
-                    # If we don't have a boundary, we can't find another range
-                    raise errors.InvalidRange(self._path, self._pos,
-                                              'Range (%s, %s) exhausted'
-                                              % (self._start, self._size))
-                self.read_boundary()
-                self.read_range_definition()
+                self._seek_to_next_range()
                 cur_limit = self._start + self._size
 
         size = final_pos - self._pos