Rev 3077: Take spiv review comments into account. in file:///v/home/vila/src/bzr/bugs/173010/
Vincent Ladeuil
v.ladeuil+lp at free.fr
Mon Dec 10 10:41:29 GMT 2007
At file:///v/home/vila/src/bzr/bugs/173010/
------------------------------------------------------------
revno: 3077
revision-id:v.ladeuil+lp at free.fr-20071210104124-0brt3h7ed1kiug0v
parent: v.ladeuil+lp at free.fr-20071208231518-sj2ui57xyd4mkjra
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: 173010
timestamp: Mon 2007-12-10 11:41:24 +0100
message:
Take spiv review comments into account.
* bzrlib/transport/http/response.py:
(RangeFile._seek_to_next_range): Factored out since this is now
used by both seek and read.
(RangeFile.read): Trigger next range recognition when needed.
(RangeFile.seek): Don't seek over the range boundary if not
required to.
* bzrlib/transport/http/__init__.py:
(HttpTransportBase._coalesce_readv.get_and_yield): Add a
prophylactic assertionError.
* bzrlib/tests/test_transport_implementations.py:
Fix typos.
* bzrlib/tests/test_http_response.py:
(TestRangeFileMixin.test_read_zero,
TestRangeFileMixin.test_seek_at_range_end,
TestRangeFileMixin.test_read_at_range_end,
TestRangeFileSizeUnknown.test_read_at_range_end,
TestRangeFilMultipleRanges.test_seek_at_range_end,
TestRangeFilMultipleRanges.test_read_at_range_end): More tests
covering read(0).
modified:
bzrlib/tests/test_http_response.py test_http_response.py-20060628233143-950b2a482a32505d
bzrlib/tests/test_transport.py testtransport.py-20050718175618-e5cdb99f4555ddce
bzrlib/tests/test_transport_implementations.py test_transport_implementations.py-20051227111451-f97c5c7d5c49fce7
bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
bzrlib/transport/http/response.py _response.py-20060613154423-a2ci7hd4iw5c7fnt-1
-------------- next part --------------
=== modified file 'bzrlib/tests/test_http_response.py'
--- a/bzrlib/tests/test_http_response.py 2007-12-08 14:31:01 +0000
+++ b/bzrlib/tests/test_http_response.py 2007-12-10 10:41:24 +0000
@@ -16,9 +16,8 @@
"""Tests from HTTP response parsing.
-
-We test two main things in this module the RangeFile class and the
-handle_response method.
+The handle_response method read the response body of a GET request an returns
+the corresponding RangeFile.
There are four different kinds of RangeFile:
- a whole file whose size is unknown, seen as a simple byte stream,
@@ -31,12 +30,11 @@
- seek can only be forward (its really a socket underneath),
- read can't cross ranges,
- successive ranges are taken into account transparently,
+
- the expected pattern of use is either seek(offset)+read(size) or a single
- read with no size specified
-
-The handle_response method read the response body of a GET request an returns
-the corresponding RangeFile.
-
+ read with no size specified. For multiple range files, multiple read() will
+ return the corresponding ranges, trying to read further will raise
+ InvalidHttpResponse.
"""
from cStringIO import StringIO
@@ -54,7 +52,7 @@
# A simple string used to represent a file part (also called a range), in
# which offsets are easy to calculate for test writers. It's used as a
- # building block with slight variations but basically 'a' if the first char
+ # building block with slight variations but basically 'a' is the first char
# of the range and 'z' is the last.
alpha = 'abcdefghijklmnopqrstuvwxyz'
@@ -77,8 +75,32 @@
cur += 4
self.assertEquals('klmn', f.read(4))
cur += len('klmn')
+ # read(0) in the middle of a range
+ self.assertEquals('', f.read(0))
+ # seek in place
+ here = f.tell()
+ f.seek(0, 1)
+ self.assertEquals(here, f.tell())
self.assertEquals(cur, f.tell())
+ def test_read_zero(self):
+ f = self._file
+ start = self.first_range_start
+ self.assertEquals('', f.read(0))
+ f.seek(10, 1)
+ self.assertEquals('', f.read(0))
+
+ def test_seek_at_range_end(self):
+ f = self._file
+ f.seek(26, 1)
+
+ def test_read_at_range_end(self):
+ """Test read behaviour at range end."""
+ f = self._file
+ self.assertEquals(self.alpha, f.read())
+ self.assertEquals('', f.read(0))
+ self.assertRaises(errors.InvalidRange, f.read, 1)
+
def test_unbounded_read_after_seek(self):
f = self._file
f.seek(24, 1)
@@ -143,6 +165,12 @@
"""
self.assertRaises(errors.InvalidRange, self._file.seek, -1, 2)
+ def test_read_at_range_end(self):
+ """Test read behaviour at range end."""
+ f = self._file
+ self.assertEquals(self.alpha, f.read())
+ self.assertEquals('', f.read(0))
+ self.assertEquals('', f.read(1))
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
"""Test a RangeFile for a whole file whose size is known."""
@@ -173,7 +201,17 @@
self.assertRaises(errors.InvalidRange, f.read, 2)
class TestRangeFilMultipleRanges(tests.TestCase, TestRangeFileMixin):
- """Test a RangeFile for multiple ranges."""
+ """Test a RangeFile for multiple ranges.
+
+ The RangeFile used for the tests contains three ranges:
+
+ - at offset 25: alpha
+ - at offset 100: alpha
+ - at offset 126: alpha.upper()
+
+ The two last ranges are contiguous. This only rarely occurs (should not in
+ fact) in real uses but may lead to hard to track bugs.
+ """
def setUp(self):
super(TestRangeFilMultipleRanges, self).setUp()
@@ -248,7 +286,7 @@
def test_seek_from_end(self):
"""See TestRangeFileMixin.test_seek_from_end."""
# The actual implementation will seek from end for the first range only
- # and then fail. Since seeking from end is intented to be used for a
+ # and then fail. Since seeking from end is intended to be used for a
# single range only anyway, this test just document the actual
# behaviour.
f = self._file
@@ -268,7 +306,7 @@
f.seek(100)
f.seek(125)
- def test_seek_above_ranges(self):
+ def test_seek_across_ranges(self):
f = self._file
start = self.first_range_start
f.seek(126) # skip the two first ranges
@@ -281,6 +319,20 @@
# Now the file is positioned at the second range start (100)
self.assertRaises(errors.InvalidRange, f.seek, start + 41)
+ def test_seek_at_range_end(self):
+ """Test seek behavior at range end."""
+ f = self._file
+ f.seek(25 + 25)
+ f.seek(100 + 25)
+ f.seek(126 + 25)
+
+ def test_read_at_range_end(self):
+ f = self._file
+ self.assertEquals(self.alpha, f.read())
+ self.assertEquals(self.alpha, f.read())
+ self.assertEquals(self.alpha.upper(), f.read())
+ self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
+
class TestRangeFileVarious(tests.TestCase):
"""Tests RangeFile aspects not covered elsewhere."""
=== modified file 'bzrlib/tests/test_transport.py'
--- a/bzrlib/tests/test_transport.py 2007-12-08 23:15:18 +0000
+++ b/bzrlib/tests/test_transport.py 2007-12-10 10:41:24 +0000
@@ -180,7 +180,7 @@
self.check([(0, 20, [(0, 10), (10, 10)])],
[(0, 10), (10, 10)])
- # XXX: scary, http.readv() can't handle that
+ # XXX: scary, http.readv() can't handle that --vila20071209
def test_coalesce_overlapped(self):
self.check([(0, 15, [(0, 10), (5, 10)])],
[(0, 10), (5, 10)])
=== modified file 'bzrlib/tests/test_transport_implementations.py'
--- a/bzrlib/tests/test_transport_implementations.py 2007-12-04 08:26:24 +0000
+++ b/bzrlib/tests/test_transport_implementations.py 2007-12-10 10:41:24 +0000
@@ -187,14 +187,14 @@
self.build_tree(files, transport=t, line_endings='binary')
self.check_transport_contents('contents of a\n', t, 'a')
content_f = t.get_multi(files)
- # Use itertools.imap() instead of use zip() or map(), since they fully
+ # Use itertools.izip() instead of use zip() or map(), since they fully
# evaluate their inputs, the transport requests should be issued and
# handled sequentially (we don't want to force transport to buffer).
for content, f in itertools.izip(contents, content_f):
self.assertEqual(content, f.read())
content_f = t.get_multi(iter(files))
- # Use itertools.imap() for the same reason
+ # Use itertools.izip() for the same reason
for content, f in itertools.izip(contents, content_f):
self.assertEqual(content, f.read())
@@ -983,7 +983,7 @@
self.check_transport_contents('c this file\n', t, 'b')
# TODO: Try to write a test for atomicity
- # TODO: Test moving into a non-existant subdirectory
+ # TODO: Test moving into a non-existent subdirectory
# TODO: Test Transport.move_multi
def test_copy(self):
@@ -1286,7 +1286,7 @@
self.assertEqual('', t.relpath(t.base))
# base ends with /
self.assertEqual('', t.relpath(t.base[:-1]))
- # subdirs which dont exist should still give relpaths.
+ # subdirs which don't exist should still give relpaths.
self.assertEqual('foo', t.relpath(t.base + 'foo'))
# trailing slash should be the same.
self.assertEqual('foo', t.relpath(t.base + 'foo/'))
@@ -1574,7 +1574,7 @@
adjust_for_latency=True, upper_limit=content_size))
self.assertEqual(1, len(result))
data_len = len(result[0][1])
- # minimmum length is from 400 to 1034 - 634
+ # minimum length is from 400 to 1034 - 634
self.assertTrue(data_len >= 634)
# must contain the region 400 to 1034
self.assertTrue(result[0][0] <= 400)
=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py 2007-12-08 23:15:18 +0000
+++ b/bzrlib/transport/http/__init__.py 2007-12-10 10:41:24 +0000
@@ -204,7 +204,7 @@
:param return: A list or generator of (offset, data) tuples
"""
- # offsets may be a genarator, we will iterate it several times, so
+ # offsets may be a generator, we will iterate it several times, so
# build a list
offsets = list(offsets)
@@ -227,7 +227,7 @@
# Cache the data read, but only until it's been used
data_map = {}
# We will iterate on the data received from the GET requests and
- # serve the corresponding offsets repecting the initial order. We
+ # serve the corresponding offsets respecting the initial order. We
# need an offset iterator for that.
iter_offsets = iter(offsets)
cur_offset_and_size = iter_offsets.next()
@@ -275,7 +275,7 @@
def get_and_yield(relpath, coalesced):
if coalesced:
# Note that the _get below may raise
- # errors.InvalidHttpRange. It's the caller's responsability to
+ # errors.InvalidHttpRange. It's the caller's responsibility to
# decide how to retry since it may provide different coalesced
# offsets.
code, rfile = self._get(relpath, coalesced)
@@ -290,8 +290,11 @@
total = len(coalesced)
if self._range_hint == 'multi':
max_ranges = self._max_get_ranges
- else: # self._range_hint == 'single'
+ elif self._range_hint == 'single':
max_ranges = total
+ else:
+ raise AssertionError("Unknown _range_hint %r"
+ % (self._range_hint,))
# TODO: Some web servers may ignore the range requests and return
# the whole file, we may want to detect that and avoid further
# requests.
=== modified file 'bzrlib/transport/http/response.py'
--- a/bzrlib/transport/http/response.py 2007-12-08 23:15:18 +0000
+++ b/bzrlib/transport/http/response.py 2007-12-10 10:41:24 +0000
@@ -30,7 +30,7 @@
)
-# A RangeFile epxects the following grammar (simplified to outline the
+# A RangeFile expects the following grammar (simplified to outline the
# assumptions we rely upon).
# file: whole_file
@@ -80,7 +80,7 @@
def set_boundary(self, boundary):
"""Define the boundary used in a multi parts message.
- The file should be at the beggining of the body, the first range
+ The file should be at the beginning of the body, the first range
definition is read and taken into account.
"""
self._boundary = boundary
@@ -92,7 +92,7 @@
"""Read the boundary headers defining a new range"""
boundary_line = '\r\n'
while boundary_line == '\r\n':
- # RFC2616 19.2 Additional CRLFs may preceed the first boundary
+ # RFC2616 19.2 Additional CRLFs may precede the first boundary
# string entity.
# To be on the safe side we allow it before any boundary line
boundary_line = self._file.readline()
@@ -154,6 +154,17 @@
self._pos += data_len
return data
+ def _seek_to_next_range(self):
+ # We will cross range boundaries
+ if self._boundary is None:
+ # If we don't have a boundary, we can't find another range
+ raise errors.InvalidRange(
+ self._path, self._pos,
+ "Range (%s, %s) exhausted"
+ % (self._start, self._size))
+ self.read_boundary()
+ self.read_range_definition()
+
def read(self, size=-1):
"""Read size bytes from the current position in the file.
@@ -162,10 +173,17 @@
the final boundary line of a multipart response or for any range
request not entirely consumed by the client (due to offset coalescing)
"""
- if self._pos < self._start:
- raise errors.InvalidRange(self._path, self._pos,
- "Can't read before range (%s, %s)"
- % (self._start, self._size))
+ if (self._size > 0
+ and self._pos == self._start + self._size):
+ if size == 0:
+ return ''
+ else:
+ self._seek_to_next_range()
+ elif self._pos < self._start:
+ raise errors.InvalidRange(
+ self._path, self._pos,
+ "Can't read %s bytes before range (%s, %s)"
+ % (size, self._start, self._size))
if self._size > 0:
if size > 0 and self._pos + size > self._start + self._size:
raise errors.InvalidRange(
@@ -176,11 +194,13 @@
if self._size > 0:
# Don't read past the range definition
limited = self._start + self._size - self._pos
- if size > 0:
+ if size >= 0:
limited = min(limited, size)
data = self._file.read(limited)
else:
- # Size of file unknown
+ # Size of file unknown, the user may have specified a size or not,
+ # we delegate that to the filesocket object (-1 means read until
+ # EOF)
data = self._file.read(size)
# Update _pos respecting the data effectively read
self._pos += len(data)
@@ -210,19 +230,13 @@
if self._size > 0:
cur_limit = self._start + self._size
- while final_pos >= cur_limit:
+ while final_pos > cur_limit:
# We will cross range boundaries
remain = cur_limit - self._pos
if remain > 0:
# Finish reading the current range
self._checked_read(remain)
- if self._boundary is None:
- # If we don't have a boundary, we can't find another range
- raise errors.InvalidRange(self._path, self._pos,
- 'Range (%s, %s) exhausted'
- % (self._start, self._size))
- self.read_boundary()
- self.read_range_definition()
+ self._seek_to_next_range()
cur_limit = self._start + self._size
size = final_pos - self._pos
More information about the bazaar-commits
mailing list