Rev 2513: Change read/iter_records to return a callable, add more validation, and in http://bazaar.launchpad.net/~bzr/bzr/container-format
Andrew Bennetts
andrew.bennetts at canonical.com
Thu Jun 14 14:30:30 BST 2007
At http://bazaar.launchpad.net/~bzr/bzr/container-format
------------------------------------------------------------
revno: 2513
revision-id: andrew.bennetts at canonical.com-20070614132802-bas89f67tqq4p3s6
parent: andrew.bennetts at canonical.com-20070614055245-rtwk0vgz74fyyimo
parent: andrew.bennetts at canonical.com-20070614125513-nua0p6bw9cw3jeaq
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: container-format
timestamp: Thu 2007-06-14 23:28:02 +1000
message:
Change read/iter_records to return a callable, add more validation, and
improve docstrings.
modified:
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/pack.py container.py-20070607160755-tr8zc26q18rn0jnb-1
bzrlib/tests/test_errors.py test_errors.py-20060210110251-41aba2deddf936a8
bzrlib/tests/test_pack.py test_container.py-20070607160755-tr8zc26q18rn0jnb-2
------------------------------------------------------------
revno: 2512.1.2
revision-id: andrew.bennetts at canonical.com-20070614125513-nua0p6bw9cw3jeaq
parent: andrew.bennetts at canonical.com-20070614112338-6u3900u6nkag66u8
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: container-format
timestamp: Thu 2007-06-14 22:55:13 +1000
message:
Docstring improvements.
------------------------------------------------------------
revno: 2512.1.1
revision-id: andrew.bennetts at canonical.com-20070614112338-6u3900u6nkag66u8
parent: andrew.bennetts at canonical.com-20070614055245-rtwk0vgz74fyyimo
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: container-format
timestamp: Thu 2007-06-14 21:23:38 +1000
message:
Return a callable instead of a str from read, and add more validation.
=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py 2007-06-14 05:52:45 +0000
+++ b/bzrlib/errors.py 2007-06-14 11:23:38 +0000
@@ -2178,3 +2178,11 @@
def __init__(self, excess):
self.excess = excess
+
+class DuplicateRecordNameError(ContainerError):
+
+ _fmt = "Container has multiple records with the same name: \"%(name)s\""
+
+ def __init__(self, name):
+ self.name = name
+
=== modified file 'bzrlib/pack.py'
--- a/bzrlib/pack.py 2007-06-14 05:52:45 +0000
+++ b/bzrlib/pack.py 2007-06-14 13:28:02 +0000
@@ -37,12 +37,26 @@
name.
:raises InvalidRecordError: if name is not valid.
+ :seealso: _check_name_encoding
"""
- # XXX: consider checking that name is a str of valid UTF-8 too?
if _whitespace_re.search(name) is not None:
raise errors.InvalidRecordError("%r is not a valid name." % (name,))
+def _check_name_encoding(name):
+ """Check that 'name' is valid UTF-8.
+
+ This is separate from _check_name because UTF-8 decoding is relatively
+ expensive, and we usually want to avoid it.
+
+ :raises InvalidRecordError: if name is not valid UTF-8.
+ """
+ try:
+ name.decode('utf-8')
+ except UnicodeDecodeError, e:
+ raise errors.InvalidRecordError(str(e))
+
+
class ContainerWriter(object):
"""A class for writing containers."""
@@ -119,24 +133,55 @@
def iter_records(self):
"""Iterate over the container, yielding each record as it is read.
- Each yielded record will be a 2-tuple of (names, bytes), where names is
- a ``list`` and bytes is a ``str``.
-
- :raises UnknownContainerFormatError: if the format of the container is
- unrecognised.
+ Each yielded record will be a 2-tuple of (names, callable), where names
+ is a ``list`` and bytes is a function that takes one argument,
+ ``max_length``.
+
+ You **must not** call the callable after advancing the interator to the
+ next record. That is, this code is invalid::
+
+ record_iter = container.iter_records()
+ names1, callable1 = record_iter.next()
+ names2, callable2 = record_iter.next()
+ bytes1 = callable1(None)
+
+ As it will give incorrect results and invalidate the state of the
+ ContainerReader.
+
+ :raises ContainerError: if any sort of containter corruption is
+ detected, e.g. UnknownContainerFormatError is the format of the
+ container is unrecognised.
+ :seealso: ContainerReader.read
"""
- format = self._read_line()
- if format != FORMAT_ONE:
- raise errors.UnknownContainerFormatError(format)
+ self._read_format()
return self._iter_records()
+ def iter_record_objects(self):
+ """Iterate over the container, yielding each record as it is read.
+
+ Each yielded record will be an object with ``read`` and ``validate``
+ methods. Like with iter_records, it is not safe to use a record object
+ after advancing the iterator to yield next record.
+
+ :raises ContainerError: if any sort of containter corruption is
+ detected, e.g. UnknownContainerFormatError is the format of the
+ container is unrecognised.
+ :seealso: iter_records
+ """
+ self._read_format()
+ return self._iter_record_objects()
+
def _iter_records(self):
+ for record in self._iter_record_objects():
+ yield record.read()
+
+ def _iter_record_objects(self):
while True:
record_kind = self.reader_func(1)
if record_kind == 'B':
# Bytes record.
reader = BytesRecordReader(self.reader_func)
- yield reader.read()
+ yield reader
elif record_kind == 'E':
# End marker. There are no more records.
return
@@ -148,17 +193,32 @@
# Unknown record type.
raise errors.UnknownRecordTypeError(record_kind)
+ def _read_format(self):
+ format = self._read_line()
+ if format != FORMAT_ONE:
+ raise errors.UnknownContainerFormatError(format)
+
def validate(self):
"""Validate this container and its records.
- You can either validate or iter_records, you can't do both.
+ Validating consumes the data stream just like iter_records and
+ iter_record_objects, so you cannot call it after
+ iter_records/iter_record_objects.
:raises ContainerError: if something is invalid.
"""
- for names, bytes in self.iter_records():
- # XXX: bytes is str, should be callable to get bytes.
- # callable()
- pass
+ all_names = set()
+ for record_names, read_bytes in self.iter_records():
+ read_bytes(None)
+ for name in record_names:
+ _check_name_encoding(name)
+ # Check that the name is unique. Note that Python will refuse
+ # to decode non-shortest forms of UTF-8 encoding, so there is no
+ # risk that the same unicode string has been encoded two
+ # different ways.
+ if name in all_names:
+ raise errors.DuplicateRecordNameError(name)
+ all_names.add(name)
excess_bytes = self.reader_func(1)
if excess_bytes != '':
raise errors.ContainerHasExcessDataError(excess_bytes)
@@ -169,9 +229,13 @@
def read(self):
"""Read this record.
- You can either validate or read, you can't do both.
+ You can either validate or read a record, you can't do both.
- :returns: (names, bytes)
+ :returns: A tuple of (names, callable). The callable can be called
+ repeatedly to obtain the bytes for the record, with a max_length
+ argument. If max_length is None, returns all the bytes. Because
+ records can be arbitrarily large, using None is not recommended
+ unless you have reason to believe the content will fit in memory.
"""
# Read the content length.
length_line = self._read_line()
@@ -189,10 +253,20 @@
break
_check_name(name)
names.append(name)
- bytes = self.reader_func(length)
- if len(bytes) != length:
+
+ self._remaining_length = length
+ return names, self._content_reader
+
+ def _content_reader(self, max_length):
+ if max_length is None:
+ length_to_read = self._remaining_length
+ else:
+ length_to_read = min(max_length, self._remaining_length)
+ self._remaining_length -= length_to_read
+ bytes = self.reader_func(length_to_read)
+ if len(bytes) != length_to_read:
raise errors.UnexpectedEndOfContainerError()
- return names, bytes
+ return bytes
def validate(self):
"""Validate this record.
@@ -201,4 +275,8 @@
:raises ContainerError: if this record is invalid.
"""
- self.read()
+ names, read_bytes = self.read()
+ for name in names:
+ _check_name_encoding(name)
+ read_bytes(None)
+
=== modified file 'bzrlib/tests/test_errors.py'
--- a/bzrlib/tests/test_errors.py 2007-06-14 05:52:45 +0000
+++ b/bzrlib/tests/test_errors.py 2007-06-14 11:23:38 +0000
@@ -300,6 +300,13 @@
"Container has data after end marker: 'excess bytes'",
str(e))
+ def test_duplicate_record_name_error(self):
+ """Test the formatting of DuplicateRecordNameError."""
+ e = errors.DuplicateRecordNameError(u"n\xe5me".encode('utf-8'))
+ self.assertEqual(
+ "Container has multiple records with the same name: \"n\xc3\xa5me\"",
+ str(e))
+
class PassThroughError(errors.BzrError):
=== modified file 'bzrlib/tests/test_pack.py'
--- a/bzrlib/tests/test_pack.py 2007-06-14 05:52:45 +0000
+++ b/bzrlib/tests/test_pack.py 2007-06-14 11:23:38 +0000
@@ -136,19 +136,30 @@
"""
reader = self.get_reader_for("Bazaar pack format 1\nB5\n\naaaaaE")
expected_records = [([], 'aaaaa')]
- self.assertEqual(expected_records, list(reader.iter_records()))
+ self.assertEqual(
+ expected_records,
+ [(names, read_bytes(None))
+ for (names, read_bytes) in reader.iter_records()])
def test_validate_empty_container(self):
+ """validate does not raise an error for a container with no records."""
reader = self.get_reader_for("Bazaar pack format 1\nE")
# No exception raised
reader.validate()
def test_validate_non_empty_valid_container(self):
+ """validate does not raise an error for a container with a valid record.
+ """
reader = self.get_reader_for("Bazaar pack format 1\nB3\nname\n\nabcE")
# No exception raised
reader.validate()
def test_validate_bad_format(self):
+ """validate raises an error for unrecognised format strings.
+
+ It may raise either UnexpectedEndOfContainerError or
+ UnknownContainerFormatError, depending on exactly what the string is.
+ """
inputs = ["", "x", "Bazaar pack format 1", "bad\n"]
for input in inputs:
reader = self.get_reader_for(input)
@@ -158,22 +169,47 @@
reader.validate)
def test_validate_bad_record_marker(self):
+ """validate raises UnknownRecordTypeError for unrecognised record
+ types.
+ """
reader = self.get_reader_for("Bazaar pack format 1\nX")
self.assertRaises(errors.UnknownRecordTypeError, reader.validate)
def test_validate_data_after_end_marker(self):
+ """validate raises ContainerHasExcessDataError if there are any bytes
+ after the end of the container.
+ """
reader = self.get_reader_for("Bazaar pack format 1\nEcrud")
self.assertRaises(
errors.ContainerHasExcessDataError, reader.validate)
def test_validate_no_end_marker(self):
+ """validate raises UnexpectedEndOfContainerError if there's no end of
+ container marker, even if the container up to this point has been valid.
+ """
reader = self.get_reader_for("Bazaar pack format 1\n")
self.assertRaises(
errors.UnexpectedEndOfContainerError, reader.validate)
+ def test_validate_duplicate_name(self):
+ """validate raises DuplicateRecordNameError if the same name occurs
+ multiple times in the container.
+ """
+ reader = self.get_reader_for(
+ "Bazaar pack format 1\n"
+ "B0\nname\n\n"
+ "B0\nname\n\n"
+ "E")
+ self.assertRaises(errors.DuplicateRecordNameError, reader.validate)
+
+ def test_validate_undecodeable_name(self):
+ """Names that aren't valid UTF-8 cause validate to fail."""
+ reader = self.get_reader_for("Bazaar pack format 1\nB0\n\xcc\n\nE")
+ self.assertRaises(errors.InvalidRecordError, reader.validate)
+
class TestBytesRecordReader(tests.TestCase):
- """Tests for parsing Bytes records with BytesRecordReader."""
+ """Tests for reading and validating Bytes records with BytesRecordReader."""
def get_reader_for(self, bytes):
stream = StringIO(bytes)
@@ -185,26 +221,26 @@
names.
"""
reader = self.get_reader_for("5\n\naaaaa")
- names, bytes = reader.read()
+ names, get_bytes = reader.read()
self.assertEqual([], names)
- self.assertEqual('aaaaa', bytes)
+ self.assertEqual('aaaaa', get_bytes(None))
def test_record_with_one_name(self):
"""Reading a Bytes record with one name returns a list of just that
name.
"""
reader = self.get_reader_for("5\nname1\n\naaaaa")
- names, bytes = reader.read()
+ names, get_bytes = reader.read()
self.assertEqual(['name1'], names)
- self.assertEqual('aaaaa', bytes)
+ self.assertEqual('aaaaa', get_bytes(None))
def test_record_with_two_names(self):
"""Reading a Bytes record with two names returns a list of both names.
"""
reader = self.get_reader_for("5\nname1\nname2\n\naaaaa")
- names, bytes = reader.read()
+ names, get_bytes = reader.read()
self.assertEqual(['name1', 'name2'], names)
- self.assertEqual('aaaaa', bytes)
+ self.assertEqual('aaaaa', get_bytes(None))
def test_invalid_length(self):
"""If the length-prefix is not a number, parsing raises
@@ -225,16 +261,19 @@
"""
complete_record = "6\nname\n\nabcdef"
for count in range(0, len(complete_record)):
- reader = self.get_reader_for(complete_record[:count])
- # We don't use assertRaises to make diagnosing failures easier.
+ incomplete_record = complete_record[:count]
+ reader = self.get_reader_for(incomplete_record)
+ # We don't use assertRaises to make diagnosing failures easier
+ # (assertRaises doesn't allow a custom failure message).
try:
- reader.read()
+ names, read_bytes = reader.read()
+ read_bytes(None)
except errors.UnexpectedEndOfContainerError:
pass
else:
self.fail(
"UnexpectedEndOfContainerError not raised when parsing %r"
- % (input.getvalue()))
+ % (incomplete_record,))
def test_initial_eof(self):
"""EOF before any bytes read at all."""
@@ -251,7 +290,7 @@
reader = self.get_reader_for("123\nname")
self.assertRaises(errors.UnexpectedEndOfContainerError, reader.read)
- def test_invalid_name_whitespace(self):
+ def test_read_invalid_name_whitespace(self):
"""Names must have no whitespace."""
# A name with a space.
reader = self.get_reader_for("0\nbad name\n\n")
@@ -266,21 +305,57 @@
self.assertRaises(errors.InvalidRecordError, reader.read)
def test_validate_whitespace_in_name(self):
- reader = self.get_reader_for("0\nbad name\n\nE")
+ """Names must have no whitespace."""
+ reader = self.get_reader_for("0\nbad name\n\n")
self.assertRaises(errors.InvalidRecordError, reader.validate)
def test_validate_interrupted_prelude(self):
+ """EOF during reading a record's prelude causes validate to fail."""
reader = self.get_reader_for("")
self.assertRaises(
errors.UnexpectedEndOfContainerError, reader.validate)
def test_validate_interrupted_body(self):
+ """EOF during reading a record's body causes validate to fail."""
reader = self.get_reader_for("1\n\n")
self.assertRaises(
errors.UnexpectedEndOfContainerError, reader.validate)
def test_validate_unparseable_length(self):
+ """An unparseable record length causes validate to fail."""
reader = self.get_reader_for("\n\n")
self.assertRaises(
errors.InvalidRecordError, reader.validate)
+ def test_validate_undecodeable_name(self):
+ """Names that aren't valid UTF-8 cause validate to fail."""
+ reader = self.get_reader_for("0\n\xcc\n\n")
+ self.assertRaises(errors.InvalidRecordError, reader.validate)
+
+ def test_read_max_length(self):
+ """If the max_length passed to the callable returned by read is not
+ None, then no more than that many bytes will be read.
+ """
+ reader = self.get_reader_for("6\n\nabcdef")
+ names, get_bytes = reader.read()
+ self.assertEqual('abc', get_bytes(3))
+
+ def test_read_no_max_length(self):
+ """If the max_length passed to the callable returned by read is None,
+ then all the bytes in the record will be read.
+ """
+ reader = self.get_reader_for("6\n\nabcdef")
+ names, get_bytes = reader.read()
+ self.assertEqual('abcdef', get_bytes(None))
+
+ def test_repeated_read_calls(self):
+ """Repeated calls to the callable returned from BytesRecordReader.read
+ will not read beyond the end of the record.
+ """
+ reader = self.get_reader_for("6\n\nabcdefB3\nnext-record\nXXX")
+ names, get_bytes = reader.read()
+ self.assertEqual('abcdef', get_bytes(None))
+ self.assertEqual('', get_bytes(None))
+ self.assertEqual('', get_bytes(99))
+
+
More information about the bazaar-commits
mailing list