Rev 2508: More improvements, especially in error handling. in http://bazaar.launchpad.net/~bzr/bzr/container-format
Andrew Bennetts
andrew.bennetts at canonical.com
Sat Jun 9 04:51:36 BST 2007
At http://bazaar.launchpad.net/~bzr/bzr/container-format
------------------------------------------------------------
revno: 2508
revision-id: andrew.bennetts at canonical.com-20070609034820-t7u540w5pyhvtgn3
parent: andrew.bennetts at canonical.com-20070607160934-jfs1wrxxtulso9nw
parent: andrew.bennetts at canonical.com-20070609034525-j9d7i5dlk6ou97eb
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: container-format
timestamp: Sat 2007-06-09 13:48:20 +1000
message:
More improvements, especially in error handling.
renamed:
bzrlib/container.py => bzrlib/pack.py container.py-20070607160755-tr8zc26q18rn0jnb-1
bzrlib/tests/test_container.py => bzrlib/tests/test_pack.py test_container.py-20070607160755-tr8zc26q18rn0jnb-2
modified:
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/tests/__init__.py selftest.py-20050531073622-8d0e3c8845c97a64
bzrlib/tests/test_errors.py test_errors.py-20060210110251-41aba2deddf936a8
bzrlib/pack.py container.py-20070607160755-tr8zc26q18rn0jnb-1
bzrlib/tests/test_pack.py test_container.py-20070607160755-tr8zc26q18rn0jnb-2
------------------------------------------------------------
revno: 2507.1.3
revision-id: andrew.bennetts at canonical.com-20070609034525-j9d7i5dlk6ou97eb
parent: andrew.bennetts at canonical.com-20070608064547-vzhyegqx2vl6pni3
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: container-format
timestamp: Sat 2007-06-09 13:45:25 +1000
message:
Deal with EOF in the middle of a bytes record.
------------------------------------------------------------
revno: 2507.1.2
revision-id: andrew.bennetts at canonical.com-20070608064547-vzhyegqx2vl6pni3
parent: andrew.bennetts at canonical.com-20070608063359-s5ps81a8i85w7by0
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: container-format
timestamp: Fri 2007-06-08 16:45:47 +1000
message:
Test docstring tweaks, inspired by looking over the output of jml's testdoc tool.
------------------------------------------------------------
revno: 2507.1.1
revision-id: andrew.bennetts at canonical.com-20070608063359-s5ps81a8i85w7by0
parent: andrew.bennetts at canonical.com-20070607160934-jfs1wrxxtulso9nw
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: container-format
timestamp: Fri 2007-06-08 16:33:59 +1000
message:
More progress:
* Rename container.py to pack.py
* Refactor bytes record reading into a separate class for ease of unit testing.
* Start handling error conditions such as invalid content lengths in byte
records.
=== renamed file 'bzrlib/container.py' => 'bzrlib/pack.py'
--- a/bzrlib/container.py 2007-06-07 16:09:34 +0000
+++ b/bzrlib/pack.py 2007-06-09 03:45:25 +0000
@@ -19,31 +19,90 @@
"Containers" and "records" are described in doc/developers/container-format.txt.
"""
-# XXX: probably rename this to pack.py
-
from bzrlib import errors
FORMAT_ONE = "bzr pack format 1"
-class ContainerReader(object):
- """A class for reading Bazaar's container format."""
+class ContainerWriter(object):
+ """A class for writing containers."""
+
+ def __init__(self, write_func):
+ """Constructor.
+
+ :param write_func: a callable that will be called when this
+ ContainerWriter needs to write some bytes.
+ """
+ self.write_func = write_func
+
+ def begin(self):
+ """Begin writing a container."""
+ self.write_func(FORMAT_ONE + "\n")
+
+ def end(self):
+ """Finish writing a container."""
+ self.write_func("E")
+
+ def add_bytes_record(self, bytes, names):
+ """Add a Bytes record with the given names."""
+ # Kind marker
+ self.write_func("B")
+ # Length
+ self.write_func(str(len(bytes)) + "\n")
+ # Names
+ for name in names:
+ self.write_func(name + "\n")
+ # End of headers
+ self.write_func("\n")
+ # Finally, the contents.
+ self.write_func(bytes)
+
+
+class BaseReader(object):
def __init__(self, reader_func):
"""Constructor.
:param reader_func: a callable that takes one optional argument,
``size``, and returns at most that many bytes. When the callable
- returns an empty string, then at most that many bytes are read.
+ returns less than the requested number of bytes, then the end of the
+ file/stream has been reached.
"""
self.reader_func = reader_func
+ def _read_line(self):
+ """Read a line from the input stream.
+
+ This is a simple but inefficient implementation that just reads one byte
+ at a time. Lines should not be very long, so this is probably
+ tolerable.
+
+ :returns: a line, without the trailing newline
+ """
+ # XXX: Have a maximum line length, to prevent malicious input from
+ # consuming an unreasonable amount of resources?
+ # -- Andrew Bennetts, 2007-05-07.
+ line = ''
+ while not line.endswith('\n'):
+ byte = self.reader_func(1)
+ if byte == '':
+ raise errors.UnexpectedEndOfContainerError()
+ line += byte
+ return line[:-1]
+
+
+class ContainerReader(BaseReader):
+ """A class for reading Bazaar's container format."""
+
def iter_records(self):
"""Iterate over the container, yielding each record as it is read.
Each yielded record will be a 2-tuple of (names, bytes), where names is
a ``list`` and bytes is a ``str`.
+
+ :raises UnknownContainerFormatError: if the format of the container is
+ unrecognised.
"""
format = self._read_line()
if format != FORMAT_ONE:
@@ -55,7 +114,8 @@
record_kind = self.reader_func(1)
if record_kind == 'B':
# Bytes record.
- yield self._read_bytes_record()
+ reader = BytesRecordReader(self.reader_func)
+ yield reader.read()
elif record_kind == 'E':
# End marker. There are no more records.
return
@@ -67,8 +127,53 @@
# Unknown record type.
raise errors.UnknownRecordTypeError(record_kind)
- def _read_bytes_record(self):
- length = int(self._read_line())
+
+class ContainerWriter(object):
+ """A class for writing containers."""
+
+ def __init__(self, write_func):
+ """Constructor.
+
+ :param write_func: a callable that will be called when this
+ ContainerWriter needs to write some bytes.
+ """
+ self.write_func = write_func
+
+ def begin(self):
+ """Begin writing a container."""
+ self.write_func(FORMAT_ONE + "\n")
+
+ def end(self):
+ """Finish writing a container."""
+ self.write_func("E")
+
+ def add_bytes_record(self, bytes, names):
+ """Add a Bytes record with the given names."""
+ # Kind marker
+ self.write_func("B")
+ # Length
+ self.write_func(str(len(bytes)) + "\n")
+ # Names
+ for name in names:
+ self.write_func(name + "\n")
+ # End of headers
+ self.write_func("\n")
+ # Finally, the contents.
+ self.write_func(bytes)
+
+
+class BytesRecordReader(BaseReader):
+
+ def read(self):
+ # Read the content length.
+ length_line = self._read_line()
+ try:
+ length = int(length_line)
+ except ValueError:
+ raise errors.InvalidRecordError(
+ "%r is not a valid length." % (length_line,))
+
+ # Read the list of names.
names = []
while True:
name = self._read_line()
@@ -76,57 +181,7 @@
break
names.append(name)
bytes = self.reader_func(length)
- # XXX: deal with case where len(bytes) != length
+ if len(bytes) != length:
+ raise errors.UnexpectedEndOfContainerError()
return names, bytes
- def _read_line(self):
- """Read a line from the input stream.
-
- This is a simple but inefficient implementation that just reads one byte
- at a time. Lines should not be very long, so this is probably
- tolerable.
-
- :returns: a line, without the trailing newline
- """
- # XXX: Have a maximum line length, to prevent malicious input from
- # consuming an unreasonable amount of resources?
- # -- Andrew Bennetts, 2007-05-07.
- line = ''
- while not line.endswith('\n'):
- line += self.reader_func(1)
- return line[:-1]
-
-
-class ContainerWriter(object):
- """A class for writing containers."""
-
- def __init__(self, write_func):
- """Constructor.
-
- :param write_func: a callable that will be called when this
- ContainerWriter needs to write some bytes.
- """
- self.write_func = write_func
-
- def begin(self):
- """Begin writing a container."""
- self.write_func(FORMAT_ONE + "\n")
-
- def end(self):
- """Finish writing a container."""
- self.write_func("E")
-
- def add_bytes_record(self, bytes, names):
- """Add a Bytes record with the given names."""
- # Kind marker
- self.write_func("B")
- # Length
- self.write_func(str(len(bytes)) + "\n")
- # Names
- for name in names:
- self.write_func(name + "\n")
- # End of headers
- self.write_func("\n")
- # Finally, the contents.
- self.write_func(bytes)
-
=== renamed file 'bzrlib/tests/test_container.py' => 'bzrlib/tests/test_pack.py'
--- a/bzrlib/tests/test_container.py 2007-06-07 16:09:34 +0000
+++ b/bzrlib/tests/test_pack.py 2007-06-09 03:45:25 +0000
@@ -14,16 +14,15 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""Tests for bzrlib.container."""
+"""Tests for bzrlib.pack."""
from cStringIO import StringIO
-from bzrlib import container, errors
-from bzrlib.tests import TestCase
-
-
-class TestContainerWriter(TestCase):
+from bzrlib import pack, errors, tests
+
+
+class TestContainerWriter(tests.TestCase):
def test_construct(self):
"""Test constructing a ContainerWriter.
@@ -31,19 +30,19 @@
This uses None as the output stream to show that the constructor doesn't
try to use the output stream.
"""
- writer = container.ContainerWriter(None)
+ writer = pack.ContainerWriter(None)
def test_begin(self):
- """Test the begin() method."""
+ """The begin() method writes the container format marker line."""
output = StringIO()
- writer = container.ContainerWriter(output.write)
+ writer = pack.ContainerWriter(output.write)
writer.begin()
self.assertEqual('bzr pack format 1\n', output.getvalue())
def test_end(self):
- """Test the end() method."""
+ """The end() method writes an End Marker record."""
output = StringIO()
- writer = container.ContainerWriter(output.write)
+ writer = pack.ContainerWriter(output.write)
writer.begin()
writer.end()
self.assertEqual('bzr pack format 1\nE', output.getvalue())
@@ -51,7 +50,7 @@
def test_add_bytes_record_no_name(self):
"""Add a bytes record with no name."""
output = StringIO()
- writer = container.ContainerWriter(output.write)
+ writer = pack.ContainerWriter(output.write)
writer.begin()
writer.add_bytes_record('abc', names=[])
self.assertEqual('bzr pack format 1\nB3\n\nabc', output.getvalue())
@@ -59,7 +58,7 @@
def test_add_bytes_record_one_name(self):
"""Add a bytes record with one name."""
output = StringIO()
- writer = container.ContainerWriter(output.write)
+ writer = pack.ContainerWriter(output.write)
writer.begin()
writer.add_bytes_record('abc', names=['name1'])
self.assertEqual('bzr pack format 1\nB3\nname1\n\nabc',
@@ -68,14 +67,14 @@
def test_add_bytes_record_two_names(self):
"""Add a bytes record with two names."""
output = StringIO()
- writer = container.ContainerWriter(output.write)
+ writer = pack.ContainerWriter(output.write)
writer.begin()
writer.add_bytes_record('abc', names=['name1', 'name2'])
self.assertEqual('bzr pack format 1\nB3\nname1\nname2\n\nabc',
output.getvalue())
-class TestContainerReader(TestCase):
+class TestContainerReader(tests.TestCase):
def test_construct(self):
"""Test constructing a ContainerReader.
@@ -83,18 +82,18 @@
This uses None as the output stream to show that the constructor doesn't
try to use the input stream.
"""
- reader = container.ContainerReader(None)
+ reader = pack.ContainerReader(None)
def test_empty_container(self):
"""Read an empty container."""
input = StringIO("bzr pack format 1\nE")
- reader = container.ContainerReader(input.read)
+ reader = pack.ContainerReader(input.read)
self.assertEqual([], list(reader.iter_records()))
def test_unknown_format(self):
"""Unrecognised container formats raise UnknownContainerFormatError."""
input = StringIO("unknown format\n")
- reader = container.ContainerReader(input.read)
+ reader = pack.ContainerReader(input.read)
self.assertRaises(
errors.UnknownContainerFormatError, reader.iter_records)
@@ -103,7 +102,7 @@
UnexpectedEndOfContainerError to be raised.
"""
input = StringIO("bzr pack format 1\n")
- reader = container.ContainerReader(input.read)
+ reader = pack.ContainerReader(input.read)
iterator = reader.iter_records()
self.assertRaises(
errors.UnexpectedEndOfContainerError, iterator.next)
@@ -111,30 +110,108 @@
def test_unknown_record_type(self):
"""Unknown record types cause UnknownRecordTypeError to be raised."""
input = StringIO("bzr pack format 1\nX")
- reader = container.ContainerReader(input.read)
+ reader = pack.ContainerReader(input.read)
iterator = reader.iter_records()
self.assertRaises(
errors.UnknownRecordTypeError, iterator.next)
- # XXX: refactor Bytes record parsing into a seperate BytesRecordReader for
- # better unit testing.
- def test_one_unnamed_record(self):
- """Read a container with one Bytes record."""
+ def test_container_with_one_unnamed_record(self):
+ """Read a container with one Bytes record.
+
+ Parsing Bytes records is more thoroughly exercised by
+ TestBytesRecordReader. This test is here to ensure that
+ ContainerReader's integration with BytesRecordReader is working.
+ """
input = StringIO("bzr pack format 1\nB5\n\naaaaaE")
- reader = container.ContainerReader(input.read)
+ reader = pack.ContainerReader(input.read)
expected_records = [([], 'aaaaa')]
self.assertEqual(expected_records, list(reader.iter_records()))
- def test_one_named_record(self):
- """Read a container with one Bytes record with a single name."""
- input = StringIO("bzr pack format 1\nB5\nname1\n\naaaaaE")
- reader = container.ContainerReader(input.read)
- expected_records = [(['name1'], 'aaaaa')]
- self.assertEqual(expected_records, list(reader.iter_records()))
-
-
+
+class TestBytesRecordReader(tests.TestCase):
+ """Tests for parsing Bytes records with BytesRecordReader."""
+
+ def test_record_with_no_name(self):
+ """Reading a Bytes record with no name returns an empty list of
+ names.
+ """
+ input = StringIO("5\n\naaaaa")
+ reader = pack.BytesRecordReader(input.read)
+ names, bytes = reader.read()
+ self.assertEqual([], names)
+ self.assertEqual('aaaaa', bytes)
+
+ def test_record_with_one_name(self):
+ """Reading a Bytes record with one name returns a list of just that
+ name.
+ """
+ input = StringIO("5\nname1\n\naaaaa")
+ reader = pack.BytesRecordReader(input.read)
+ names, bytes = reader.read()
+ self.assertEqual(['name1'], names)
+ self.assertEqual('aaaaa', bytes)
+
+ def test_record_with_two_names(self):
+ """Reading a Bytes record with two names returns a list of both names.
+ """
+ input = StringIO("5\nname1\nname2\n\naaaaa")
+ reader = pack.BytesRecordReader(input.read)
+ names, bytes = reader.read()
+ self.assertEqual(['name1', 'name2'], names)
+ self.assertEqual('aaaaa', bytes)
+
+ def test_invalid_length(self):
+ """If the length-prefix is not a number, parsing raises
+ InvalidRecordError.
+ """
+ input = StringIO("not a number\n")
+ reader = pack.BytesRecordReader(input.read)
+ self.assertRaises(errors.InvalidRecordError, reader.read)
+
+ def test_early_eof(self):
+ """Tests for premature EOF occuring during parsing Bytes records with
+ BytesRecordReader.
+
+ A incomplete container might be interrupted at any point. The
+ BytesRecordReader needs to cope with the input stream running out no
+ matter where it is in the parsing process.
+
+ In all cases, UnexpectedEndOfContainerError should be raised.
+ """
+ complete_record = "6\nname\n\nabcdef"
+ for count in range(0, len(complete_record)):
+ input = StringIO(complete_record[:count])
+ reader = pack.BytesRecordReader(input.read)
+ # We don't use assertRaises to make diagnosing failures easier.
+ try:
+ reader.read()
+ except errors.UnexpectedEndOfContainerError:
+ pass
+ else:
+ self.fail(
+ "UnexpectedEndOfContainerError not raised when parsing %r"
+ % (input.getvalue()))
+
+ def test_initial(self):
+ """EOF before any bytes read at all."""
+ input = StringIO("")
+ reader = pack.BytesRecordReader(input.read)
+ self.assertRaises(errors.UnexpectedEndOfContainerError, reader.read)
+
+ def test_after_length(self):
+ """EOF after reading the length and before reading name(s)."""
+ input = StringIO("123\n")
+ reader = pack.BytesRecordReader(input.read)
+ self.assertRaises(errors.UnexpectedEndOfContainerError, reader.read)
+
+ def test_during_name(self):
+ """EOF during reading a name."""
+ input = StringIO("123\nname")
+ reader = pack.BytesRecordReader(input.read)
+ self.assertRaises(errors.UnexpectedEndOfContainerError, reader.read)
+
+
# Other Bytes record parsing cases to test:
- # - invalid length value
# - incomplete bytes (i.e. stream ends before $length bytes read)
# - _read_line encountering end of stream (at any time; during length,
# names, end of headers...)
=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py 2007-06-07 16:09:34 +0000
+++ b/bzrlib/errors.py 2007-06-08 06:33:59 +0000
@@ -2163,3 +2163,10 @@
self.record_type = record_type
+class InvalidRecordError(ContainerError):
+
+ _fmt = "Invalid record: %(reason)s"
+
+ def __init__(self, reason):
+ self.reason = reason
+
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2007-06-07 16:09:34 +0000
+++ b/bzrlib/tests/__init__.py 2007-06-08 06:33:59 +0000
@@ -2278,7 +2278,7 @@
'bzrlib.tests.test_commit_merge',
'bzrlib.tests.test_config',
'bzrlib.tests.test_conflicts',
- 'bzrlib.tests.test_container',
+ 'bzrlib.tests.test_pack',
'bzrlib.tests.test_counted_lock',
'bzrlib.tests.test_decorators',
'bzrlib.tests.test_delta',
=== modified file 'bzrlib/tests/test_errors.py'
--- a/bzrlib/tests/test_errors.py 2007-06-07 16:09:34 +0000
+++ b/bzrlib/tests/test_errors.py 2007-06-08 06:33:59 +0000
@@ -286,6 +286,13 @@
"Unknown record type: 'X'",
str(e))
+ def test_invalid_record(self):
+ """Test the formatting of InvalidRecordError."""
+ e = errors.InvalidRecordError("xxx")
+ self.assertEqual(
+ "Invalid record: xxx",
+ str(e))
+
class PassThroughError(errors.BzrError):
More information about the bazaar-commits
mailing list