Rev 2508: More improvements, especially in error handling. in http://bazaar.launchpad.net/~bzr/bzr/container-format

Andrew Bennetts andrew.bennetts at canonical.com
Sat Jun 9 04:51:36 BST 2007


At http://bazaar.launchpad.net/~bzr/bzr/container-format

------------------------------------------------------------
revno: 2508
revision-id: andrew.bennetts at canonical.com-20070609034820-t7u540w5pyhvtgn3
parent: andrew.bennetts at canonical.com-20070607160934-jfs1wrxxtulso9nw
parent: andrew.bennetts at canonical.com-20070609034525-j9d7i5dlk6ou97eb
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: container-format
timestamp: Sat 2007-06-09 13:48:20 +1000
message:
  More improvements, especially in error handling.
renamed:
  bzrlib/container.py => bzrlib/pack.py container.py-20070607160755-tr8zc26q18rn0jnb-1
  bzrlib/tests/test_container.py => bzrlib/tests/test_pack.py test_container.py-20070607160755-tr8zc26q18rn0jnb-2
modified:
  bzrlib/errors.py               errors.py-20050309040759-20512168c4e14fbd
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
  bzrlib/tests/test_errors.py    test_errors.py-20060210110251-41aba2deddf936a8
  bzrlib/pack.py                 container.py-20070607160755-tr8zc26q18rn0jnb-1
  bzrlib/tests/test_pack.py      test_container.py-20070607160755-tr8zc26q18rn0jnb-2
    ------------------------------------------------------------
    revno: 2507.1.3
    revision-id: andrew.bennetts at canonical.com-20070609034525-j9d7i5dlk6ou97eb
    parent: andrew.bennetts at canonical.com-20070608064547-vzhyegqx2vl6pni3
    committer: Andrew Bennetts <andrew.bennetts at canonical.com>
    branch nick: container-format
    timestamp: Sat 2007-06-09 13:45:25 +1000
    message:
      Deal with EOF in the middle of a bytes record.
    ------------------------------------------------------------
    revno: 2507.1.2
    revision-id: andrew.bennetts at canonical.com-20070608064547-vzhyegqx2vl6pni3
    parent: andrew.bennetts at canonical.com-20070608063359-s5ps81a8i85w7by0
    committer: Andrew Bennetts <andrew.bennetts at canonical.com>
    branch nick: container-format
    timestamp: Fri 2007-06-08 16:45:47 +1000
    message:
      Test docstring tweaks, inspired by looking over the output of jml's testdoc tool.
    ------------------------------------------------------------
    revno: 2507.1.1
    revision-id: andrew.bennetts at canonical.com-20070608063359-s5ps81a8i85w7by0
    parent: andrew.bennetts at canonical.com-20070607160934-jfs1wrxxtulso9nw
    committer: Andrew Bennetts <andrew.bennetts at canonical.com>
    branch nick: container-format
    timestamp: Fri 2007-06-08 16:33:59 +1000
    message:
      More progress:
      
       * Rename container.py to pack.py
       * Refactor bytes record reading into a separate class for ease of unit testing.
       * Start handling error conditions such as invalid content lengths in byte
         records.
=== renamed file 'bzrlib/container.py' => 'bzrlib/pack.py'
--- a/bzrlib/container.py	2007-06-07 16:09:34 +0000
+++ b/bzrlib/pack.py	2007-06-09 03:45:25 +0000
@@ -19,31 +19,90 @@
 "Containers" and "records" are described in doc/developers/container-format.txt.
 """
 
-# XXX: probably rename this to pack.py
-
 from bzrlib import errors
 
 
 FORMAT_ONE = "bzr pack format 1"
 
 
-class ContainerReader(object):
-    """A class for reading Bazaar's container format."""
+class ContainerWriter(object):
+    """A class for writing containers."""
+
+    def __init__(self, write_func):
+        """Constructor.
+
+        :param write_func: a callable that will be called when this
+            ContainerWriter needs to write some bytes.
+        """
+        self.write_func = write_func
+
+    def begin(self):
+        """Begin writing a container."""
+        self.write_func(FORMAT_ONE + "\n")
+
+    def end(self):
+        """Finish writing a container."""
+        self.write_func("E")
+
+    def add_bytes_record(self, bytes, names):
+        """Add a Bytes record with the given names."""
+        # Kind marker
+        self.write_func("B")
+        # Length
+        self.write_func(str(len(bytes)) + "\n")
+        # Names
+        for name in names:
+            self.write_func(name + "\n")
+        # End of headers
+        self.write_func("\n")
+        # Finally, the contents.
+        self.write_func(bytes)
+
+
+class BaseReader(object):
 
     def __init__(self, reader_func):
         """Constructor.
 
         :param reader_func: a callable that takes one optional argument,
             ``size``, and returns at most that many bytes.  When the callable
-            returns an empty string, then at most that many bytes are read.
+            returns less than the requested number of bytes, then the end of the
+            file/stream has been reached.
         """
         self.reader_func = reader_func
 
+    def _read_line(self):
+        """Read a line from the input stream.
+
+        This is a simple but inefficient implementation that just reads one byte
+        at a time.  Lines should not be very long, so this is probably
+        tolerable.
+
+        :returns: a line, without the trailing newline
+        """
+        # XXX: Have a maximum line length, to prevent malicious input from
+        # consuming an unreasonable amount of resources?
+        #   -- Andrew Bennetts, 2007-05-07.
+        line = ''
+        while not line.endswith('\n'):
+            byte = self.reader_func(1)
+            if byte == '':
+                raise errors.UnexpectedEndOfContainerError()
+            line += byte
+        return line[:-1]
+
+
+class ContainerReader(BaseReader):
+    """A class for reading Bazaar's container format."""
+
     def iter_records(self):
         """Iterate over the container, yielding each record as it is read.
 
         Each yielded record will be a 2-tuple of (names, bytes), where names is
         a ``list`` and bytes is a ``str`.
+
+        :raises UnknownContainerFormatError: if the format of the container is
+            unrecognised.
         """
         format = self._read_line()
         if format != FORMAT_ONE:
@@ -55,7 +114,8 @@
             record_kind = self.reader_func(1)
             if record_kind == 'B':
                 # Bytes record.
-                yield self._read_bytes_record()
+                reader = BytesRecordReader(self.reader_func)
+                yield reader.read()
             elif record_kind == 'E':
                 # End marker.  There are no more records.
                 return
@@ -67,8 +127,53 @@
                 # Unknown record type.
                 raise errors.UnknownRecordTypeError(record_kind)
 
-    def _read_bytes_record(self):
-        length = int(self._read_line())
+
+class ContainerWriter(object):
+    """A class for writing containers."""
+
+    def __init__(self, write_func):
+        """Constructor.
+
+        :param write_func: a callable that will be called when this
+            ContainerWriter needs to write some bytes.
+        """
+        self.write_func = write_func
+
+    def begin(self):
+        """Begin writing a container."""
+        self.write_func(FORMAT_ONE + "\n")
+
+    def end(self):
+        """Finish writing a container."""
+        self.write_func("E")
+
+    def add_bytes_record(self, bytes, names):
+        """Add a Bytes record with the given names."""
+        # Kind marker
+        self.write_func("B")
+        # Length
+        self.write_func(str(len(bytes)) + "\n")
+        # Names
+        for name in names:
+            self.write_func(name + "\n")
+        # End of headers
+        self.write_func("\n")
+        # Finally, the contents.
+        self.write_func(bytes)
+
+
+class BytesRecordReader(BaseReader):
+
+    def read(self):
+        # Read the content length.
+        length_line = self._read_line()
+        try:
+            length = int(length_line)
+        except ValueError:
+            raise errors.InvalidRecordError(
+                "%r is not a valid length." % (length_line,))
+        
+        # Read the list of names.
         names = []
         while True:
             name = self._read_line()
@@ -76,57 +181,7 @@
                 break
             names.append(name)
         bytes = self.reader_func(length)
-        # XXX: deal with case where len(bytes) != length
+        if len(bytes) != length:
+            raise errors.UnexpectedEndOfContainerError()
         return names, bytes
 
-    def _read_line(self):
-        """Read a line from the input stream.
-
-        This is a simple but inefficient implementation that just reads one byte
-        at a time.  Lines should not be very long, so this is probably
-        tolerable.
-
-        :returns: a line, without the trailing newline
-        """
-        # XXX: Have a maximum line length, to prevent malicious input from
-        # consuming an unreasonable amount of resources?
-        #   -- Andrew Bennetts, 2007-05-07.
-        line = ''
-        while not line.endswith('\n'):
-            line += self.reader_func(1)
-        return line[:-1]
-
-
-class ContainerWriter(object):
-    """A class for writing containers."""
-
-    def __init__(self, write_func):
-        """Constructor.
-
-        :param write_func: a callable that will be called when this
-            ContainerWriter needs to write some bytes.
-        """
-        self.write_func = write_func
-
-    def begin(self):
-        """Begin writing a container."""
-        self.write_func(FORMAT_ONE + "\n")
-
-    def end(self):
-        """Finish writing a container."""
-        self.write_func("E")
-
-    def add_bytes_record(self, bytes, names):
-        """Add a Bytes record with the given names."""
-        # Kind marker
-        self.write_func("B")
-        # Length
-        self.write_func(str(len(bytes)) + "\n")
-        # Names
-        for name in names:
-            self.write_func(name + "\n")
-        # End of headers
-        self.write_func("\n")
-        # Finally, the contents.
-        self.write_func(bytes)
-

=== renamed file 'bzrlib/tests/test_container.py' => 'bzrlib/tests/test_pack.py'
--- a/bzrlib/tests/test_container.py	2007-06-07 16:09:34 +0000
+++ b/bzrlib/tests/test_pack.py	2007-06-09 03:45:25 +0000
@@ -14,16 +14,15 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-"""Tests for bzrlib.container."""
+"""Tests for bzrlib.pack."""
 
 
 from cStringIO import StringIO
 
-from bzrlib import container, errors
-from bzrlib.tests import TestCase
-
-
-class TestContainerWriter(TestCase):
+from bzrlib import pack, errors, tests
+
+
+class TestContainerWriter(tests.TestCase):
 
     def test_construct(self):
         """Test constructing a ContainerWriter.
@@ -31,19 +30,19 @@
         This uses None as the output stream to show that the constructor doesn't
         try to use the output stream.
         """
-        writer = container.ContainerWriter(None)
+        writer = pack.ContainerWriter(None)
 
     def test_begin(self):
-        """Test the begin() method."""
+        """The begin() method writes the container format marker line."""
         output = StringIO()
-        writer = container.ContainerWriter(output.write)
+        writer = pack.ContainerWriter(output.write)
         writer.begin()
         self.assertEqual('bzr pack format 1\n', output.getvalue())
 
     def test_end(self):
-        """Test the end() method."""
+        """The end() method writes an End Marker record."""
         output = StringIO()
-        writer = container.ContainerWriter(output.write)
+        writer = pack.ContainerWriter(output.write)
         writer.begin()
         writer.end()
         self.assertEqual('bzr pack format 1\nE', output.getvalue())
@@ -51,7 +50,7 @@
     def test_add_bytes_record_no_name(self):
         """Add a bytes record with no name."""
         output = StringIO()
-        writer = container.ContainerWriter(output.write)
+        writer = pack.ContainerWriter(output.write)
         writer.begin()
         writer.add_bytes_record('abc', names=[])
         self.assertEqual('bzr pack format 1\nB3\n\nabc', output.getvalue())
@@ -59,7 +58,7 @@
     def test_add_bytes_record_one_name(self):
         """Add a bytes record with one name."""
         output = StringIO()
-        writer = container.ContainerWriter(output.write)
+        writer = pack.ContainerWriter(output.write)
         writer.begin()
         writer.add_bytes_record('abc', names=['name1'])
         self.assertEqual('bzr pack format 1\nB3\nname1\n\nabc',
@@ -68,14 +67,14 @@
     def test_add_bytes_record_two_names(self):
         """Add a bytes record with two names."""
         output = StringIO()
-        writer = container.ContainerWriter(output.write)
+        writer = pack.ContainerWriter(output.write)
         writer.begin()
         writer.add_bytes_record('abc', names=['name1', 'name2'])
         self.assertEqual('bzr pack format 1\nB3\nname1\nname2\n\nabc',
                          output.getvalue())
 
 
-class TestContainerReader(TestCase):
+class TestContainerReader(tests.TestCase):
 
     def test_construct(self):
         """Test constructing a ContainerReader.
@@ -83,18 +82,18 @@
         This uses None as the output stream to show that the constructor doesn't
         try to use the input stream.
         """
-        reader = container.ContainerReader(None)
+        reader = pack.ContainerReader(None)
 
     def test_empty_container(self):
         """Read an empty container."""
         input = StringIO("bzr pack format 1\nE")
-        reader = container.ContainerReader(input.read)
+        reader = pack.ContainerReader(input.read)
         self.assertEqual([], list(reader.iter_records()))
 
     def test_unknown_format(self):
         """Unrecognised container formats raise UnknownContainerFormatError."""
         input = StringIO("unknown format\n")
-        reader = container.ContainerReader(input.read)
+        reader = pack.ContainerReader(input.read)
         self.assertRaises(
             errors.UnknownContainerFormatError, reader.iter_records)
 
@@ -103,7 +102,7 @@
         UnexpectedEndOfContainerError to be raised.
         """
         input = StringIO("bzr pack format 1\n")
-        reader = container.ContainerReader(input.read)
+        reader = pack.ContainerReader(input.read)
         iterator = reader.iter_records()
         self.assertRaises(
             errors.UnexpectedEndOfContainerError, iterator.next)
@@ -111,30 +110,108 @@
     def test_unknown_record_type(self):
         """Unknown record types cause UnknownRecordTypeError to be raised."""
         input = StringIO("bzr pack format 1\nX")
-        reader = container.ContainerReader(input.read)
+        reader = pack.ContainerReader(input.read)
         iterator = reader.iter_records()
         self.assertRaises(
             errors.UnknownRecordTypeError, iterator.next)
 
-    # XXX: refactor Bytes record parsing into a seperate BytesRecordReader for
-    #      better unit testing.
-    def test_one_unnamed_record(self):
-        """Read a container with one Bytes record."""
+    def test_container_with_one_unnamed_record(self):
+        """Read a container with one Bytes record.
+        
+        Parsing Bytes records is more thoroughly exercised by
+        TestBytesRecordReader.  This test is here to ensure that
+        ContainerReader's integration with BytesRecordReader is working.
+        """
         input = StringIO("bzr pack format 1\nB5\n\naaaaaE")
-        reader = container.ContainerReader(input.read)
+        reader = pack.ContainerReader(input.read)
         expected_records = [([], 'aaaaa')]
         self.assertEqual(expected_records, list(reader.iter_records()))
 
-    def test_one_named_record(self):
-        """Read a container with one Bytes record with a single name."""
-        input = StringIO("bzr pack format 1\nB5\nname1\n\naaaaaE")
-        reader = container.ContainerReader(input.read)
-        expected_records = [(['name1'], 'aaaaa')]
-        self.assertEqual(expected_records, list(reader.iter_records()))
-
-
+
+class TestBytesRecordReader(tests.TestCase):
+    """Tests for parsing Bytes records with BytesRecordReader."""
+
+    def test_record_with_no_name(self):
+        """Reading a Bytes record with no name returns an empty list of
+        names.
+        """
+        input = StringIO("5\n\naaaaa")
+        reader = pack.BytesRecordReader(input.read)
+        names, bytes = reader.read()
+        self.assertEqual([], names)
+        self.assertEqual('aaaaa', bytes)
+
+    def test_record_with_one_name(self):
+        """Reading a Bytes record with one name returns a list of just that
+        name.
+        """
+        input = StringIO("5\nname1\n\naaaaa")
+        reader = pack.BytesRecordReader(input.read)
+        names, bytes = reader.read()
+        self.assertEqual(['name1'], names)
+        self.assertEqual('aaaaa', bytes)
+
+    def test_record_with_two_names(self):
+        """Reading a Bytes record with two names returns a list of both names.
+        """
+        input = StringIO("5\nname1\nname2\n\naaaaa")
+        reader = pack.BytesRecordReader(input.read)
+        names, bytes = reader.read()
+        self.assertEqual(['name1', 'name2'], names)
+        self.assertEqual('aaaaa', bytes)
+
+    def test_invalid_length(self):
+        """If the length-prefix is not a number, parsing raises
+        InvalidRecordError.
+        """
+        input = StringIO("not a number\n")
+        reader = pack.BytesRecordReader(input.read)
+        self.assertRaises(errors.InvalidRecordError, reader.read)
+
+    def test_early_eof(self):
+        """Tests for premature EOF occuring during parsing Bytes records with
+        BytesRecordReader.
+        
+        A incomplete container might be interrupted at any point.  The
+        BytesRecordReader needs to cope with the input stream running out no
+        matter where it is in the parsing process.
+
+        In all cases, UnexpectedEndOfContainerError should be raised.
+        """
+        complete_record = "6\nname\n\nabcdef"
+        for count in range(0, len(complete_record)):
+            input = StringIO(complete_record[:count])
+            reader = pack.BytesRecordReader(input.read)
+            # We don't use assertRaises to make diagnosing failures easier.
+            try:
+                reader.read()
+            except errors.UnexpectedEndOfContainerError:
+                pass
+            else:
+                self.fail(
+                    "UnexpectedEndOfContainerError not raised when parsing %r"
+                    % (input.getvalue()))
+
+    def test_initial(self):
+        """EOF before any bytes read at all."""
+        input = StringIO("")
+        reader = pack.BytesRecordReader(input.read)
+        self.assertRaises(errors.UnexpectedEndOfContainerError, reader.read)
+
+    def test_after_length(self):
+        """EOF after reading the length and before reading name(s)."""
+        input = StringIO("123\n")
+        reader = pack.BytesRecordReader(input.read)
+        self.assertRaises(errors.UnexpectedEndOfContainerError, reader.read)
+
+    def test_during_name(self):
+        """EOF during reading a name."""
+        input = StringIO("123\nname")
+        reader = pack.BytesRecordReader(input.read)
+        self.assertRaises(errors.UnexpectedEndOfContainerError, reader.read)
+
+        
     # Other Bytes record parsing cases to test:
-    #  - invalid length value
     #  - incomplete bytes (i.e. stream ends before $length bytes read)
     #  - _read_line encountering end of stream (at any time; during length,
     #    names, end of headers...)

=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py	2007-06-07 16:09:34 +0000
+++ b/bzrlib/errors.py	2007-06-08 06:33:59 +0000
@@ -2163,3 +2163,10 @@
         self.record_type = record_type
 
 
+class InvalidRecordError(ContainerError):
+
+    _fmt = "Invalid record: %(reason)s"
+
+    def __init__(self, reason):
+        self.reason = reason
+

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2007-06-07 16:09:34 +0000
+++ b/bzrlib/tests/__init__.py	2007-06-08 06:33:59 +0000
@@ -2278,7 +2278,7 @@
                    'bzrlib.tests.test_commit_merge',
                    'bzrlib.tests.test_config',
                    'bzrlib.tests.test_conflicts',
-                   'bzrlib.tests.test_container',
+                   'bzrlib.tests.test_pack',
                    'bzrlib.tests.test_counted_lock',
                    'bzrlib.tests.test_decorators',
                    'bzrlib.tests.test_delta',

=== modified file 'bzrlib/tests/test_errors.py'
--- a/bzrlib/tests/test_errors.py	2007-06-07 16:09:34 +0000
+++ b/bzrlib/tests/test_errors.py	2007-06-08 06:33:59 +0000
@@ -286,6 +286,13 @@
             "Unknown record type: 'X'",
             str(e))
 
+    def test_invalid_record(self):
+        """Test the formatting of InvalidRecordError."""
+        e = errors.InvalidRecordError("xxx")
+        self.assertEqual(
+            "Invalid record: xxx",
+            str(e))
+
 
 class PassThroughError(errors.BzrError):
     




More information about the bazaar-commits mailing list