Rev 2507: Start implementing container format reading and writing. in http://bazaar.launchpad.net/~bzr/bzr/container-format

Andrew Bennetts andrew.bennetts at canonical.com
Thu Jun 7 17:12:24 BST 2007


At http://bazaar.launchpad.net/~bzr/bzr/container-format

------------------------------------------------------------
revno: 2507
revision-id: andrew.bennetts at canonical.com-20070607160934-jfs1wrxxtulso9nw
parent: pqm at pqm.ubuntu.com-20070604194535-ihhpf84qp0icoj2t
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: container-format
timestamp: Fri 2007-06-08 02:09:34 +1000
message:
  Start implementing container format reading and writing.
added:
  bzrlib/container.py            container.py-20070607160755-tr8zc26q18rn0jnb-1
  bzrlib/tests/test_container.py test_container.py-20070607160755-tr8zc26q18rn0jnb-2
modified:
  bzrlib/errors.py               errors.py-20050309040759-20512168c4e14fbd
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
  bzrlib/tests/test_errors.py    test_errors.py-20060210110251-41aba2deddf936a8
=== added file 'bzrlib/container.py'
--- a/bzrlib/container.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/container.py	2007-06-07 16:09:34 +0000
@@ -0,0 +1,132 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Container format for Bazaar data.
+
+"Containers" and "records" are described in doc/developers/container-format.txt.
+"""
+
+# XXX: probably rename this to pack.py
+
+from bzrlib import errors
+
+
+FORMAT_ONE = "bzr pack format 1"
+
+
+class ContainerReader(object):
+    """A class for reading Bazaar's container format."""
+
+    def __init__(self, reader_func):
+        """Constructor.
+
+        :param reader_func: a callable that takes one optional argument,
+            ``size``, and returns at most that many bytes.  When the callable
+            returns an empty string, the end of the stream has been reached.
+        """
+        self.reader_func = reader_func
+
+    def iter_records(self):
+        """Iterate over the container, yielding each record as it is read.
+
+        Each yielded record will be a 2-tuple of (names, bytes), where names is
+        a ``list`` and bytes is a ``str``.
+        """
+        format = self._read_line()
+        if format != FORMAT_ONE:
+            raise errors.UnknownContainerFormatError(format)
+        return self._iter_records()
+    
+    def _iter_records(self):
+        while True:
+            record_kind = self.reader_func(1)
+            if record_kind == 'B':
+                # Bytes record.
+                yield self._read_bytes_record()
+            elif record_kind == 'E':
+                # End marker.  There are no more records.
+                return
+            elif record_kind == '':
+                # End of stream encountered, but no End Marker record seen, so
+                # this container is incomplete.
+                raise errors.UnexpectedEndOfContainerError()
+            else:
+                # Unknown record type.
+                raise errors.UnknownRecordTypeError(record_kind)
+
+    def _read_bytes_record(self):
+        length = int(self._read_line())
+        names = []
+        while True:
+            name = self._read_line()
+            if name == '':
+                break
+            names.append(name)
+        bytes = self.reader_func(length)
+        # XXX: deal with case where len(bytes) != length
+        return names, bytes
+
+    def _read_line(self):
+        """Read a line from the input stream.
+
+        This is a simple but inefficient implementation that just reads one byte
+        at a time.  Lines should not be very long, so this is probably
+        tolerable.
+
+        :returns: a line, without the trailing newline
+        """
+        # XXX: Have a maximum line length, to prevent malicious input from
+        # consuming an unreasonable amount of resources?
+        #   -- Andrew Bennetts, 2007-05-07.
+        line = ''
+        while not line.endswith('\n'):
+            line += self.reader_func(1)
+        return line[:-1]
+
+
+class ContainerWriter(object):
+    """A class for writing containers."""
+
+    def __init__(self, write_func):
+        """Constructor.
+
+        :param write_func: a callable that will be called when this
+            ContainerWriter needs to write some bytes.
+        """
+        self.write_func = write_func
+
+    def begin(self):
+        """Begin writing a container."""
+        self.write_func(FORMAT_ONE + "\n")
+
+    def end(self):
+        """Finish writing a container."""
+        self.write_func("E")
+
+    def add_bytes_record(self, bytes, names):
+        """Add a Bytes record with the given names."""
+        # Kind marker
+        self.write_func("B")
+        # Length
+        self.write_func(str(len(bytes)) + "\n")
+        # Names
+        for name in names:
+            self.write_func(name + "\n")
+        # End of headers
+        self.write_func("\n")
+        # Finally, the contents.
+        self.write_func(bytes)
+

=== added file 'bzrlib/tests/test_container.py'
--- a/bzrlib/tests/test_container.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_container.py	2007-06-07 16:09:34 +0000
@@ -0,0 +1,143 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Tests for bzrlib.container."""
+
+
+from cStringIO import StringIO
+
+from bzrlib import container, errors
+from bzrlib.tests import TestCase
+
+
+class TestContainerWriter(TestCase):
+
+    def test_construct(self):
+        """Test constructing a ContainerWriter.
+        
+        This uses None as the output stream to show that the constructor doesn't
+        try to use the output stream.
+        """
+        writer = container.ContainerWriter(None)
+
+    def test_begin(self):
+        """Test the begin() method."""
+        output = StringIO()
+        writer = container.ContainerWriter(output.write)
+        writer.begin()
+        self.assertEqual('bzr pack format 1\n', output.getvalue())
+
+    def test_end(self):
+        """Test the end() method."""
+        output = StringIO()
+        writer = container.ContainerWriter(output.write)
+        writer.begin()
+        writer.end()
+        self.assertEqual('bzr pack format 1\nE', output.getvalue())
+
+    def test_add_bytes_record_no_name(self):
+        """Add a bytes record with no name."""
+        output = StringIO()
+        writer = container.ContainerWriter(output.write)
+        writer.begin()
+        writer.add_bytes_record('abc', names=[])
+        self.assertEqual('bzr pack format 1\nB3\n\nabc', output.getvalue())
+
+    def test_add_bytes_record_one_name(self):
+        """Add a bytes record with one name."""
+        output = StringIO()
+        writer = container.ContainerWriter(output.write)
+        writer.begin()
+        writer.add_bytes_record('abc', names=['name1'])
+        self.assertEqual('bzr pack format 1\nB3\nname1\n\nabc',
+                         output.getvalue())
+
+    def test_add_bytes_record_two_names(self):
+        """Add a bytes record with two names."""
+        output = StringIO()
+        writer = container.ContainerWriter(output.write)
+        writer.begin()
+        writer.add_bytes_record('abc', names=['name1', 'name2'])
+        self.assertEqual('bzr pack format 1\nB3\nname1\nname2\n\nabc',
+                         output.getvalue())
+
+
+class TestContainerReader(TestCase):
+
+    def test_construct(self):
+        """Test constructing a ContainerReader.
+        
+        This uses None as the input stream to show that the constructor doesn't
+        try to use the input stream.
+        """
+        reader = container.ContainerReader(None)
+
+    def test_empty_container(self):
+        """Read an empty container."""
+        input = StringIO("bzr pack format 1\nE")
+        reader = container.ContainerReader(input.read)
+        self.assertEqual([], list(reader.iter_records()))
+
+    def test_unknown_format(self):
+        """Unrecognised container formats raise UnknownContainerFormatError."""
+        input = StringIO("unknown format\n")
+        reader = container.ContainerReader(input.read)
+        self.assertRaises(
+            errors.UnknownContainerFormatError, reader.iter_records)
+
+    def test_unexpected_end_of_container(self):
+        """Containers that don't end with an End Marker record should cause
+        UnexpectedEndOfContainerError to be raised.
+        """
+        input = StringIO("bzr pack format 1\n")
+        reader = container.ContainerReader(input.read)
+        iterator = reader.iter_records()
+        self.assertRaises(
+            errors.UnexpectedEndOfContainerError, iterator.next)
+
+    def test_unknown_record_type(self):
+        """Unknown record types cause UnknownRecordTypeError to be raised."""
+        input = StringIO("bzr pack format 1\nX")
+        reader = container.ContainerReader(input.read)
+        iterator = reader.iter_records()
+        self.assertRaises(
+            errors.UnknownRecordTypeError, iterator.next)
+
+    # XXX: refactor Bytes record parsing into a separate BytesRecordReader for
+    #      better unit testing.
+    def test_one_unnamed_record(self):
+        """Read a container with one Bytes record."""
+        input = StringIO("bzr pack format 1\nB5\n\naaaaaE")
+        reader = container.ContainerReader(input.read)
+        expected_records = [([], 'aaaaa')]
+        self.assertEqual(expected_records, list(reader.iter_records()))
+
+    def test_one_named_record(self):
+        """Read a container with one Bytes record with a single name."""
+        input = StringIO("bzr pack format 1\nB5\nname1\n\naaaaaE")
+        reader = container.ContainerReader(input.read)
+        expected_records = [(['name1'], 'aaaaa')]
+        self.assertEqual(expected_records, list(reader.iter_records()))
+
+
+    # Other Bytes record parsing cases to test:
+    #  - invalid length value
+    #  - incomplete bytes (i.e. stream ends before $length bytes read)
+    #  - _read_line encountering end of stream (at any time; during length,
+    #    names, end of headers...)
+
+
+

=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py	2007-05-21 14:14:36 +0000
+++ b/bzrlib/errors.py	2007-06-07 16:09:34 +0000
@@ -2134,3 +2134,32 @@
 
     def __init__(self, response_tuple):
         self.response_tuple = response_tuple
+
+
+class ContainerError(BzrError):
+    """Base class of container errors."""
+
+
+class UnknownContainerFormatError(ContainerError):
+
+    _fmt = "Unrecognised container format: %(container_format)r"
+    
+    def __init__(self, container_format):
+        self.container_format = container_format
+
+
+class UnexpectedEndOfContainerError(ContainerError):
+
+    _fmt = "Unexpected end of container stream"
+
+    internal_error = False
+
+
+class UnknownRecordTypeError(ContainerError):
+
+    _fmt = "Unknown record type: %(record_type)r"
+
+    def __init__(self, record_type):
+        self.record_type = record_type
+
+

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2007-05-30 12:45:23 +0000
+++ b/bzrlib/tests/__init__.py	2007-06-07 16:09:34 +0000
@@ -2278,6 +2278,7 @@
                    'bzrlib.tests.test_commit_merge',
                    'bzrlib.tests.test_config',
                    'bzrlib.tests.test_conflicts',
+                   'bzrlib.tests.test_container',
                    'bzrlib.tests.test_counted_lock',
                    'bzrlib.tests.test_decorators',
                    'bzrlib.tests.test_delta',

=== modified file 'bzrlib/tests/test_errors.py'
--- a/bzrlib/tests/test_errors.py	2007-04-26 09:07:38 +0000
+++ b/bzrlib/tests/test_errors.py	2007-06-07 16:09:34 +0000
@@ -266,6 +266,26 @@
             "Could not understand response from smart server: ('not yes',)",
             str(e))
 
+    def test_unknown_container_format(self):
+        """Test the formatting of UnknownContainerFormatError."""
+        e = errors.UnknownContainerFormatError('bad format string')
+        self.assertEqual(
+            "Unrecognised container format: 'bad format string'",
+            str(e))
+
+    def test_unexpected_end_of_container(self):
+        """Test the formatting of UnexpectedEndOfContainerError."""
+        e = errors.UnexpectedEndOfContainerError()
+        self.assertEqual(
+            "Unexpected end of container stream", str(e))
+
+    def test_unknown_record_type(self):
+        """Test the formatting of UnknownRecordTypeError."""
+        e = errors.UnknownRecordTypeError("X")
+        self.assertEqual(
+            "Unknown record type: 'X'",
+            str(e))
+
 
 class PassThroughError(errors.BzrError):
     




More information about the bazaar-commits mailing list