Rev 2667: (robertc) Add support to the bzrlib.pack interface for arbitrary-record access via a readv adapter for transports. (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Thu Aug 2 07:33:24 BST 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2667
revision-id: pqm at pqm.ubuntu.com-20070802063321-lpx3oazcxyac24oa
parent: pqm at pqm.ubuntu.com-20070801171451-en3tds1hzlru2j83
parent: robertc at robertcollins.net-20070802053516-32sp0lvric0x5ugl
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2007-08-02 07:33:21 +0100
message:
  (robertc) Add support to the bzrlib.pack interface for arbitrary-record access via a readv adapter for transports. (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/pack.py                 container.py-20070607160755-tr8zc26q18rn0jnb-1
  bzrlib/tests/test_pack.py      test_container.py-20070607160755-tr8zc26q18rn0jnb-2
  bzrlib/transport/local.py      local_transport.py-20050711165921-9b1f142bfe480c24
    ------------------------------------------------------------
    revno: 2661.2.3
    merged: robertc at robertcollins.net-20070802053516-32sp0lvric0x5ugl
    parent: robertc at robertcollins.net-20070802031746-mpnoaxym829719w6
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: pack
    timestamp: Thu 2007-08-02 15:35:16 +1000
    message:
      Review feedback.
    ------------------------------------------------------------
    revno: 2661.2.2
    merged: robertc at robertcollins.net-20070802031746-mpnoaxym829719w6
    parent: robertc at robertcollins.net-20070802021817-n8a86kevyvk2f9jo
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: pack
    timestamp: Thu 2007-08-02 13:17:46 +1000
    message:
      * ``bzrlib.pack.make_readv_reader`` allows readv based access to pack
        files that are stored on a transport. (Robert Collins)
    ------------------------------------------------------------
    revno: 2661.2.1
    merged: robertc at robertcollins.net-20070802021817-n8a86kevyvk2f9jo
    parent: pqm at pqm.ubuntu.com-20070730235409-pfqxlkh2dcs95u70
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: pack
    timestamp: Thu 2007-08-02 12:18:17 +1000
    message:
      * ``bzrlib.pack.ContainerWriter`` now returns an offset, length tuple to
        callers when inserting data, allowing generation of readv style access
        during pack creation, without needing a separate pass across the output
        pack to gather such details. (Robert Collins)
=== modified file 'NEWS'
--- a/NEWS	2007-08-01 17:14:51 +0000
+++ b/NEWS	2007-08-02 06:33:21 +0000
@@ -184,6 +184,14 @@
     * Graph now has an is_ancestor method, various bits use it.
       (Aaron Bentley)
 
+    * ``bzrlib.pack.ContainerWriter`` now returns an offset, length tuple to
+      callers when inserting data, allowing generation of readv style access
+      during pack creation, without needing a separate pass across the output
+      pack to gather such details. (Robert Collins)
+
+    * ``bzrlib.pack.make_readv_reader`` allows readv based access to pack
+      files that are stored on a transport. (Robert Collins)
+
   TESTING:
 
     * Remove selftest ``--clean-output``, ``--numbered-dirs`` and

=== modified file 'bzrlib/pack.py'
--- a/bzrlib/pack.py	2007-07-03 04:12:19 +0000
+++ b/bzrlib/pack.py	2007-08-02 03:17:46 +0000
@@ -19,6 +19,7 @@
 "Containers" and "records" are described in doc/developers/container-format.txt.
 """
 
+from cStringIO import StringIO
 import re
 
 from bzrlib import errors
@@ -66,18 +67,34 @@
         :param write_func: a callable that will be called when this
             ContainerWriter needs to write some bytes.
         """
-        self.write_func = write_func
+        self._write_func = write_func
+        self.current_offset = 0
 
     def begin(self):
         """Begin writing a container."""
         self.write_func(FORMAT_ONE + "\n")
 
+    def write_func(self, bytes):
+        self._write_func(bytes)
+        self.current_offset += len(bytes)
+
     def end(self):
         """Finish writing a container."""
         self.write_func("E")
 
     def add_bytes_record(self, bytes, names):
-        """Add a Bytes record with the given names."""
+        """Add a Bytes record with the given names.
+        
+        :param bytes: The bytes to insert.
+        :param names: The names to give the inserted bytes.
+        :return: An offset, length tuple. The offset is the offset
+            of the record within the container, and the length is the
+            length of data that will need to be read to reconstitute the
+            record. These offset and length can only be used with the pack
+            interface - they might be offset by headers or other such details
+            and thus are only suitable for use by a ContainerReader.
+        """
+        current_offset = self.current_offset
         # Kind marker
         self.write_func("B")
         # Length
@@ -92,6 +109,54 @@
         self.write_func("\n")
         # Finally, the contents.
         self.write_func(bytes)
+        # return a memo of where we wrote data to allow random access.
+        return current_offset, self.current_offset - current_offset
+
+
+class ReadVFile(object):
+    """Adapt a readv result iterator to a file like protocol."""
+
+    def __init__(self, readv_result):
+        self.readv_result = readv_result
+        # the most recent readv result block
+        self._string = None
+
+    def _next(self):
+        if (self._string is None or
+            self._string.tell() == self._string_length):
+            length, data = self.readv_result.next()
+            self._string_length = len(data)
+            self._string = StringIO(data)
+
+    def read(self, length):
+        self._next()
+        result = self._string.read(length)
+        if len(result) < length:
+            raise errors.BzrError('request for too much data from a readv hunk.')
+        return result
+
+    def readline(self):
+        """Note that readline will not cross readv segments."""
+        self._next()
+        result = self._string.readline()
+        if self._string.tell() == self._string_length and result[-1] != '\n':
+            raise errors.BzrError('short readline in the readvfile hunk.')
+        return result
+
+
+def make_readv_reader(transport, filename, requested_records):
+    """Create a ContainerReader that will read selected records only.
+
+    :param transport: The transport the pack file is located on.
+    :param filename: The filename of the pack file.
+    :param requested_records: The record offset, length tuples as returned
+        by add_bytes_record for the desired records.
+    """
+    readv_blocks = [(0, len(FORMAT_ONE)+1)]
+    readv_blocks.extend(requested_records)
+    result = ContainerReader(ReadVFile(
+        transport.readv(filename, readv_blocks)))
+    return result
 
 
 class BaseReader(object):

=== modified file 'bzrlib/tests/test_pack.py'
--- a/bzrlib/tests/test_pack.py	2007-07-03 04:05:08 +0000
+++ b/bzrlib/tests/test_pack.py	2007-08-02 05:35:16 +0000
@@ -54,7 +54,8 @@
         output = StringIO()
         writer = pack.ContainerWriter(output.write)
         writer.begin()
-        writer.add_bytes_record('abc', names=[])
+        offset, length = writer.add_bytes_record('abc', names=[])
+        self.assertEqual((42, 7), (offset, length))
         self.assertEqual('Bazaar pack format 1 (introduced in 0.18)\nB3\n\nabc',
                          output.getvalue())
 
@@ -63,7 +64,8 @@
         output = StringIO()
         writer = pack.ContainerWriter(output.write)
         writer.begin()
-        writer.add_bytes_record('abc', names=['name1'])
+        offset, length = writer.add_bytes_record('abc', names=['name1'])
+        self.assertEqual((42, 13), (offset, length))
         self.assertEqual(
             'Bazaar pack format 1 (introduced in 0.18)\n'
             'B3\nname1\n\nabc',
@@ -74,12 +76,26 @@
         output = StringIO()
         writer = pack.ContainerWriter(output.write)
         writer.begin()
-        writer.add_bytes_record('abc', names=['name1', 'name2'])
+        offset, length = writer.add_bytes_record('abc', names=['name1', 'name2'])
+        self.assertEqual((42, 19), (offset, length))
         self.assertEqual(
             'Bazaar pack format 1 (introduced in 0.18)\n'
             'B3\nname1\nname2\n\nabc',
             output.getvalue())
 
+    def test_add_second_bytes_record_gets_higher_offset(self):
+        output = StringIO()
+        writer = pack.ContainerWriter(output.write)
+        writer.begin()
+        writer.add_bytes_record('abc', names=[])
+        offset, length = writer.add_bytes_record('abc', names=[])
+        self.assertEqual((49, 7), (offset, length))
+        self.assertEqual(
+            'Bazaar pack format 1 (introduced in 0.18)\n'
+            'B3\n\nabc'
+            'B3\n\nabc',
+            output.getvalue())
+
     def test_add_bytes_record_invalid_name(self):
         """Adding a Bytes record with a name with whitespace in it raises
         InvalidRecordError.
@@ -375,3 +391,70 @@
         self.assertEqual('', get_bytes(99))
 
 
+class TestMakeReadvReader(tests.TestCaseWithTransport):
+
+    def test_read_skipping_records(self):
+        pack_data = StringIO()
+        writer = pack.ContainerWriter(pack_data.write)
+        writer.begin()
+        memos = []
+        memos.append(writer.add_bytes_record('abc', names=[]))
+        memos.append(writer.add_bytes_record('def', names=['name1']))
+        memos.append(writer.add_bytes_record('ghi', names=['name2']))
+        memos.append(writer.add_bytes_record('jkl', names=[]))
+        writer.end()
+        transport = self.get_transport()
+        transport.put_bytes('mypack', pack_data.getvalue())
+        requested_records = [memos[0], memos[2]]
+        reader = pack.make_readv_reader(transport, 'mypack', requested_records)
+        result = []
+        for names, reader_func in reader.iter_records():
+            result.append((names, reader_func(None)))
+        self.assertEqual([([], 'abc'), (['name2'], 'ghi')], result)
+
+
+class TestReadvFile(tests.TestCaseWithTransport):
+    """Tests of the ReadVFile class.
+
+    Error cases are deliberately undefined: this code adapts the underlying
+    transport interface to a single 'streaming read' interface as 
+    ContainerReader needs.
+    """
+
+    def test_read_bytes(self):
+        """Test reading of both single bytes and all bytes in a hunk."""
+        transport = self.get_transport()
+        transport.put_bytes('sample', '0123456789')
+        f = pack.ReadVFile(transport.readv('sample', [(0,1), (1,2), (4,1), (6,2)]))
+        results = []
+        results.append(f.read(1))
+        results.append(f.read(2))
+        results.append(f.read(1))
+        results.append(f.read(1))
+        results.append(f.read(1))
+        self.assertEqual(['0', '12', '4', '6', '7'], results)
+
+    def test_readline(self):
+        """Test using readline() as ContainerReader does.
+
+        This is always within a readv hunk, never across it.
+        """
+        transport = self.get_transport()
+        transport.put_bytes('sample', '0\n2\n4\n')
+        f = pack.ReadVFile(transport.readv('sample', [(0,2), (2,4)]))
+        results = []
+        results.append(f.readline())
+        results.append(f.readline())
+        results.append(f.readline())
+        self.assertEqual(['0\n', '2\n', '4\n'], results)
+
+    def test_readline_and_read(self):
+        """Test exercising one byte reads, readline, and then read again."""
+        transport = self.get_transport()
+        transport.put_bytes('sample', '0\n2\n4\n')
+        f = pack.ReadVFile(transport.readv('sample', [(0,6)]))
+        results = []
+        results.append(f.read(1))
+        results.append(f.readline())
+        results.append(f.read(4))
+        self.assertEqual(['0', '\n', '2\n4\n'], results)

=== modified file 'bzrlib/transport/local.py'
--- a/bzrlib/transport/local.py	2007-07-20 03:20:20 +0000
+++ b/bzrlib/transport/local.py	2007-08-02 03:17:46 +0000
@@ -127,7 +127,7 @@
             abspath = u'.'
 
         return urlutils.file_relpath(
-            urlutils.strip_trailing_slash(self.base), 
+            urlutils.strip_trailing_slash(self.base),
             urlutils.strip_trailing_slash(abspath))
 
     def has(self, relpath):




More information about the bazaar-commits mailing list