Rev 2667: (robertc) Add support to the bzrlib.pack interface for arbitrary-record access via a readv adapter for transports. (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Thu Aug 2 07:33:24 BST 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 2667
revision-id: pqm at pqm.ubuntu.com-20070802063321-lpx3oazcxyac24oa
parent: pqm at pqm.ubuntu.com-20070801171451-en3tds1hzlru2j83
parent: robertc at robertcollins.net-20070802053516-32sp0lvric0x5ugl
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2007-08-02 07:33:21 +0100
message:
(robertc) Add support to the bzrlib.pack interface for arbitrary-record access via a readv adapter for transports. (Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/pack.py container.py-20070607160755-tr8zc26q18rn0jnb-1
bzrlib/tests/test_pack.py test_container.py-20070607160755-tr8zc26q18rn0jnb-2
bzrlib/transport/local.py local_transport.py-20050711165921-9b1f142bfe480c24
------------------------------------------------------------
revno: 2661.2.3
merged: robertc at robertcollins.net-20070802053516-32sp0lvric0x5ugl
parent: robertc at robertcollins.net-20070802031746-mpnoaxym829719w6
committer: Robert Collins <robertc at robertcollins.net>
branch nick: pack
timestamp: Thu 2007-08-02 15:35:16 +1000
message:
Review feedback.
------------------------------------------------------------
revno: 2661.2.2
merged: robertc at robertcollins.net-20070802031746-mpnoaxym829719w6
parent: robertc at robertcollins.net-20070802021817-n8a86kevyvk2f9jo
committer: Robert Collins <robertc at robertcollins.net>
branch nick: pack
timestamp: Thu 2007-08-02 13:17:46 +1000
message:
* ``bzrlib.pack.make_readv_reader`` allows readv based access to pack
files that are stored on a transport. (Robert Collins)
------------------------------------------------------------
revno: 2661.2.1
merged: robertc at robertcollins.net-20070802021817-n8a86kevyvk2f9jo
parent: pqm at pqm.ubuntu.com-20070730235409-pfqxlkh2dcs95u70
committer: Robert Collins <robertc at robertcollins.net>
branch nick: pack
timestamp: Thu 2007-08-02 12:18:17 +1000
message:
* ``bzrlib.pack.ContainerWriter`` now returns an offset, length tuple to
callers when inserting data, allowing generation of readv style access
during pack creation, without needing a separate pass across the output
pack to gather such details. (Robert Collins)
=== modified file 'NEWS'
--- a/NEWS 2007-08-01 17:14:51 +0000
+++ b/NEWS 2007-08-02 06:33:21 +0000
@@ -184,6 +184,14 @@
* Graph now has an is_ancestor method, various bits use it.
(Aaron Bentley)
+ * ``bzrlib.pack.ContainerWriter`` now returns an offset, length tuple to
+ callers when inserting data, allowing generation of readv style access
+ during pack creation, without needing a separate pass across the output
+ pack to gather such details. (Robert Collins)
+
+ * ``bzrlib.pack.make_readv_reader`` allows readv based access to pack
+ files that are stored on a transport. (Robert Collins)
+
TESTING:
* Remove selftest ``--clean-output``, ``--numbered-dirs`` and
=== modified file 'bzrlib/pack.py'
--- a/bzrlib/pack.py 2007-07-03 04:12:19 +0000
+++ b/bzrlib/pack.py 2007-08-02 03:17:46 +0000
@@ -19,6 +19,7 @@
"Containers" and "records" are described in doc/developers/container-format.txt.
"""
+from cStringIO import StringIO
import re
from bzrlib import errors
@@ -66,18 +67,34 @@
:param write_func: a callable that will be called when this
ContainerWriter needs to write some bytes.
"""
- self.write_func = write_func
+ self._write_func = write_func
+ self.current_offset = 0
def begin(self):
"""Begin writing a container."""
self.write_func(FORMAT_ONE + "\n")
+ def write_func(self, bytes):
+ self._write_func(bytes)
+ self.current_offset += len(bytes)
+
def end(self):
"""Finish writing a container."""
self.write_func("E")
def add_bytes_record(self, bytes, names):
- """Add a Bytes record with the given names."""
+ """Add a Bytes record with the given names.
+
+ :param bytes: The bytes to insert.
+ :param names: The names to give the inserted bytes.
+ :return: An offset, length tuple. The offset is the offset
+ of the record within the container, and the length is the
+ length of data that will need to be read to reconstitute the
+ record. These offset and length can only be used with the pack
+ interface - they might be offset by headers or other such details
+ and thus are only suitable for use by a ContainerReader.
+ """
+ current_offset = self.current_offset
# Kind marker
self.write_func("B")
# Length
@@ -92,6 +109,54 @@
self.write_func("\n")
# Finally, the contents.
self.write_func(bytes)
+ # return a memo of where we wrote data to allow random access.
+ return current_offset, self.current_offset - current_offset
+
+
+class ReadVFile(object):
+ """Adapt a readv result iterator to a file like protocol."""
+
+ def __init__(self, readv_result):
+ self.readv_result = readv_result
+ # the most recent readv result block
+ self._string = None
+
+ def _next(self):
+ if (self._string is None or
+ self._string.tell() == self._string_length):
+ length, data = self.readv_result.next()
+ self._string_length = len(data)
+ self._string = StringIO(data)
+
+ def read(self, length):
+ self._next()
+ result = self._string.read(length)
+ if len(result) < length:
+ raise errors.BzrError('request for too much data from a readv hunk.')
+ return result
+
+ def readline(self):
+ """Note that readline will not cross readv segments."""
+ self._next()
+ result = self._string.readline()
+ if self._string.tell() == self._string_length and result[-1] != '\n':
+ raise errors.BzrError('short readline in the readvfile hunk.')
+ return result
+
+
+def make_readv_reader(transport, filename, requested_records):
+ """Create a ContainerReader that will read selected records only.
+
+ :param transport: The transport the pack file is located on.
+ :param filename: The filename of the pack file.
+ :param requested_records: The record offset, length tuples as returned
+ by add_bytes_record for the desired records.
+ """
+ readv_blocks = [(0, len(FORMAT_ONE)+1)]
+ readv_blocks.extend(requested_records)
+ result = ContainerReader(ReadVFile(
+ transport.readv(filename, readv_blocks)))
+ return result
class BaseReader(object):
=== modified file 'bzrlib/tests/test_pack.py'
--- a/bzrlib/tests/test_pack.py 2007-07-03 04:05:08 +0000
+++ b/bzrlib/tests/test_pack.py 2007-08-02 05:35:16 +0000
@@ -54,7 +54,8 @@
output = StringIO()
writer = pack.ContainerWriter(output.write)
writer.begin()
- writer.add_bytes_record('abc', names=[])
+ offset, length = writer.add_bytes_record('abc', names=[])
+ self.assertEqual((42, 7), (offset, length))
self.assertEqual('Bazaar pack format 1 (introduced in 0.18)\nB3\n\nabc',
output.getvalue())
@@ -63,7 +64,8 @@
output = StringIO()
writer = pack.ContainerWriter(output.write)
writer.begin()
- writer.add_bytes_record('abc', names=['name1'])
+ offset, length = writer.add_bytes_record('abc', names=['name1'])
+ self.assertEqual((42, 13), (offset, length))
self.assertEqual(
'Bazaar pack format 1 (introduced in 0.18)\n'
'B3\nname1\n\nabc',
@@ -74,12 +76,26 @@
output = StringIO()
writer = pack.ContainerWriter(output.write)
writer.begin()
- writer.add_bytes_record('abc', names=['name1', 'name2'])
+ offset, length = writer.add_bytes_record('abc', names=['name1', 'name2'])
+ self.assertEqual((42, 19), (offset, length))
self.assertEqual(
'Bazaar pack format 1 (introduced in 0.18)\n'
'B3\nname1\nname2\n\nabc',
output.getvalue())
+ def test_add_second_bytes_record_gets_higher_offset(self):
+ output = StringIO()
+ writer = pack.ContainerWriter(output.write)
+ writer.begin()
+ writer.add_bytes_record('abc', names=[])
+ offset, length = writer.add_bytes_record('abc', names=[])
+ self.assertEqual((49, 7), (offset, length))
+ self.assertEqual(
+ 'Bazaar pack format 1 (introduced in 0.18)\n'
+ 'B3\n\nabc'
+ 'B3\n\nabc',
+ output.getvalue())
+
def test_add_bytes_record_invalid_name(self):
"""Adding a Bytes record with a name with whitespace in it raises
InvalidRecordError.
@@ -375,3 +391,70 @@
self.assertEqual('', get_bytes(99))
+class TestMakeReadvReader(tests.TestCaseWithTransport):
+
+ def test_read_skipping_records(self):
+ pack_data = StringIO()
+ writer = pack.ContainerWriter(pack_data.write)
+ writer.begin()
+ memos = []
+ memos.append(writer.add_bytes_record('abc', names=[]))
+ memos.append(writer.add_bytes_record('def', names=['name1']))
+ memos.append(writer.add_bytes_record('ghi', names=['name2']))
+ memos.append(writer.add_bytes_record('jkl', names=[]))
+ writer.end()
+ transport = self.get_transport()
+ transport.put_bytes('mypack', pack_data.getvalue())
+ requested_records = [memos[0], memos[2]]
+ reader = pack.make_readv_reader(transport, 'mypack', requested_records)
+ result = []
+ for names, reader_func in reader.iter_records():
+ result.append((names, reader_func(None)))
+ self.assertEqual([([], 'abc'), (['name2'], 'ghi')], result)
+
+
+class TestReadvFile(tests.TestCaseWithTransport):
+ """Tests of the ReadVFile class.
+
+ Error cases are deliberately undefined: this code adapts the underlying
+ transport interface to a single 'streaming read' interface as
+ ContainerReader needs.
+ """
+
+ def test_read_bytes(self):
+ """Test reading of both single bytes and all bytes in a hunk."""
+ transport = self.get_transport()
+ transport.put_bytes('sample', '0123456789')
+ f = pack.ReadVFile(transport.readv('sample', [(0,1), (1,2), (4,1), (6,2)]))
+ results = []
+ results.append(f.read(1))
+ results.append(f.read(2))
+ results.append(f.read(1))
+ results.append(f.read(1))
+ results.append(f.read(1))
+ self.assertEqual(['0', '12', '4', '6', '7'], results)
+
+ def test_readline(self):
+ """Test using readline() as ContainerReader does.
+
+ This is always within a readv hunk, never across it.
+ """
+ transport = self.get_transport()
+ transport.put_bytes('sample', '0\n2\n4\n')
+ f = pack.ReadVFile(transport.readv('sample', [(0,2), (2,4)]))
+ results = []
+ results.append(f.readline())
+ results.append(f.readline())
+ results.append(f.readline())
+ self.assertEqual(['0\n', '2\n', '4\n'], results)
+
+ def test_readline_and_read(self):
+ """Test exercising one byte reads, readline, and then read again."""
+ transport = self.get_transport()
+ transport.put_bytes('sample', '0\n2\n4\n')
+ f = pack.ReadVFile(transport.readv('sample', [(0,6)]))
+ results = []
+ results.append(f.read(1))
+ results.append(f.readline())
+ results.append(f.read(4))
+ self.assertEqual(['0', '\n', '2\n4\n'], results)
=== modified file 'bzrlib/transport/local.py'
--- a/bzrlib/transport/local.py 2007-07-20 03:20:20 +0000
+++ b/bzrlib/transport/local.py 2007-08-02 03:17:46 +0000
@@ -127,7 +127,7 @@
abspath = u'.'
return urlutils.file_relpath(
- urlutils.strip_trailing_slash(self.base),
+ urlutils.strip_trailing_slash(self.base),
urlutils.strip_trailing_slash(abspath))
def has(self, relpath):
More information about the bazaar-commits mailing list