Rev 3900: We now round-trip the wire_bytes, in http://bzr.arbash-meinel.com/branches/bzr/brisbane/lazy_gc_stream
John Arbash Meinel
john at arbash-meinel.com
Tue Mar 17 05:00:45 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/lazy_gc_stream
------------------------------------------------------------
revno: 3900
revision-id: john at arbash-meinel.com-20090317050040-q5np851qcvbwma52
parent: john at arbash-meinel.com-20090317042342-3rk1sf50dlx7qxli
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lazy_gc_stream
timestamp: Tue 2009-03-17 00:00:40 -0500
message:
We now round-trip the wire_bytes.
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-03-17 04:23:42 +0000
+++ b/bzrlib/groupcompress.py 2009-03-17 05:00:40 +0000
@@ -54,7 +54,7 @@
)
_USE_LZMA = False and (pylzma is not None)
-_NO_LABELS = False
+_NO_LABELS = True
_FAST = False
def encode_base128_int(val):
@@ -395,7 +395,14 @@
self._entries[key] = entry
return entry
- def to_bytes(self, content=''):
+ def set_content(self, content):
+ """Set the content of this block."""
+ self._content_length = len(content)
+ self._content = content
+ self._z_content = None
+ self._z_header_length = None
+
+ def to_bytes(self):
"""Encode the information into a byte stream."""
compress = zlib.compress
if _USE_LZMA:
@@ -432,9 +439,17 @@
z_header_bytes = ''
z_header_len = 0
info_len = 0
- content_len = len(content)
- z_content_bytes = compress(content)
- z_content_len = len(z_content_bytes)
+ if self._z_content is not None:
+ content_len = self._content_length
+ z_content_len = self._z_content_length
+ z_content_bytes = self._z_content
+ else:
+ assert self._content is not None
+ content_len = self._content_length
+ z_content_bytes = compress(self._content)
+ self._z_content = z_content_bytes
+ z_content_len = len(z_content_bytes)
+ self._z_content_length = z_content_len
if _USE_LZMA:
header = self.GCB_LZ_HEADER
else:
@@ -501,7 +516,7 @@
self.storage_kind)
-class LazyGroupContentManager(object):
+class _LazyGroupContentManager(object):
"""This manages a group of LazyGroupCompressFactory objects."""
def __init__(self, block):
@@ -576,12 +591,65 @@
del header_bytes
z_header_bytes_len = len(z_header_bytes)
assert self._block._z_content is not None
+ block_bytes = self._block.to_bytes()
lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,
- len(self._block._z_content)))
+ len(block_bytes)))
lines.append(z_header_bytes)
- lines.append(self._block._z_content)
+ lines.append(block_bytes)
+ del z_header_bytes, block_bytes
return ''.join(lines)
+ @classmethod
+ def from_bytes(cls, bytes, line_end):
+ # TODO: This does extra string copying, probably better to do it a
+ # different way
+ (storage_kind, z_header_len, header_len,
+ block_len, rest) = bytes.split('\n', 4)
+ del bytes
+ if storage_kind != 'groupcompress-block':
+ raise ValueError('Unknown storage kind: %s' % (storage_kind,))
+ z_header_len = int(z_header_len)
+ if len(rest) < z_header_len:
+ raise ValueError('Compressed header len shorter than all bytes')
+ z_header = rest[:z_header_len]
+ header_len = int(header_len)
+ header = zlib.decompress(z_header)
+ if len(header) != header_len:
+ raise ValueError('invalid length for decompressed bytes')
+ del z_header
+ block_len = int(block_len)
+ if len(rest) != z_header_len + block_len:
+ raise ValueError('Invalid length for block')
+ block_bytes = rest[z_header_len:]
+ del rest
+ # So now we have a valid GCB, we just need to parse the factories that
+ # were sent to us
+ header_lines = header.split('\n')
+ del header
+ last = header_lines.pop()
+ if last != '':
+ raise ValueError('header lines did not end with a trailing'
+ ' newline')
+ if len(header_lines) % 4 != 0:
+ raise ValueError('The header was not an even multiple of 4 lines')
+ block = GroupCompressBlock.from_bytes(block_bytes)
+ del block_bytes
+ result = cls(block)
+ for start in xrange(0, len(header_lines), 4):
+ # intern()?
+ key = tuple(header_lines[start].split('\x00'))
+ parents_line = header_lines[start+1]
+ if parents_line == 'None:':
+ parents = None
+ else:
+ parents = tuple([tuple(segment.split('\x00'))
+ for segment in parents_line.split('\t')
+ if segment])
+ start_offset = int(header_lines[start+2])
+ end_offset = int(header_lines[start+3])
+ result.add_factory(key, parents, start_offset, end_offset)
+ return result
+
class GroupCompressor(object):
"""Produce a serialised group of compressed texts.
@@ -738,7 +806,8 @@
"""Finish this group, creating a formatted stream."""
content = ''.join(self.lines)
self.lines = None
- return self._block.to_bytes(content)
+ self._block.set_content(content)
+ return self._block.to_bytes()
def output_chunks(self, new_chunks):
"""Output some chunks.
@@ -1178,12 +1247,12 @@
block = self._get_block(index_memo)
start, end = index_memo[3:5]
if manager is None:
- manager = LazyGroupContentManager(block)
+ manager = _LazyGroupContentManager(block)
elif manager._block is not block:
# Flush and create a new manager
for factory in manager.get_record_stream():
yield factory
- manager = LazyGroupContentManager(block)
+ manager = _LazyGroupContentManager(block)
manager.add_factory(key, parents, start, end)
else:
if manager is not None:
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-03-17 04:23:42 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-03-17 05:00:40 +0000
@@ -213,7 +213,7 @@
self.assertIs(None, block._z_content)
block._ensure_content() # Ensure content is safe to call 2x
- def test_from_bytes(self):
+ def test_from_bytes_with_labels(self):
header = ('key:bing\n'
'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
'type:fulltext\n'
@@ -242,6 +242,7 @@
z_header, z_content)
block = groupcompress.GroupCompressBlock.from_bytes(
z_bytes)
+ block._parse_header()
self.assertIsInstance(block, groupcompress.GroupCompressBlock)
self.assertEqual([('bing',), ('foo', 'bar')], sorted(block._entries))
bing = block._entries[('bing',)]
@@ -291,11 +292,17 @@
self.assertEqual(100, e.length)
def test_to_bytes(self):
+ no_labels = groupcompress._NO_LABELS
+ def reset():
+ groupcompress._NO_LABELS = no_labels
+ self.addCleanup(reset)
+ groupcompress._NO_LABELS = False
gcb = groupcompress.GroupCompressBlock()
gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
- bytes = gcb.to_bytes('this is some content\n'
- 'this content will be compressed\n')
+ gcb.set_content('this is some content\n'
+ 'this content will be compressed\n')
+ bytes = gcb.to_bytes()
expected_header =('gcb1z\n' # group compress block v1 zlib
'76\n' # Length of compressed bytes
'183\n' # Length of uncompressed meta-info
@@ -499,7 +506,7 @@
def test_get_fulltexts(self):
entries, block = self.make_block(self._texts)
- manager = groupcompress.LazyGroupContentManager(block)
+ manager = groupcompress._LazyGroupContentManager(block)
self.add_key_to_manager(('key1',), entries, block, manager)
self.add_key_to_manager(('key2',), entries, block, manager)
result_order = []
@@ -511,7 +518,7 @@
# If we build the manager in the opposite order, we should get them
# back in the opposite order
- manager = groupcompress.LazyGroupContentManager(block)
+ manager = groupcompress._LazyGroupContentManager(block)
self.add_key_to_manager(('key2',), entries, block, manager)
self.add_key_to_manager(('key1',), entries, block, manager)
result_order = []
@@ -523,21 +530,23 @@
def test__wire_bytes_no_keys(self):
entries, block = self.make_block(self._texts)
- manager = groupcompress.LazyGroupContentManager(block)
+ manager = groupcompress._LazyGroupContentManager(block)
wire_bytes = manager._wire_bytes()
+ block_length = len(block.to_bytes())
self.assertStartsWith(wire_bytes,
'groupcompress-block\n'
'8\n' # len(compress(''))
'0\n' # len('')
'%d\n'
- % (len(block._z_content),)
+ % (block_length,)
)
def test__wire_bytes(self):
entries, block = self.make_block(self._texts)
- manager = groupcompress.LazyGroupContentManager(block)
+ manager = groupcompress._LazyGroupContentManager(block)
self.add_key_to_manager(('key1',), entries, block, manager)
self.add_key_to_manager(('key4',), entries, block, manager)
+ block_bytes = block.to_bytes()
wire_bytes = manager._wire_bytes()
(storage_kind, z_header_len, header_len,
block_len, rest) = wire_bytes.split('\n', 4)
@@ -547,7 +556,7 @@
self.assertEqual('groupcompress-block', storage_kind)
self.assertEqual(33, z_header_len)
self.assertEqual(25, header_len)
- self.assertEqual(len(block._z_content), block_len)
+ self.assertEqual(len(block_bytes), block_len)
z_header = rest[:z_header_len]
header = zlib.decompress(z_header)
self.assertEqual(header_len, len(header))
@@ -565,4 +574,23 @@
entry4.start, entry4.end),
header)
z_block = rest[z_header_len:]
- self.assertEqual(block._z_content, z_block)
+ self.assertEqual(block_bytes, z_block)
+
+ def test_from_bytes(self):
+ entries, block = self.make_block(self._texts)
+ manager = groupcompress._LazyGroupContentManager(block)
+ self.add_key_to_manager(('key1',), entries, block, manager)
+ self.add_key_to_manager(('key4',), entries, block, manager)
+ wire_bytes = manager._wire_bytes()
+ self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
+ manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes,
+ wire_bytes.index('\n')+1)
+ self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
+ self.assertEqual(2, len(manager._factories))
+ self.assertEqual(block._z_content, manager._block._z_content)
+ result_order = []
+ for record in manager.get_record_stream():
+ result_order.append(record.key)
+ text = self._texts[record.key]
+ self.assertEqual(text, record.get_bytes_as('fulltext'))
+ self.assertEqual([('key1',), ('key4',)], result_order)
More information about the bazaar-commits mailing list