Rev 3899: We now have a general header for the GC block. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/lazy_gc_stream
John Arbash Meinel
john at arbash-meinel.com
Tue Mar 17 04:23:47 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/lazy_gc_stream
------------------------------------------------------------
revno: 3899
revision-id: john at arbash-meinel.com-20090317042342-3rk1sf50dlx7qxli
parent: john at arbash-meinel.com-20090317034324-1nqpftq6na4rz8ft
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lazy_gc_stream
timestamp: Mon 2009-03-16 23:23:42 -0500
message:
We now have a general header for the GC block.
It is pretty much a trade-off between the GC Label structure and the
formatting used by knit-delta-closure. Probably the main difference is
that the header bytes are compressed, but I found that to be pretty
important for the GC Label, and it isn't hard to do.
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-03-17 03:43:24 +0000
+++ b/bzrlib/groupcompress.py 2009-03-17 04:23:42 +0000
@@ -542,13 +542,45 @@
# use a heuristic to decide that we need to generate a new group.
# (that could be if *any* bytes are unused, or just if more than
# XX percent is unused)
- lines = ['groupcompress-block']
- # The minimal info we need is the key and the start offset. The length
- # and type are encoded in the record itself. However, passing in the
- # The list of keys, and the start offset, the length
+ # The outer block starts with:
+ # 'groupcompress-block\n'
+ # <length of compressed key info>\n
+ # <length of uncompressed info>\n
+ # <length of gc block>\n
+ # <header bytes>
+ # <gc-block>
+ lines = ['groupcompress-block\n']
+ # The minimal info we need is the key, the start offset, and the
+ # parents. The length and type are encoded in the record itself.
+ # However, passing in the other bits makes it easier. For each
+ # record the header holds the key, parents, start offset and end byte:
+ # 1 line key
+ # 1 line with parents, '' for (), 'None:' for None
+ # 1 line for start offset
+ # 1 line for end byte
+ header_lines = []
for factory in self._factories:
- pass
- return ''
+ key_bytes = '\x00'.join(factory.key)
+ parents = factory.parents
+ if parents is None:
+ parent_bytes = 'None:'
+ else:
+ parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
+ record_header = '%s\n%s\n%d\n%d\n' % (
+ key_bytes, parent_bytes, factory._start, factory._end)
+ header_lines.append(record_header)
+ header_bytes = ''.join(header_lines)
+ del header_lines
+ header_bytes_len = len(header_bytes)
+ z_header_bytes = zlib.compress(header_bytes)
+ del header_bytes
+ z_header_bytes_len = len(z_header_bytes)
+ assert self._block._z_content is not None
+ lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,
+ len(self._block._z_content)))
+ lines.append(z_header_bytes)
+ lines.append(self._block._z_content)
+ return ''.join(lines)
class GroupCompressor(object):
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-03-17 03:43:24 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-03-17 04:23:42 +0000
@@ -521,5 +521,48 @@
self.assertEqual(text, record.get_bytes_as('fulltext'))
self.assertEqual([('key2',), ('key1',)], result_order)
+ def test__wire_bytes_no_keys(self):
+ entries, block = self.make_block(self._texts)
+ manager = groupcompress.LazyGroupContentManager(block)
+ wire_bytes = manager._wire_bytes()
+ self.assertStartsWith(wire_bytes,
+ 'groupcompress-block\n'
+ '8\n' # len(compress(''))
+ '0\n' # len('')
+ '%d\n'
+ % (len(block._z_content),)
+ )
+
def test__wire_bytes(self):
entries, block = self.make_block(self._texts)
+ manager = groupcompress.LazyGroupContentManager(block)
+ self.add_key_to_manager(('key1',), entries, block, manager)
+ self.add_key_to_manager(('key4',), entries, block, manager)
+ wire_bytes = manager._wire_bytes()
+ (storage_kind, z_header_len, header_len,
+ block_len, rest) = wire_bytes.split('\n', 4)
+ z_header_len = int(z_header_len)
+ header_len = int(header_len)
+ block_len = int(block_len)
+ self.assertEqual('groupcompress-block', storage_kind)
+ self.assertEqual(33, z_header_len)
+ self.assertEqual(25, header_len)
+ self.assertEqual(len(block._z_content), block_len)
+ z_header = rest[:z_header_len]
+ header = zlib.decompress(z_header)
+ self.assertEqual(header_len, len(header))
+ entry1 = entries[('key1',)]
+ entry4 = entries[('key4',)]
+ self.assertEqualDiff('key1\n'
+ '\n' # no parents
+ '%d\n' # start offset
+ '%d\n' # end byte
+ 'key4\n'
+ '\n'
+ '%d\n'
+ '%d\n'
+ % (entry1.start, entry1.end,
+ entry4.start, entry4.end),
+ header)
+ z_block = rest[z_header_len:]
+ self.assertEqual(block._z_content, z_block)
More information about the bazaar-commits
mailing list