Rev 3899: We now have a general header for the GC block. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/lazy_gc_stream

John Arbash Meinel john at arbash-meinel.com
Tue Mar 17 04:23:47 GMT 2009


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/lazy_gc_stream

------------------------------------------------------------
revno: 3899
revision-id: john at arbash-meinel.com-20090317042342-3rk1sf50dlx7qxli
parent: john at arbash-meinel.com-20090317034324-1nqpftq6na4rz8ft
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lazy_gc_stream
timestamp: Mon 2009-03-16 23:23:42 -0500
message:
  We now have a general header for the GC block.
  
  It is pretty much just a trade-off between the GC Label structure and the
  formatting used by knit-delta-closure. Probably the main difference is
  compressing the bytes, but I found that to be pretty important
  for the GC Label, and it isn't hard to do.
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-03-17 03:43:24 +0000
+++ b/bzrlib/groupcompress.py	2009-03-17 04:23:42 +0000
@@ -542,13 +542,45 @@
         #       use a heuristic to decide that we need to generate a new group.
         #       (that could be if *any* bytes are unused, or just if more than
         #       XX percent is unused)
-        lines = ['groupcompress-block']
-        # The minimal info we need is the key and the start offset. The length
-        # and type are encoded in the record itself. However, passing in the 
-        # The list of keys, and the start offset, the length
+        # The outer block starts with:
+        #   'groupcompress-block\n'
+        #   <length of compressed key info>\n
+        #   <length of uncompressed info>\n
+        #   <length of gc block>\n
+        #   <header bytes>
+        #   <gc-block>
+        lines = ['groupcompress-block\n']
+        # The minimal info we need is the key, the start offset, and the
+        # parents. The length and type are encoded in the record itself.
+        # However, passing in the other bits makes it easier. Each record in
+        # the header is written as four lines:
+        # 1 line key
+        # 1 line with parents, '' for (), 'None:' for None
+        # 1 line for start offset
+        # 1 line for end byte
+        header_lines = []
         for factory in self._factories:
-            pass
-        return ''
+            key_bytes = '\x00'.join(factory.key)
+            parents = factory.parents
+            if parents is None:
+                parent_bytes = 'None:'
+            else:
+                parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
+            record_header = '%s\n%s\n%d\n%d\n' % (
+                key_bytes, parent_bytes, factory._start, factory._end)
+            header_lines.append(record_header)
+        header_bytes = ''.join(header_lines)
+        del header_lines
+        header_bytes_len = len(header_bytes)
+        z_header_bytes = zlib.compress(header_bytes)
+        del header_bytes
+        z_header_bytes_len = len(z_header_bytes)
+        assert self._block._z_content is not None
+        lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,
+                                       len(self._block._z_content)))
+        lines.append(z_header_bytes)
+        lines.append(self._block._z_content)
+        return ''.join(lines)
 
 
 class GroupCompressor(object):

=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py	2009-03-17 03:43:24 +0000
+++ b/bzrlib/tests/test_groupcompress.py	2009-03-17 04:23:42 +0000
@@ -521,5 +521,48 @@
             self.assertEqual(text, record.get_bytes_as('fulltext'))
         self.assertEqual([('key2',), ('key1',)], result_order)
 
+    def test__wire_bytes_no_keys(self):
+        entries, block = self.make_block(self._texts)
+        manager = groupcompress.LazyGroupContentManager(block)
+        wire_bytes = manager._wire_bytes()
+        self.assertStartsWith(wire_bytes,
+                              'groupcompress-block\n'
+                              '8\n' # len(compress(''))
+                              '0\n' # len('')
+                              '%d\n'
+                              % (len(block._z_content),)
+                              )
+
     def test__wire_bytes(self):
         entries, block = self.make_block(self._texts)
+        manager = groupcompress.LazyGroupContentManager(block)
+        self.add_key_to_manager(('key1',), entries, block, manager)
+        self.add_key_to_manager(('key4',), entries, block, manager)
+        wire_bytes = manager._wire_bytes()
+        (storage_kind, z_header_len, header_len,
+         block_len, rest) = wire_bytes.split('\n', 4)
+        z_header_len = int(z_header_len)
+        header_len = int(header_len)
+        block_len = int(block_len)
+        self.assertEqual('groupcompress-block', storage_kind)
+        self.assertEqual(33, z_header_len)
+        self.assertEqual(25, header_len)
+        self.assertEqual(len(block._z_content), block_len)
+        z_header = rest[:z_header_len]
+        header = zlib.decompress(z_header)
+        self.assertEqual(header_len, len(header))
+        entry1 = entries[('key1',)]
+        entry4 = entries[('key4',)]
+        self.assertEqualDiff('key1\n'
+                             '\n'  # no parents
+                             '%d\n' # start offset
+                             '%d\n' # end byte
+                             'key4\n'
+                             '\n'
+                             '%d\n'
+                             '%d\n'
+                             % (entry1.start, entry1.end,
+                                entry4.start, entry4.end),
+                            header)
+        z_block = rest[z_header_len:]
+        self.assertEqual(block._z_content, z_block)



More information about the bazaar-commits mailing list