Rev 3900: We now round-trip the wire_bytes. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/lazy_gc_stream

John Arbash Meinel john at arbash-meinel.com
Tue Mar 17 05:00:45 GMT 2009


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/lazy_gc_stream

------------------------------------------------------------
revno: 3900
revision-id: john at arbash-meinel.com-20090317050040-q5np851qcvbwma52
parent: john at arbash-meinel.com-20090317042342-3rk1sf50dlx7qxli
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lazy_gc_stream
timestamp: Tue 2009-03-17 00:00:40 -0500
message:
  We now round-trip the wire_bytes.
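
A rough sketch of the round trip this lands (names are taken from the patch
below; the key, offsets and content are made up, real blocks come out of
GroupCompressor, and the handling of None parents is inferred from the
'None:' branch in from_bytes):

    from bzrlib import groupcompress

    content = 'hello world content for key1\n'   # illustrative content
    block = groupcompress.GroupCompressBlock()
    block.set_content(content)
    block.to_bytes()   # compresses and caches block._z_content

    # Wrap the block in the (now private) manager and register one text.
    manager = groupcompress._LazyGroupContentManager(block)
    manager.add_factory(('key1',), None, 0, len(content))

    # Serialise to wire bytes, then parse them straight back.
    wire_bytes = manager._wire_bytes()
    assert wire_bytes.startswith('groupcompress-block\n')
    manager2 = groupcompress._LazyGroupContentManager.from_bytes(
        wire_bytes, wire_bytes.index('\n') + 1)
    assert len(manager2._factories) == 1
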
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-03-17 04:23:42 +0000
+++ b/bzrlib/groupcompress.py	2009-03-17 05:00:40 +0000
@@ -54,7 +54,7 @@
     )
 
 _USE_LZMA = False and (pylzma is not None)
-_NO_LABELS = False
+_NO_LABELS = True
 _FAST = False
 
 def encode_base128_int(val):
@@ -395,7 +395,14 @@
         self._entries[key] = entry
         return entry
 
-    def to_bytes(self, content=''):
+    def set_content(self, content):
+        """Set the content of this block."""
+        self._content_length = len(content)
+        self._content = content
+        self._z_content = None
+        self._z_header_length = None
+
+    def to_bytes(self):
         """Encode the information into a byte stream."""
         compress = zlib.compress
         if _USE_LZMA:
@@ -432,9 +439,17 @@
             z_header_bytes = ''
             z_header_len = 0
             info_len = 0
-        content_len = len(content)
-        z_content_bytes = compress(content)
-        z_content_len = len(z_content_bytes)
+        if self._z_content is not None:
+            content_len = self._content_length
+            z_content_len = self._z_content_length
+            z_content_bytes = self._z_content
+        else:
+            assert self._content is not None
+            content_len = self._content_length
+            z_content_bytes = compress(self._content)
+            self._z_content = z_content_bytes
+            z_content_len = len(z_content_bytes)
+            self._z_content_length = z_content_len
         if _USE_LZMA:
             header = self.GCB_LZ_HEADER
         else:
@@ -501,7 +516,7 @@
             self.storage_kind)
 
 
-class LazyGroupContentManager(object):
+class _LazyGroupContentManager(object):
     """This manages a group of LazyGroupCompressFactory objects."""
 
     def __init__(self, block):
@@ -576,12 +591,65 @@
         del header_bytes
         z_header_bytes_len = len(z_header_bytes)
         assert self._block._z_content is not None
+        block_bytes = self._block.to_bytes()
         lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,
-                                       len(self._block._z_content)))
+                                       len(block_bytes)))
         lines.append(z_header_bytes)
-        lines.append(self._block._z_content)
+        lines.append(block_bytes)
+        del z_header_bytes, block_bytes
         return ''.join(lines)
 
+    @classmethod
+    def from_bytes(cls, bytes, line_end):
+        # TODO: This does extra string copying, probably better to do it a
+        #       different way
+        (storage_kind, z_header_len, header_len,
+         block_len, rest) = bytes.split('\n', 4)
+        del bytes
+        if storage_kind != 'groupcompress-block':
+            raise ValueError('Unknown storage kind: %s' % (storage_kind,))
+        z_header_len = int(z_header_len)
+        if len(rest) < z_header_len:
+            raise ValueError('Compressed header len shorter than all bytes')
+        z_header = rest[:z_header_len]
+        header_len = int(header_len)
+        header = zlib.decompress(z_header)
+        if len(header) != header_len:
+            raise ValueError('invalid length for decompressed bytes')
+        del z_header
+        block_len = int(block_len)
+        if len(rest) != z_header_len + block_len:
+            raise ValueError('Invalid length for block')
+        block_bytes = rest[z_header_len:]
+        del rest
+        # So now we have a valid GCB, we just need to parse the factories that
+        # were sent to us
+        header_lines = header.split('\n')
+        del header
+        last = header_lines.pop()
+        if last != '':
+            raise ValueError('header lines did not end with a trailing'
+                             ' newline')
+        if len(header_lines) % 4 != 0:
+            raise ValueError('The header was not an even multiple of 4 lines')
+        block = GroupCompressBlock.from_bytes(block_bytes)
+        del block_bytes
+        result = cls(block)
+        for start in xrange(0, len(header_lines), 4):
+            # intern()?
+            key = tuple(header_lines[start].split('\x00'))
+            parents_line = header_lines[start+1]
+            if parents_line == 'None:':
+                parents = None
+            else:
+                parents = tuple([tuple(segment.split('\x00'))
+                                 for segment in parents_line.split('\t')
+                                  if segment])
+            start_offset = int(header_lines[start+2])
+            end_offset = int(header_lines[start+3])
+            result.add_factory(key, parents, start_offset, end_offset)
+        return result
+
 
 class GroupCompressor(object):
     """Produce a serialised group of compressed texts.
@@ -738,7 +806,8 @@
         """Finish this group, creating a formatted stream."""
         content = ''.join(self.lines)
         self.lines = None
-        return self._block.to_bytes(content)
+        self._block.set_content(content)
+        return self._block.to_bytes()
 
     def output_chunks(self, new_chunks):
         """Output some chunks.
@@ -1178,12 +1247,12 @@
                         block = self._get_block(index_memo)
                         start, end = index_memo[3:5]
                         if manager is None:
-                            manager = LazyGroupContentManager(block)
+                            manager = _LazyGroupContentManager(block)
                         elif manager._block is not block:
                             # Flush and create a new manager
                             for factory in manager.get_record_stream():
                                 yield factory
-                            manager = LazyGroupContentManager(block)
+                            manager = _LazyGroupContentManager(block)
                         manager.add_factory(key, parents, start, end)
             else:
                 if manager is not None:
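
The wire stream that _wire_bytes() emits and from_bytes() consumes is laid
out roughly as follows (a sketch pieced together from the two methods above;
the 'None:' form for empty parents is inferred from the parsing side only):

    # 'groupcompress-block\n'                     storage kind
    # '<z_header_len>\n<header_len>\n<block_len>\n'
    # <z_header>      zlib-compressed header describing the records
    # <block_bytes>   the whole GroupCompressBlock, i.e. block.to_bytes()
    #
    # The decompressed header carries four newline-terminated lines per
    # record:
    #   key       tuple elements joined with '\x00'
    #   parents   'None:' for no parents, otherwise tab-separated keys,
    #             each joined with '\x00'
    #   start     byte offset into the uncompressed block content
    #   end       byte offset just past this record's content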

=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py	2009-03-17 04:23:42 +0000
+++ b/bzrlib/tests/test_groupcompress.py	2009-03-17 05:00:40 +0000
@@ -213,7 +213,7 @@
         self.assertIs(None, block._z_content)
         block._ensure_content() # Ensure content is safe to call 2x
 
-    def test_from_bytes(self):
+    def test_from_bytes_with_labels(self):
         header = ('key:bing\n'
             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
             'type:fulltext\n'
@@ -242,6 +242,7 @@
                  z_header, z_content)
         block = groupcompress.GroupCompressBlock.from_bytes(
             z_bytes)
+        block._parse_header()
         self.assertIsInstance(block, groupcompress.GroupCompressBlock)
         self.assertEqual([('bing',), ('foo', 'bar')], sorted(block._entries))
         bing = block._entries[('bing',)]
@@ -291,11 +292,17 @@
         self.assertEqual(100, e.length)
 
     def test_to_bytes(self):
+        no_labels = groupcompress._NO_LABELS
+        def reset():
+            groupcompress._NO_LABELS = no_labels
+        self.addCleanup(reset)
+        groupcompress._NO_LABELS = False
         gcb = groupcompress.GroupCompressBlock()
         gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
         gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
-        bytes = gcb.to_bytes('this is some content\n'
-                             'this content will be compressed\n')
+        gcb.set_content('this is some content\n'
+                        'this content will be compressed\n')
+        bytes = gcb.to_bytes()
         expected_header =('gcb1z\n' # group compress block v1 zlib
                           '76\n' # Length of compressed bytes
                           '183\n' # Length of uncompressed meta-info
@@ -499,7 +506,7 @@
 
     def test_get_fulltexts(self):
         entries, block = self.make_block(self._texts)
-        manager = groupcompress.LazyGroupContentManager(block)
+        manager = groupcompress._LazyGroupContentManager(block)
         self.add_key_to_manager(('key1',), entries, block, manager)
         self.add_key_to_manager(('key2',), entries, block, manager)
         result_order = []
@@ -511,7 +518,7 @@
 
         # If we build the manager in the opposite order, we should get them
         # back in the opposite order
-        manager = groupcompress.LazyGroupContentManager(block)
+        manager = groupcompress._LazyGroupContentManager(block)
         self.add_key_to_manager(('key2',), entries, block, manager)
         self.add_key_to_manager(('key1',), entries, block, manager)
         result_order = []
@@ -523,21 +530,23 @@
 
     def test__wire_bytes_no_keys(self):
         entries, block = self.make_block(self._texts)
-        manager = groupcompress.LazyGroupContentManager(block)
+        manager = groupcompress._LazyGroupContentManager(block)
         wire_bytes = manager._wire_bytes()
+        block_length = len(block.to_bytes())
         self.assertStartsWith(wire_bytes,
                               'groupcompress-block\n'
                               '8\n' # len(compress(''))
                               '0\n' # len('')
                               '%d\n'
-                              % (len(block._z_content),)
+                              % (block_length,)
                               )
 
     def test__wire_bytes(self):
         entries, block = self.make_block(self._texts)
-        manager = groupcompress.LazyGroupContentManager(block)
+        manager = groupcompress._LazyGroupContentManager(block)
         self.add_key_to_manager(('key1',), entries, block, manager)
         self.add_key_to_manager(('key4',), entries, block, manager)
+        block_bytes = block.to_bytes()
         wire_bytes = manager._wire_bytes()
         (storage_kind, z_header_len, header_len,
          block_len, rest) = wire_bytes.split('\n', 4)
@@ -547,7 +556,7 @@
         self.assertEqual('groupcompress-block', storage_kind)
         self.assertEqual(33, z_header_len)
         self.assertEqual(25, header_len)
-        self.assertEqual(len(block._z_content), block_len)
+        self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
         self.assertEqual(header_len, len(header))
@@ -565,4 +574,23 @@
                                 entry4.start, entry4.end),
                             header)
         z_block = rest[z_header_len:]
-        self.assertEqual(block._z_content, z_block)
+        self.assertEqual(block_bytes, z_block)
+
+    def test_from_bytes(self):
+        entries, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        self.add_key_to_manager(('key1',), entries, block, manager)
+        self.add_key_to_manager(('key4',), entries, block, manager)
+        wire_bytes = manager._wire_bytes()
+        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
+        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes,
+            wire_bytes.index('\n')+1)
+        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
+        self.assertEqual(2, len(manager._factories))
+        self.assertEqual(block._z_content, manager._block._z_content)
+        result_order = []
+        for record in manager.get_record_stream():
+            result_order.append(record.key)
+            text = self._texts[record.key]
+            self.assertEqual(text, record.get_bytes_as('fulltext'))
+        self.assertEqual([('key1',), ('key4',)], result_order)
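
The set_content()/to_bytes() split exercised in test_to_bytes above also
means a block's compression is computed once and then cached; a minimal
sketch (the content string is illustrative):

    from bzrlib import groupcompress

    gcb = groupcompress.GroupCompressBlock()
    gcb.set_content('this is some content\n'
                    'this content will be compressed\n')
    bytes1 = gcb.to_bytes()   # compresses and caches gcb._z_content
    bytes2 = gcb.to_bytes()   # reuses the cached compressed bytes
    assert bytes1 == bytes2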


