Rev 34: First cut at meta-info as text form, in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/internal_index

John Arbash Meinel john at arbash-meinel.com
Wed Mar 4 17:02:43 GMT 2009


At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/internal_index

------------------------------------------------------------
revno: 34
revision-id: john at arbash-meinel.com-20090304170218-c3thty7hh2yfrnye
parent: john at arbash-meinel.com-20090304165605-zbap3q69laok4o6p
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: internal_index
timestamp: Wed 2009-03-04 11:02:18 -0600
message:
  First cut at meta-info as text form.
-------------- next part --------------
=== modified file 'errors.py'
--- a/errors.py	2008-07-05 18:15:40 +0000
+++ b/errors.py	2009-03-04 17:02:18 +0000
@@ -18,3 +18,12 @@
 """Error objects for compression functions."""
 
 from bzrlib.errors import BzrError
+
+
+class InvalidGroupCompressBlock(BzrError):
+    """Raised when a block has problems."""
+
+    _fmt = "Invalid Group Compress Block: %(msg)s"
+
+    def __init__(self, msg):
+        self.msg = msg

=== modified file 'groupcompress.py'
--- a/groupcompress.py	2009-03-04 16:01:55 +0000
+++ b/groupcompress.py	2009-03-04 17:02:18 +0000
@@ -42,7 +42,6 @@
     )
 from bzrlib.btree_index import BTreeBuilder
 from bzrlib.lru_cache import LRUSizeCache
-from bzrlib.plugins.groupcompress import equivalence_table
 from bzrlib.tsort import topo_sort
 from bzrlib.versionedfile import (
     adapter_registry,
@@ -51,6 +50,7 @@
     FulltextContentFactory,
     VersionedFiles,
     )
+from bzrlib.plugins.groupcompress import errors as gc_errors
 
 _NO_LABELS = False
 _FAST = False
@@ -104,6 +104,98 @@
     return present_keys
 
 
+class GroupCompressBlockEntry(object):
+    """Track the information about a single object inside a GC group.
+
+    This is generally just the dumb data structure.
+    """
+
+    def __init__(self, key, type, sha1, start, length):
+        self.key = key
+        self.type = type # delta, fulltext, external?
+        self.sha1 = sha1 # Sha1 of content
+        self.start = start # Byte offset to start of data
+        self.length = length # Length of content
+
+
+class GroupCompressBlock(object):
+    """An object which maintains the internal structure of the compressed data.
+
+    This tracks the meta info (start of text, length, type, etc.)
+    """
+
+    # Group Compress Block v1 Plain
+    GCB_HEADER = 'gcb1p\n'
+
+    def __init__(self):
+        # map by key? or just order in file?
+        self._entries = {}
+
+    def _parse_header(self):
+        """Parse the meta-info from the stream."""
+
+    @classmethod
+    def from_zlib_bytes(cls, bytes):
+        """Get the info about this block from the compressed bytes.
+
+        :return: A new GroupCompressBlock
+        """
+        return cls()
+
+    @classmethod
+    def from_bytes(cls, bytes):
+        out = cls()
+        if bytes[:6] != cls.GCB_HEADER:
+            raise gc_errors.InvalidGroupCompressBlock(
+                'bytes did not start with %r' % (cls.GCB_HEADER,))
+        return out
+
+    def extract(self, key, sha1=None):
+        """Extract the text for a specific key.
+
+        :param key: The label used for this content
+        :param sha1: TODO (should we validate only when sha1 is supplied?)
+        :return: The bytes for the content
+        """
+
+    def add_entry(self, key, type, sha1, start, length):
+        """Add new meta info about an entry.
+
+        :param key: The key for the new content
+        :param type: Whether this is a delta or fulltext entry (external?)
+        :param sha1: sha1sum of the fulltext of this entry
+        :param start: where the encoded bytes start
+        :param length: total number of bytes in the encoded form
+        :return: The entry?
+        """
+        entry = GroupCompressBlockEntry(key, type, sha1, start, length)
+        assert key not in self._entries
+        self._entries[key] = entry
+        return entry
+
+    def to_bytes(self):
+        """Encode the information into a byte stream."""
+        chunks = []
+        for key in sorted(self._entries):
+            entry = self._entries[key]
+            chunk = ('key:%s\n'
+                     'type:%s\n'
+                     'sha1:%s\n'
+                     'start:%s\n'
+                     'length:%s\n'
+                     '\n'
+                     ) % ('\x00'.join(entry.key),
+                          entry.type,
+                          entry.sha1,
+                          entry.start,
+                          entry.length,
+                          )
+            chunks.append(chunk)
+        info_len = sum(map(len, chunks))
+        chunks = [self.GCB_HEADER, '%d\n' % (info_len,)] + chunks
+        return ''.join(chunks)
+
+
 class GroupCompressor(object):
     """Produce a serialised group of compressed texts.
 

=== modified file 'tests/test_groupcompress.py'
--- a/tests/test_groupcompress.py	2009-03-04 16:01:55 +0000
+++ b/tests/test_groupcompress.py	2009-03-04 17:02:18 +0000
@@ -49,7 +49,7 @@
     return standard_tests
 
 
-class TestGroupCompressor(TestCaseWithTransport):
+class TestGroupCompressor(tests.TestCase):
     """Tests for GroupCompressor"""
 
     def test_empty_delta(self):
@@ -166,3 +166,43 @@
         # and the second
         self.assertEqual((['common\ndifferent\nmoredifferent\n'],
             sha_2), compressor.extract(('newlabel',)))
+
+
+class TestGroupCompressBlock(tests.TestCase):
+
+    def test_from_empty_bytes(self):
+        self.assertRaises(errors.InvalidGroupCompressBlock,
+                          groupcompress.GroupCompressBlock.from_bytes, '')
+
+    def test_from_bytes(self):
+        block = groupcompress.GroupCompressBlock.from_bytes('gcb1p\n')
+        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
+
+    def test_add_entry(self):
+        gcb = groupcompress.GroupCompressBlock()
+        e = gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
+        self.assertIsInstance(e, groupcompress.GroupCompressBlockEntry)
+        self.assertEqual(('foo', 'bar'), e.key)
+        self.assertEqual('fulltext', e.type)
+        self.assertEqual('abcd'*10, e.sha1)
+        self.assertEqual(0, e.start)
+        self.assertEqual(100, e.length)
+
+    def test_to_bytes(self):
+        gcb = groupcompress.GroupCompressBlock()
+        gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
+        gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
+        self.assertEqualDiff('gcb1p\n' # group compress block v1 plain
+                             '183\n' # Length of all meta-info
+                             'key:bing\n'
+                             'type:fulltext\n'
+                             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
+                             'start:100\n'
+                             'length:100\n'
+                             '\n'
+                             'key:foo\x00bar\n'
+                             'type:fulltext\n'
+                             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
+                             'start:0\n'
+                             'length:100\n'
+                             '\n', gcb.to_bytes())



More information about the bazaar-commits mailing list