Rev 34: First cut at meta-info as text form. in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/internal_index
John Arbash Meinel
john at arbash-meinel.com
Wed Mar 4 17:02:43 GMT 2009
At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/internal_index
------------------------------------------------------------
revno: 34
revision-id: john at arbash-meinel.com-20090304170218-c3thty7hh2yfrnye
parent: john at arbash-meinel.com-20090304165605-zbap3q69laok4o6p
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: internal_index
timestamp: Wed 2009-03-04 11:02:18 -0600
message:
First cut at meta-info as text form.
-------------- next part --------------
=== modified file 'errors.py'
--- a/errors.py 2008-07-05 18:15:40 +0000
+++ b/errors.py 2009-03-04 17:02:18 +0000
@@ -18,3 +18,12 @@
"""Error objects for compression functions."""
from bzrlib.errors import BzrError
+
+
+class InvalidGroupCompressBlock(BzrError):
+ """Raised when a block has problems."""
+
+ _fmt = "Invalid Group Compress Block: %(msg)s"
+
+ def __init__(self, msg):
+ self.msg = msg
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2009-03-04 16:01:55 +0000
+++ b/groupcompress.py 2009-03-04 17:02:18 +0000
@@ -42,7 +42,6 @@
)
from bzrlib.btree_index import BTreeBuilder
from bzrlib.lru_cache import LRUSizeCache
-from bzrlib.plugins.groupcompress import equivalence_table
from bzrlib.tsort import topo_sort
from bzrlib.versionedfile import (
adapter_registry,
@@ -51,6 +50,7 @@
FulltextContentFactory,
VersionedFiles,
)
+from bzrlib.plugins.groupcompress import errors as gc_errors
_NO_LABELS = False
_FAST = False
@@ -104,6 +104,98 @@
return present_keys
+class GroupCompressBlockEntry(object):
+ """Track the information about a single object inside a GC group.
+
+ This is generally just the dumb data structure.
+ """
+
+ def __init__(self, key, type, sha1, start, length):
+ self.key = key
+ self.type = type # delta, fulltext, external?
+ self.sha1 = sha1 # Sha1 of content
+ self.start = start # Byte offset to start of data
+ self.length = length # Length of content
+
+
+class GroupCompressBlock(object):
+ """An object which maintains the internal structure of the compressed data.
+
+ This tracks the meta info (start of text, length, type, etc.)
+ """
+
+ # Group Compress Block v1 Plain
+ GCB_HEADER = 'gcb1p\n'
+
+ def __init__(self):
+ # map by key? or just order in file?
+ self._entries = {}
+
+ def _parse_header(self):
+ """Parse the meta-info from the stream."""
+
+ @classmethod
+ def from_zlib_bytes(cls, bytes):
+ """Get the info about this block from the compressed bytes.
+
+ :return: A new GroupCompressBlock
+ """
+ return cls()
+
+ @classmethod
+ def from_bytes(cls, bytes):
+ out = cls()
+ if bytes[:6] != cls.GCB_HEADER:
+ raise gc_errors.InvalidGroupCompressBlock(
+ 'bytes did not start with %r' % (cls.GCB_HEADER,))
+ return out
+
+ def extract(self, key, sha1=None):
+ """Extract the text for a specific key.
+
+ :param key: The label used for this content
+ :param sha1: TODO (should we validate only when sha1 is supplied?)
+ :return: The bytes for the content
+ """
+
+ def add_entry(self, key, type, sha1, start, length):
+ """Add new meta info about an entry.
+
+ :param key: The key for the new content
+ :param type: Whether this is a delta or fulltext entry (external?)
+ :param sha1: sha1sum of the fulltext of this entry
+ :param start: where the encoded bytes start
+ :param length: total number of bytes in the encoded form
+ :return: The entry?
+ """
+ entry = GroupCompressBlockEntry(key, type, sha1, start, length)
+ assert key not in self._entries
+ self._entries[key] = entry
+ return entry
+
+ def to_bytes(self):
+ """Encode the information into a byte stream."""
+ chunks = []
+ for key in sorted(self._entries):
+ entry = self._entries[key]
+ chunk = ('key:%s\n'
+ 'type:%s\n'
+ 'sha1:%s\n'
+ 'start:%s\n'
+ 'length:%s\n'
+ '\n'
+ ) % ('\x00'.join(entry.key),
+ entry.type,
+ entry.sha1,
+ entry.start,
+ entry.length,
+ )
+ chunks.append(chunk)
+ info_len = sum(map(len, chunks))
+ chunks = [self.GCB_HEADER, '%d\n' % (info_len,)] + chunks
+ return ''.join(chunks)
+
+
class GroupCompressor(object):
"""Produce a serialised group of compressed texts.
=== modified file 'tests/test_groupcompress.py'
--- a/tests/test_groupcompress.py 2009-03-04 16:01:55 +0000
+++ b/tests/test_groupcompress.py 2009-03-04 17:02:18 +0000
@@ -49,7 +49,7 @@
return standard_tests
-class TestGroupCompressor(TestCaseWithTransport):
+class TestGroupCompressor(tests.TestCase):
"""Tests for GroupCompressor"""
def test_empty_delta(self):
@@ -166,3 +166,43 @@
# and the second
self.assertEqual((['common\ndifferent\nmoredifferent\n'],
sha_2), compressor.extract(('newlabel',)))
+
+
+class TestGroupCompressBlock(tests.TestCase):
+
+ def test_from_empty_bytes(self):
+ self.assertRaises(errors.InvalidGroupCompressBlock,
+ groupcompress.GroupCompressBlock.from_bytes, '')
+
+ def test_from_bytes(self):
+ block = groupcompress.GroupCompressBlock.from_bytes('gcb1p\n')
+ self.assertIsInstance(block, groupcompress.GroupCompressBlock)
+
+ def test_add_entry(self):
+ gcb = groupcompress.GroupCompressBlock()
+ e = gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
+ self.assertIsInstance(e, groupcompress.GroupCompressBlockEntry)
+ self.assertEqual(('foo', 'bar'), e.key)
+ self.assertEqual('fulltext', e.type)
+ self.assertEqual('abcd'*10, e.sha1)
+ self.assertEqual(0, e.start)
+ self.assertEqual(100, e.length)
+
+ def test_to_bytes(self):
+ gcb = groupcompress.GroupCompressBlock()
+ gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
+ gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
+ self.assertEqualDiff('gcb1p\n' # group compress block v1 plain
+ '183\n' # Length of all meta-info
+ 'key:bing\n'
+ 'type:fulltext\n'
+ 'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
+ 'start:100\n'
+ 'length:100\n'
+ '\n'
+ 'key:foo\x00bar\n'
+ 'type:fulltext\n'
+ 'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
+ 'start:0\n'
+ 'length:100\n'
+ '\n', gcb.to_bytes())
More information about the bazaar-commits mailing list is available from the list archive.