Rev 4220: merge bbc at 3909 in file:///home/vila/src/bzr/experimental/gc-py-bbc/
Vincent Ladeuil
v.ladeuil+lp@free.fr
Tue Mar 31 11:02:42 BST 2009
At file:///home/vila/src/bzr/experimental/gc-py-bbc/
------------------------------------------------------------
revno: 4220 [merge]
revision-id: v.ladeuil+lp@free.fr-20090331100241-60fp95ukghur25dk
parent: v.ladeuil+lp@free.fr-20090331074343-wghocs28bnzbjlh2
parent: v.ladeuil+lp@free.fr-20090331095705-vqt1dsw9srrjne11
committer: Vincent Ladeuil <v.ladeuil+lp@free.fr>
branch nick: bbc
timestamp: Tue 2009-03-31 12:02:41 +0200
message:
merge bbc at 3909
modified:
bzrlib/_groupcompress_pyx.pyx _groupcompress_c.pyx-20080724041824-yelg6ii7c7zxt4z0-1
bzrlib/delta.h delta.h-20090227173129-qsu3u43vowf1q3ay-1
bzrlib/diff-delta.c diffdelta.c-20090226042143-l9wzxynyuxnb5hus-1
bzrlib/groupcompress.py groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
bzrlib/repofmt/groupcompress_repo.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/test__groupcompress_pyx.py test__groupcompress_-20080724145854-koifwb7749cfzrvj-1
bzrlib/tests/test_groupcompress.py test_groupcompress.p-20080705181503-ccbxd6xuy1bdnrpu-13
-------------- next part --------------
=== modified file 'bzrlib/_groupcompress_pyx.pyx'
--- a/bzrlib/_groupcompress_pyx.pyx 2009-03-24 20:02:26 +0000
+++ b/bzrlib/_groupcompress_pyx.pyx 2009-03-27 16:07:44 +0000
@@ -251,15 +251,6 @@
data = <unsigned char *>delta
top = data + delta_size
- # make sure the orig file size matches what we expect
- # XXX: gcc warns because data isn't defined as 'const'
- size = get_delta_hdr_size(&data, top)
- if (size > source_size):
- # XXX: mismatched source size
- raise RuntimeError('source size %d < expected source size %d'
- % (source_size, size))
- source_size = size
-
# now the result size
size = get_delta_hdr_size(&data, top)
result = PyString_FromStringAndSize(NULL, size)
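The hunk above drops the first of the two size fields at the front of every delta: deltas no longer begin with the source size, only the target size, so apply_delta reads a single length header before the copy/insert commands. Both fields use the base-128 encoding written by the C loop removed further down in diff-delta.c. A minimal Python sketch of that decoding (illustrative only, not the bzrlib code; decode_size is a made-up name):

    def decode_size(data, pos):
        # Read one base-128 length: the low seven bits of each byte
        # carry data, a set high bit means another byte follows.
        size = 0
        shift = 0
        while True:
            byte = ord(data[pos])
            pos += 1
            size |= (byte & 0x7f) << shift
            shift += 7
            if not byte & 0x80:
                break
        return size, pos

    # After this change a delta starts directly with the target size:
    # decode_size('M\x90M', 0) == (77, 1), leaving the first copy
    # command at offset 1 instead of 2.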
=== modified file 'bzrlib/delta.h'
--- a/bzrlib/delta.h 2009-03-26 16:22:58 +0000
+++ b/bzrlib/delta.h 2009-03-27 16:07:44 +0000
@@ -77,8 +77,10 @@
const void *buf, unsigned long bufsize,
unsigned long *delta_size, unsigned long max_delta_size);
-/* the smallest possible delta size is 4 bytes */
-#define DELTA_SIZE_MIN 4
+/* the smallest possible delta size is 3 bytes
+ * Target size, Copy command, Copy length
+ */
+#define DELTA_SIZE_MIN 3
/*
* This must be called twice on the delta data buffer, first to get the
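DELTA_SIZE_MIN drops to three bytes because an identity delta now needs only the target size, one copy opcode and one copy-length byte, which is exactly the shape of the updated noop-delta expectations in test__groupcompress_pyx.py below. An annotated breakdown (assuming the usual copy-opcode bit layout, where bit 0x80 marks a copy, bits 0x01-0x0f select offset bytes and bits 0x10-0x70 select length bytes):

    ident_delta = 'M\x90M'             # identity delta for a 77 byte text
    assert ord(ident_delta[0]) == 77   # target size, a single varint byte
    assert ord(ident_delta[1]) == 0x90 # 0x80|0x10: copy from offset 0,
                                       # one length byte follows
    assert ord(ident_delta[2]) == 77   # copy length, 77 bytes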
=== modified file 'bzrlib/diff-delta.c'
--- a/bzrlib/diff-delta.c 2009-03-19 23:30:50 +0000
+++ b/bzrlib/diff-delta.c 2009-03-27 16:07:44 +0000
@@ -707,8 +707,6 @@
/* then populate the index for the new data */
prev_val = ~0;
data = buffer;
- /* source size */
- get_delta_hdr_size(&data, top);
/* target size */
get_delta_hdr_size(&data, top);
entry = entries; /* start at the first slot */
@@ -881,14 +879,7 @@
if (!out)
return NULL;
- /* store reference buffer size */
source_size = index->last_src->size + index->last_src->agg_offset;
- i = source_size;
- while (i >= 0x80) {
- out[outpos++] = i | 0x80;
- i >>= 7;
- }
- out[outpos++] = i;
/* store target buffer size */
i = trg_size;
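The encoder no longer emits the reference (source) buffer size at all; the loop kept for the target size shows the whole encoding. The same loop rendered in Python for reference (a sketch; encode_size is a made-up name):

    def encode_size(value):
        # Base-128 encoding: least significant seven bits first, high
        # bit set on every byte except the last.
        out = []
        while value >= 0x80:
            out.append(chr((value & 0x7f) | 0x80))
            value >>= 7
        out.append(chr(value))
        return ''.join(out)

    # encode_size(77)  == 'M'          -- a single byte, as in the tests
    # encode_size(135) == '\x87\x01'   -- the '\x87\x01' prefix seen in
    #                                     the _text3 expectations below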
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-03-30 21:30:33 +0000
+++ b/bzrlib/groupcompress.py 2009-03-31 09:57:05 +0000
@@ -56,8 +56,6 @@
)
_USE_LZMA = False and (pylzma is not None)
-_NO_LABELS = True
-_FAST = False
# osutils.sha_string('')
_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
@@ -160,9 +158,6 @@
# map by key? or just order in file?
self._entries = {}
self._compressor_name = None
- self._z_header_length = None
- self._header_length = None
- self._z_header = None
self._z_content = None
self._z_content_decompressor = None
self._z_content_length = None
@@ -170,39 +165,10 @@
self._content = None
def __len__(self):
- return self._content_length + self._header_length
-
- def _parse_header(self):
- """Parse the header part of the block."""
- assert self._z_header is not None
- if self._z_header == '':
- # Nothing to process
- self._z_header = None
- return
- if self._compressor_name == 'lzma':
- header = pylzma.decompress(self._z_header)
- else:
- assert self._compressor_name == 'zlib'
- header = zlib.decompress(self._z_header)
- self._z_header = None # We have consumed the header
- lines = header.split('\n')
- del header
- info_dict = {}
- for line in lines:
- if not line: #End of record
- if not info_dict:
- break
- self.add_entry(**info_dict)
- info_dict = {}
- continue
- key, value = line.split(':', 1)
- if key == 'key':
- value = tuple(map(intern, value.split('\x00')))
- elif key in ('start', 'length'):
- value = int(value)
- elif key == 'type':
- value = intern(value)
- info_dict[key] = value
+ # This is the maximum number of bytes this object will reference if
+ # everything is decompressed. However, if we decompress less than
+ # everything... (this would cause some problems for LRUSizeCache)
+ return self._content_length + self._z_content_length
def _ensure_content(self, num_bytes=None):
"""Make sure that content has been expanded enough.
@@ -277,48 +243,25 @@
# The stream is finished
self._z_content_decompressor = None
- def _parse_bytes(self, bytes):
+ def _parse_bytes(self, bytes, pos):
"""Read the various lengths from the header.
This also populates the various 'compressed' buffers.
:return: The position in bytes just after the last newline
"""
- # At present, there are 4 lengths to be read, we have 2 integers for
- # the length of the compressed and uncompressed header, and 2 integers
- # for the compressed and uncompressed content
- # 14 bytes can represent > 1TB, so to avoid checking too far, cap the
- # search to 14 bytes.
- pos = bytes.index('\n', 6, 20)
- self._z_header_length = int(bytes[6:pos])
- pos += 1
- pos2 = bytes.index('\n', pos, pos + 14)
- self._header_length = int(bytes[pos:pos2])
- end_of_z_lengths = pos2
- pos2 += 1
- # Older versions don't have the content lengths, if we want to preserve
- # backwards compatibility, we could try/except over these, and allow
- # them to be skipped
- try:
- pos = bytes.index('\n', pos2, pos2 + 14)
- self._z_content_length = int(bytes[pos2:pos])
- pos += 1
- pos2 = bytes.index('\n', pos, pos + 14)
- self._content_length = int(bytes[pos:pos2])
- pos = pos2 + 1
- assert len(bytes) == (pos + self._z_header_length +
- self._z_content_length)
- pos2 = pos + self._z_header_length
- self._z_header = bytes[pos:pos2]
- self._z_content = bytes[pos2:]
- assert len(self._z_content) == self._z_content_length
- except ValueError:
- # This is the older form, which did not encode its content length
- pos = end_of_z_lengths + 1
- pos2 = pos + self._z_header_length
- self._z_header = bytes[pos:pos2]
- self._z_content = bytes[pos2:]
- self._z_content_length = len(self._z_content)
+ # At present, we have 2 integers for the compressed and uncompressed
+ # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
+ # checking too far, cap the search to 14 bytes.
+ pos2 = bytes.index('\n', pos, pos + 14)
+ self._z_content_length = int(bytes[pos:pos2])
+ pos = pos2 + 1
+ pos2 = bytes.index('\n', pos, pos + 14)
+ self._content_length = int(bytes[pos:pos2])
+ pos = pos2 + 1
+ assert len(bytes) == (pos + self._z_content_length)
+ self._z_content = bytes[pos:]
+ assert len(self._z_content) == self._z_content_length
@classmethod
def from_bytes(cls, bytes):
@@ -331,9 +274,7 @@
out._compressor_name = 'lzma'
else:
raise ValueError('unknown compressor: %r' % (bytes,))
- out._parse_bytes(bytes)
- if not _NO_LABELS:
- out._parse_header()
+ out._parse_bytes(bytes, 6)
return out
def extract(self, key, start, end, sha1=None):
@@ -392,66 +333,24 @@
self._content_length = len(content)
self._content = content
self._z_content = None
- self._z_header_length = None
def to_bytes(self):
"""Encode the information into a byte stream."""
compress = zlib.compress
if _USE_LZMA:
compress = pylzma.compress
- chunks = []
- for key in sorted(self._entries):
- entry = self._entries[key]
- chunk = ('key:%s\n'
- 'sha1:%s\n'
- 'type:%s\n'
- 'start:%s\n'
- 'length:%s\n'
- '\n'
- ) % ('\x00'.join(entry.key),
- entry.sha1,
- entry.type,
- entry.start,
- entry.length,
- )
- chunks.append(chunk)
- bytes = ''.join(chunks)
- info_len = len(bytes)
- z_header_bytes = compress(bytes)
- del bytes, chunks
- z_header_len = len(z_header_bytes)
- # TODO: we may want to have the header compressed in the same chain
- # as the data, or we may not, evaulate it
- # having them compressed together is probably a win for
- # revisions and the 'inv' portion of chk inventories. As the
- # label in the header is duplicated in the text.
- # For chk pages and real bytes, I would guess this is not
- # true.
- if _NO_LABELS:
- z_header_bytes = ''
- z_header_len = 0
- info_len = 0
- if self._z_content is not None:
- content_len = self._content_length
- z_content_len = self._z_content_length
- z_content_bytes = self._z_content
- else:
+ if self._z_content is None:
assert self._content is not None
- content_len = self._content_length
- z_content_bytes = compress(self._content)
- self._z_content = z_content_bytes
- z_content_len = len(z_content_bytes)
- self._z_content_length = z_content_len
+ self._z_content = compress(self._content)
+ self._z_content_length = len(self._z_content)
if _USE_LZMA:
header = self.GCB_LZ_HEADER
else:
header = self.GCB_HEADER
chunks = [header,
- '%d\n%d\n%d\n%d\n' % (z_header_len, info_len,
- z_content_len, content_len)
+ '%d\n%d\n' % (self._z_content_length, self._content_length),
+ self._z_content,
]
- chunks.append(z_header_bytes)
- chunks.append(z_content_bytes)
return ''.join(chunks)
@@ -828,10 +727,7 @@
len_mini_header = 1 + len(enc_length)
length = len(delta) + len_mini_header
new_chunks = ['d', enc_length, delta]
- if _FAST:
- self._delta_index._source_offset += length
- else:
- self._delta_index.add_delta_source(delta, len_mini_header)
+ self._delta_index.add_delta_source(delta, len_mini_header)
self._block.add_entry(key, type=type, sha1=sha1,
start=self.endpoint, length=length)
start = self.endpoint
@@ -1463,6 +1359,9 @@
value = "%d %d %d %d" % (block_start, block_length,
record._start, record._end)
nodes = [(record.key, value, (record.parents,))]
+ # TODO: Consider buffering up many nodes to be added, not
+ # sure how much overhead this has, but we're seeing
+ # ~23s / 120s in add_records calls
self._index.add_records(nodes, random_id=random_id)
continue
try:
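With this change the serialised GroupCompressBlock is just a format marker, two ascii decimal lengths and the compressed payload; the separately compressed label header is gone, and _parse_bytes() is told where to start instead of locating the lengths itself. A minimal sketch of the layout to_bytes() now writes (zlib variant only; build_block is a made-up name, not bzrlib API):

    import zlib

    def build_block(content):
        # 'gcb1z\n'  -> group compress block v1, zlib ('gcb1lz\n' for lzma)
        # '%d\n%d\n' -> compressed length, then uncompressed length
        # payload    -> the zlib-compressed content, nothing after it
        z_content = zlib.compress(content)
        return ''.join(['gcb1z\n',
                        '%d\n%d\n' % (len(z_content), len(content)),
                        z_content])

    # test_from_bytes below builds exactly this layout by hand;
    # test_from_minimal_bytes uses the degenerate 'gcb1z\n0\n0\n' form.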
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py 2009-03-27 16:36:50 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py 2009-03-31 09:57:05 +0000
@@ -725,42 +725,6 @@
return packer.pack(pb)
-# This format has been disabled for now. It is not expected that this will be a
-# useful next-generation format.
-#
-# class RepositoryFormatPackGCPlain(RepositoryFormatKnitPack6):
-# """A B+Tree index using pack repository."""
-#
-# repository_class = GCPackRepository
-# rich_root_data = False
-# # Note: We cannot unpack a delta that references a text we haven't
-# # seen yet. There are 2 options, work in fulltexts, or require
-# # topological sorting. Using fulltexts is more optimal for local
-# # operations, because the source can be smart about extracting
-# # multiple in-a-row (and sharing strings). Topological is better
-# # for remote, because we access less data.
-# _fetch_order = 'unordered'
-# _fetch_uses_deltas = False
-#
-# def _get_matching_bzrdir(self):
-# return bzrdir.format_registry.make_bzrdir('gc-no-rich-root')
-#
-# def _ignore_setting_bzrdir(self, format):
-# pass
-#
-# _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
-#
-# def get_format_string(self):
-# """See RepositoryFormat.get_format_string()."""
-# return ("Bazaar development format - btree+gc "
-# "(needs bzr.dev from 1.13)\n")
-#
-# def get_format_description(self):
-# """See RepositoryFormat.get_format_description()."""
-# return ("Development repository format - btree+groupcompress "
-# ", interoperates with pack-0.92\n")
-#
-
class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
"""A hashed CHK+group compress pack repository."""
@@ -790,7 +754,7 @@
def get_format_string(self):
"""See RepositoryFormat.get_format_string()."""
return ('Bazaar development format - hash16chk+gc rich-root'
- ' (needs bzr.dev from 1.13)\n')
+ ' (needs bzr.dev from 1.14)\n')
def get_format_description(self):
"""See RepositoryFormat.get_format_description()."""
@@ -827,7 +791,7 @@
def get_format_string(self):
"""See RepositoryFormat.get_format_string()."""
return ('Bazaar development format - hash255chk+gc rich-root'
- ' (needs bzr.dev from 1.13)\n')
+ ' (needs bzr.dev from 1.14)\n')
def get_format_description(self):
"""See RepositoryFormat.get_format_description()."""
@@ -872,7 +836,7 @@
def get_format_string(self):
"""See RepositoryFormat.get_format_string()."""
return ('Bazaar development format - hash255chk+gc rich-root bigpage'
- ' (needs bzr.dev from 1.13)\n')
+ ' (needs bzr.dev from 1.14)\n')
def get_format_description(self):
"""See RepositoryFormat.get_format_description()."""
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-03-31 07:43:43 +0000
+++ b/bzrlib/repository.py 2009-03-31 10:02:41 +0000
@@ -3033,24 +3033,18 @@
'bzrlib.repofmt.pack_repo',
'RepositoryFormatPackDevelopment5Hash255',
)
-# XXX: This format is scheduled for termination
-# format_registry.register_lazy(
-# 'Bazaar development format - btree+gc (needs bzr.dev from 1.13)\n',
-# 'bzrlib.repofmt.groupcompress_repo',
-# 'RepositoryFormatPackGCPlain',
-# )
format_registry.register_lazy(
- 'Bazaar development format - hash16chk+gc rich-root (needs bzr.dev from 1.13)\n',
+ 'Bazaar development format - hash16chk+gc rich-root (needs bzr.dev from 1.14)\n',
'bzrlib.repofmt.groupcompress_repo',
'RepositoryFormatPackGCCHK16',
)
format_registry.register_lazy(
- 'Bazaar development format - hash255chk+gc rich-root (needs bzr.dev from 1.13)\n',
+ 'Bazaar development format - hash255chk+gc rich-root (needs bzr.dev from 1.14)\n',
'bzrlib.repofmt.groupcompress_repo',
'RepositoryFormatPackGCCHK255',
)
format_registry.register_lazy(
- 'Bazaar development format - hash255chk+gc rich-root bigpage (needs bzr.dev from 1.13)\n',
+ 'Bazaar development format - hash255chk+gc rich-root bigpage (needs bzr.dev from 1.14)\n',
'bzrlib.repofmt.groupcompress_repo',
'RepositoryFormatPackGCCHK255Big',
)
=== modified file 'bzrlib/tests/test__groupcompress_pyx.py'
--- a/bzrlib/tests/test__groupcompress_pyx.py 2009-03-25 07:54:11 +0000
+++ b/bzrlib/tests/test__groupcompress_pyx.py 2009-03-31 09:57:05 +0000
@@ -123,40 +123,40 @@
def test_make_noop_delta(self):
ident_delta = self.make_delta(_text1, _text1)
- self.assertEqual('MM\x90M', ident_delta)
+ self.assertEqual('M\x90M', ident_delta)
ident_delta = self.make_delta(_text2, _text2)
- self.assertEqual('NN\x90N', ident_delta)
+ self.assertEqual('N\x90N', ident_delta)
ident_delta = self.make_delta(_text3, _text3)
- self.assertEqual('\x87\x01\x87\x01\x90\x87', ident_delta)
+ self.assertEqual('\x87\x01\x90\x87', ident_delta)
def test_make_delta(self):
delta = self.make_delta(_text1, _text2)
- self.assertEqual('MN\x90/\x1fdiffer from\nagainst other text\n', delta)
+ self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n', delta)
delta = self.make_delta(_text2, _text1)
- self.assertEqual('NM\x90/\x1ebe matched\nagainst other text\n', delta)
+ self.assertEqual('M\x90/\x1ebe matched\nagainst other text\n', delta)
delta = self.make_delta(_text3, _text1)
- self.assertEqual('\x87\x01M\x90M', delta)
+ self.assertEqual('M\x90M', delta)
delta = self.make_delta(_text3, _text2)
- self.assertEqual('\x87\x01N\x90/\x1fdiffer from\nagainst other text\n',
+ self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n',
delta)
def test_apply_delta_is_typesafe(self):
- self.apply_delta(_text1, 'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, object(), 'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, unicode(_text1), 'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, _text1, u'MM\x90M')
+ self.apply_delta(_text1, 'M\x90M')
+ self.assertRaises(TypeError,
+ self.apply_delta, object(), 'M\x90M')
+ self.assertRaises(TypeError,
+ self.apply_delta, unicode(_text1), 'M\x90M')
+ self.assertRaises(TypeError,
+ self.apply_delta, _text1, u'M\x90M')
self.assertRaises(TypeError,
self.apply_delta, _text1, object())
def test_apply_delta(self):
target = self.apply_delta(_text1,
- 'MN\x90/\x1fdiffer from\nagainst other text\n')
+ 'N\x90/\x1fdiffer from\nagainst other text\n')
self.assertEqual(_text2, target)
target = self.apply_delta(_text2,
- 'NM\x90/\x1ebe matched\nagainst other text\n')
+ 'M\x90/\x1ebe matched\nagainst other text\n')
self.assertEqual(_text1, target)
@@ -169,7 +169,7 @@
def test_make_delta(self):
di = self._gc_module.DeltaIndex(_text1)
delta = di.make_delta(_text2)
- self.assertEqual('MN\x90/\x1fdiffer from\nagainst other text\n', delta)
+ self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n', delta)
def test_delta_against_multiple_sources(self):
di = self._gc_module.DeltaIndex()
@@ -180,7 +180,7 @@
delta = di.make_delta(_third_text)
result = self._gc_module.apply_delta(_first_text + _second_text, delta)
self.assertEqualDiff(_third_text, result)
- self.assertEqual('\xac\x01\x85\x01\x90\x14\x0chas some in '
+ self.assertEqual('\x85\x01\x90\x14\x0chas some in '
'\x91v6\x03and\x91d"\x91:\n', delta)
def test_delta_with_offsets(self):
@@ -196,7 +196,7 @@
'12345' + _first_text + '1234567890' + _second_text, delta)
self.assertIsNot(None, result)
self.assertEqualDiff(_third_text, result)
- self.assertEqual('\xbb\x01\x85\x01\x91\x05\x14\x0chas some in '
+ self.assertEqual('\x85\x01\x91\x05\x14\x0chas some in '
'\x91\x856\x03and\x91s"\x91?\n', delta)
def test_delta_with_delta_bytes(self):
@@ -205,7 +205,7 @@
di.add_source(_first_text, 0)
self.assertEqual(len(_first_text), di._source_offset)
delta = di.make_delta(_second_text)
- self.assertEqual('Dh\tsome more\x91\x019'
+ self.assertEqual('h\tsome more\x91\x019'
'&previous text\nand has some extra text\n', delta)
di.add_delta_source(delta, 0)
source += delta
@@ -218,8 +218,8 @@
# Note that we don't match the 'common with the', because it isn't long
# enough to match in the original text, and those bytes are not present
# in the delta for the second text.
- self.assertEqual('z\x85\x01\x90\x14\x1chas some in common with the '
- '\x91T&\x03and\x91\x18,', second_delta)
+ self.assertEqual('\x85\x01\x90\x14\x1chas some in common with the '
+ '\x91S&\x03and\x91\x18,', second_delta)
# Add this delta, and create a new delta for the same text. We should
# find the remaining text, and only insert the short 'and' text.
di.add_delta_source(second_delta, 0)
@@ -227,14 +227,14 @@
third_delta = di.make_delta(_third_text)
result = self._gc_module.apply_delta(source, third_delta)
self.assertEqualDiff(_third_text, result)
- self.assertEqual('\xa6\x01\x85\x01\x90\x14\x91\x80\x1c'
- '\x91T&\x03and\x91\x18,', third_delta)
+ self.assertEqual('\x85\x01\x90\x14\x91\x7e\x1c'
+ '\x91S&\x03and\x91\x18,', third_delta)
# Now create a delta, which we know won't be able to be 'fit' into the
# existing index
fourth_delta = di.make_delta(_fourth_text)
self.assertEqual(_fourth_text,
self._gc_module.apply_delta(source, fourth_delta))
- self.assertEqual('\xa6\x01\x80\x01'
+ self.assertEqual('\x80\x01'
'\x7f123456789012345\nsame rabin hash\n'
'123456789012345\nsame rabin hash\n'
'123456789012345\nsame rabin hash\n'
@@ -246,4 +246,4 @@
fifth_delta = di.make_delta(_fourth_text)
self.assertEqual(_fourth_text,
self._gc_module.apply_delta(source, fifth_delta))
- self.assertEqual('\xac\x02\x80\x01\x91\xab\x7f\x01\n', fifth_delta)
+ self.assertEqual('\x80\x01\x91\xa7\x7f\x01\n', fifth_delta)
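Every expected delta in these tests lost its leading varint: the source length is simply no longer written, so the strings now start with the target length. Reading one of the updated expectations byte by byte (values taken from the assertion in test_make_delta above; the opcode semantics assume the copy/insert encoding used by diff-delta.c):

    delta = 'N\x90/\x1fdiffer from\nagainst other text\n'
    # 'N'    -> target size 0x4e (78 bytes, the length of _text2)
    # '\x90' -> copy from the source: offset 0, one length byte follows
    # '/'    -> copy length 0x2f (47 bytes shared with _text1)
    # '\x1f' -> insert the next 0x1f (31) literal bytes:
    #           'differ from\nagainst other text\n'
    # 47 copied + 31 inserted == 78, the declared target size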
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-03-25 07:54:11 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-03-31 09:57:05 +0000
@@ -100,9 +100,9 @@
'different\n'), sha1_2)
expected_lines.extend([
# 'delta', delta length
- 'd\x10',
- # source and target length
- '\x36\x36',
+ 'd\x0f',
+ # target length
+ '\x36',
# copy the line common
'\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
# add the line different, and the trailing newline
@@ -130,9 +130,9 @@
sha1_3)
expected_lines.extend([
# 'delta', delta length
- 'd\x0c',
- # source and target length
- '\x67\x5f'
+ 'd\x0b',
+ # target length
+ '\x5f'
# insert new
'\x03new',
# Copy of first parent 'common' range
@@ -229,7 +229,7 @@
def test_from_minimal_bytes(self):
block = groupcompress.GroupCompressBlock.from_bytes(
- 'gcb1z\n0\n0\n0\n0\n')
+ 'gcb1z\n0\n0\n')
self.assertIsInstance(block, groupcompress.GroupCompressBlock)
self.assertEqual({}, block._entries)
self.assertIs(None, block._content)
@@ -239,70 +239,21 @@
self.assertEqual('', block._z_content)
block._ensure_content() # Ensure content is safe to call 2x
- def test_from_bytes_with_labels(self):
- header = ('key:bing\n'
- 'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
- 'type:fulltext\n'
- 'start:100\n'
- 'length:100\n'
- '\n'
- 'key:foo\x00bar\n'
- 'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
- 'type:fulltext\n'
- 'start:0\n'
- 'length:100\n'
- '\n')
- z_header = zlib.compress(header)
+ def test_from_bytes(self):
content = ('a tiny bit of content\n')
z_content = zlib.compress(content)
z_bytes = (
'gcb1z\n' # group compress block v1 plain
- '%d\n' # Length of zlib bytes
- '%d\n' # Length of all meta-info
'%d\n' # Length of compressed content
'%d\n' # Length of uncompressed content
- '%s' # Compressed header
'%s' # Compressed content
- ) % (len(z_header), len(header),
- len(z_content), len(content),
- z_header, z_content)
+ ) % (len(z_content), len(content), z_content)
block = groupcompress.GroupCompressBlock.from_bytes(
z_bytes)
- block._parse_header()
- self.assertIsInstance(block, groupcompress.GroupCompressBlock)
- self.assertEqual([('bing',), ('foo', 'bar')], sorted(block._entries))
- bing = block._entries[('bing',)]
- self.assertEqual(('bing',), bing.key)
- self.assertEqual('fulltext', bing.type)
- self.assertEqual('abcd'*10, bing.sha1)
- self.assertEqual(100, bing.start)
- self.assertEqual(100, bing.length)
- foobar = block._entries[('foo', 'bar')]
- self.assertEqual(('foo', 'bar'), foobar.key)
- self.assertEqual('fulltext', foobar.type)
- self.assertEqual('abcd'*10, foobar.sha1)
- self.assertEqual(0, foobar.start)
- self.assertEqual(100, foobar.length)
self.assertEqual(z_content, block._z_content)
self.assertIs(None, block._content)
- block._ensure_content()
- self.assertEqual(z_content, block._z_content)
- self.assertEqual(content, block._content)
-
- def test_from_old_bytes(self):
- # Backwards compatibility, with groups that didn't define content length
- content = ('a tiny bit of content\n')
- z_content = zlib.compress(content)
- z_bytes = (
- 'gcb1z\n' # group compress block v1 plain
- '0\n' # Length of zlib bytes
- '0\n' # Length of all meta-info
- '' # Compressed header
- '%s' # Compressed content
- ) % (z_content)
- block = groupcompress.GroupCompressBlock.from_bytes(
- z_bytes)
- self.assertIsInstance(block, groupcompress.GroupCompressBlock)
+ self.assertEqual(len(z_content), block._z_content_length)
+ self.assertEqual(len(content), block._content_length)
block._ensure_content()
self.assertEqual(z_content, block._z_content)
self.assertEqual(content, block._content)
@@ -318,38 +269,23 @@
self.assertEqual(100, e.length)
def test_to_bytes(self):
- no_labels = groupcompress._NO_LABELS
- def reset():
- groupcompress._NO_LABELS = no_labels
- self.addCleanup(reset)
- groupcompress._NO_LABELS = False
+ content = ('this is some content\n'
+ 'this content will be compressed\n')
gcb = groupcompress.GroupCompressBlock()
gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
- gcb.set_content('this is some content\n'
- 'this content will be compressed\n')
+ gcb.set_content(content)
bytes = gcb.to_bytes()
+ self.assertEqual(gcb._z_content_length, len(gcb._z_content))
+ self.assertEqual(gcb._content_length, len(content))
expected_header =('gcb1z\n' # group compress block v1 zlib
- '76\n' # Length of compressed bytes
- '183\n' # Length of uncompressed meta-info
- '50\n' # Length of compressed content
- '53\n' # Length of uncompressed content
- )
+ '%d\n' # Length of compressed content
+ '%d\n' # Length of uncompressed content
+ ) % (gcb._z_content_length, gcb._content_length)
self.assertStartsWith(bytes, expected_header)
remaining_bytes = bytes[len(expected_header):]
raw_bytes = zlib.decompress(remaining_bytes)
- self.assertEqualDiff('key:bing\n'
- 'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
- 'type:fulltext\n'
- 'start:100\n'
- 'length:100\n'
- '\n'
- 'key:foo\x00bar\n'
- 'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
- 'type:fulltext\n'
- 'start:0\n'
- 'length:100\n'
- '\n', raw_bytes)
+ self.assertEqual(content, raw_bytes)
def test_partial_decomp(self):
content_chunks = []
@@ -595,7 +531,7 @@
('key3',): "yet another text which won't be extracted\n"
"with a reasonable amount of compressible bytes\n",
('key4',): "this will be extracted\n"
- "but references bytes from\n"
+ "but references most of its bytes from\n"
"yet another text which won't be extracted\n"
"with a reasonable amount of compressible bytes\n",
}
@@ -681,7 +617,7 @@
self.assertEqualDiff('key1\n'
'\n' # no parents
'%d\n' # start offset
- '%d\n' # end byte
+ '%d\n' # end offset
'key4\n'
'\n'
'%d\n'
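In the group stream itself (the expected_lines checks near the top of this file) each record keeps its one-byte kind marker and body length; only the delta body inside it shrinks, because the source-length byte in front of the target length is gone. Breaking down the first updated record prefix (the insert bytes that follow are not shown in the hunk; the opcode reading assumes the same copy encoding as above):

    record_prefix = 'd\x0f' '\x36' '\x91\x0a\x2c'
    # 'd'            -> a delta record ('f' would be a fulltext)
    # '\x0f'         -> delta body length, 15 bytes (was 0x10 with the
    #                   extra source-length byte)
    # '\x36'         -> target length 0x36 (54 bytes)
    # '\x91\x0a\x2c' -> copy: opcode 0x91 takes one offset byte (0x0a)
    #                   and one length byte (0x2c), i.e. copy 44 bytes
    #                   starting at offset 10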