Rev 4220: merge bbc at 3909 in file:///home/vila/src/bzr/experimental/gc-py-bbc/

Vincent Ladeuil v.ladeuil+lp@free.fr
Tue Mar 31 11:02:42 BST 2009


At file:///home/vila/src/bzr/experimental/gc-py-bbc/

------------------------------------------------------------
revno: 4220 [merge]
revision-id: v.ladeuil+lp@free.fr-20090331100241-60fp95ukghur25dk
parent: v.ladeuil+lp@free.fr-20090331074343-wghocs28bnzbjlh2
parent: v.ladeuil+lp@free.fr-20090331095705-vqt1dsw9srrjne11
committer: Vincent Ladeuil <v.ladeuil+lp@free.fr>
branch nick: bbc
timestamp: Tue 2009-03-31 12:02:41 +0200
message:
  merge bbc at 3909
modified:
  bzrlib/_groupcompress_pyx.pyx  _groupcompress_c.pyx-20080724041824-yelg6ii7c7zxt4z0-1
  bzrlib/delta.h                 delta.h-20090227173129-qsu3u43vowf1q3ay-1
  bzrlib/diff-delta.c            diffdelta.c-20090226042143-l9wzxynyuxnb5hus-1
  bzrlib/groupcompress.py        groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
  bzrlib/repofmt/groupcompress_repo.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/test__groupcompress_pyx.py test__groupcompress_-20080724145854-koifwb7749cfzrvj-1
  bzrlib/tests/test_groupcompress.py test_groupcompress.p-20080705181503-ccbxd6xuy1bdnrpu-13
=== modified file 'bzrlib/_groupcompress_pyx.pyx'
--- a/bzrlib/_groupcompress_pyx.pyx	2009-03-24 20:02:26 +0000
+++ b/bzrlib/_groupcompress_pyx.pyx	2009-03-27 16:07:44 +0000
@@ -251,15 +251,6 @@
     data = <unsigned char *>delta
     top = data + delta_size
 
-    # make sure the orig file size matches what we expect
-    # XXX: gcc warns because data isn't defined as 'const'
-    size = get_delta_hdr_size(&data, top)
-    if (size > source_size):
-        # XXX: mismatched source size
-        raise RuntimeError('source size %d < expected source size %d'
-                           % (source_size, size))
-    source_size = size
-
     # now the result size
     size = get_delta_hdr_size(&data, top)
     result = PyString_FromStringAndSize(NULL, size)

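The hunk above removes the source-size varint that used to lead every delta, so apply_delta() now reads a single length header before the opcodes. For reference, a minimal Python sketch of the varint decode that get_delta_hdr_size() performs in C (Python 2 string semantics, matching bzrlib of this era; decode_varint is an illustrative name, not a bzrlib function):

    def decode_varint(data, pos):
        """Decode a 7-bit little-endian varint; return (value, new_pos)."""
        value = 0
        shift = 0
        while True:
            byte = ord(data[pos])
            pos += 1
            value |= (byte & 0x7F) << shift
            shift += 7
            if not byte & 0x80:
                return value, pos

    # Old delta layout: source-size varint, then target-size varint.
    # New delta layout: target-size varint only, e.g.:
    assert decode_varint('N\x90/', 0) == (0x4E, 1)
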
=== modified file 'bzrlib/delta.h'
--- a/bzrlib/delta.h	2009-03-26 16:22:58 +0000
+++ b/bzrlib/delta.h	2009-03-27 16:07:44 +0000
@@ -77,8 +77,10 @@
          const void *buf, unsigned long bufsize,
          unsigned long *delta_size, unsigned long max_delta_size);
 
-/* the smallest possible delta size is 4 bytes */
-#define DELTA_SIZE_MIN  4
+/* the smallest possible delta size is 3 bytes
+ * Target size, Copy command, Copy length
+ */
+#define DELTA_SIZE_MIN  3
 
 /*
  * This must be called twice on the delta data buffer, first to get the

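The new minimum matches the no-op deltas asserted in test__groupcompress_pyx.py below, e.g. 'M\x90M'. A byte-by-byte reading of that value (a sketch; opcode semantics follow the git-style binary delta format this code derives from):

    noop = 'M\x90M'
    target_size = ord(noop[0])  # 0x4D = 77: one-byte varint, no continuation
    opcode = ord(noop[1])       # 0x90
    assert opcode & 0x80        # high bit set: a copy command
    assert opcode & 0x10        # bit 0x10: one copy-length byte follows
    copy_len = ord(noop[2])     # 0x4D = 77; no offset bytes, so offset 0
    assert copy_len == target_size  # copy the whole source: a no-op
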
=== modified file 'bzrlib/diff-delta.c'
--- a/bzrlib/diff-delta.c	2009-03-19 23:30:50 +0000
+++ b/bzrlib/diff-delta.c	2009-03-27 16:07:44 +0000
@@ -707,8 +707,6 @@
     /* then populate the index for the new data */
     prev_val = ~0;
     data = buffer;
-    /* source size */
-    get_delta_hdr_size(&data, top);
     /* target size */
     get_delta_hdr_size(&data, top);
     entry = entries; /* start at the first slot */
@@ -881,14 +879,7 @@
     if (!out)
         return NULL;
 
-    /* store reference buffer size */
     source_size = index->last_src->size + index->last_src->agg_offset;
-    i = source_size;
-    while (i >= 0x80) {
-        out[outpos++] = i | 0x80;
-        i >>= 7;
-    }
-    out[outpos++] = i;
 
     /* store target buffer size */
     i = trg_size;

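The loop deleted from create_delta() is the standard base-128 varint encoder; the identical loop still runs once just below it, for the target size. A Python rendering for reference (encode_varint is an illustrative name, not a bzrlib function):

    def encode_varint(value):
        """Encode a non-negative int as a 7-bit little-endian varint."""
        out = []
        while value >= 0x80:
            out.append(chr((value & 0x7F) | 0x80))
            value >>= 7
        out.append(chr(value))
        return ''.join(out)

    assert encode_varint(77) == 'M'          # fits in 7 bits: one byte
    assert encode_varint(300) == '\xac\x02'  # e.g. the two-byte source-size
                                             # prefix dropped from fifth_delta
                                             # in the test changes below
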
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-03-30 21:30:33 +0000
+++ b/bzrlib/groupcompress.py	2009-03-31 09:57:05 +0000
@@ -56,8 +56,6 @@
     )
 
 _USE_LZMA = False and (pylzma is not None)
-_NO_LABELS = True
-_FAST = False
 
 # osutils.sha_string('')
 _null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
@@ -160,9 +158,6 @@
         # map by key? or just order in file?
         self._entries = {}
         self._compressor_name = None
-        self._z_header_length = None
-        self._header_length = None
-        self._z_header = None
         self._z_content = None
         self._z_content_decompressor = None
         self._z_content_length = None
@@ -170,39 +165,10 @@
         self._content = None
 
     def __len__(self):
-        return self._content_length + self._header_length
-
-    def _parse_header(self):
-        """Parse the header part of the block."""
-        assert self._z_header is not None
-        if self._z_header == '':
-            # Nothing to process
-            self._z_header = None
-            return
-        if self._compressor_name == 'lzma':
-            header = pylzma.decompress(self._z_header)
-        else:
-            assert self._compressor_name == 'zlib'
-            header = zlib.decompress(self._z_header)
-        self._z_header = None # We have consumed the header
-        lines = header.split('\n')
-        del header
-        info_dict = {}
-        for line in lines:
-            if not line: #End of record
-                if not info_dict:
-                    break
-                self.add_entry(**info_dict)
-                info_dict = {}
-                continue
-            key, value = line.split(':', 1)
-            if key == 'key':
-                value = tuple(map(intern, value.split('\x00')))
-            elif key in ('start', 'length'):
-                value = int(value)
-            elif key == 'type':
-                value = intern(value)
-            info_dict[key] = value
+        # This is the maximum number of bytes this object will reference if
+        # everything is decompressed. However, if we decompress less than
+        # everything... (this would cause some problems for LRUSizeCache)
+        return self._content_length + self._z_content_length
 
     def _ensure_content(self, num_bytes=None):
         """Make sure that content has been expanded enough.
@@ -277,48 +243,25 @@
                 # The stream is finished
                 self._z_content_decompressor = None
 
-    def _parse_bytes(self, bytes):
+    def _parse_bytes(self, bytes, pos):
         """Read the various lengths from the header.
 
         This also populates the various 'compressed' buffers.
 
         :return: The position in bytes just after the last newline
         """
-        # At present, there are 4 lengths to be read, we have 2 integers for
-        # the length of the compressed and uncompressed header, and 2 integers
-        # for the compressed and uncompressed content
-        # 14 bytes can represent > 1TB, so to avoid checking too far, cap the
-        # search to 14 bytes.
-        pos = bytes.index('\n', 6, 20)
-        self._z_header_length = int(bytes[6:pos])
-        pos += 1
-        pos2 = bytes.index('\n', pos, pos + 14)
-        self._header_length = int(bytes[pos:pos2])
-        end_of_z_lengths = pos2
-        pos2 += 1
-        # Older versions don't have the content lengths, if we want to preserve
-        # backwards compatibility, we could try/except over these, and allow
-        # them to be skipped
-        try:
-            pos = bytes.index('\n', pos2, pos2 + 14)
-            self._z_content_length = int(bytes[pos2:pos])
-            pos += 1
-            pos2 = bytes.index('\n', pos, pos + 14)
-            self._content_length = int(bytes[pos:pos2])
-            pos = pos2 + 1
-            assert len(bytes) == (pos + self._z_header_length +
-                                  self._z_content_length)
-            pos2 = pos + self._z_header_length
-            self._z_header = bytes[pos:pos2]
-            self._z_content = bytes[pos2:]
-            assert len(self._z_content) == self._z_content_length
-        except ValueError:
-            # This is the older form, which did not encode its content length
-            pos = end_of_z_lengths + 1
-            pos2 = pos + self._z_header_length
-            self._z_header = bytes[pos:pos2]
-            self._z_content = bytes[pos2:]
-            self._z_content_length = len(self._z_content)
+        # At present, we have 2 integers for the compressed and uncompressed
+        # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
+        # checking too far, cap the search to 14 bytes.
+        pos2 = bytes.index('\n', pos, pos + 14)
+        self._z_content_length = int(bytes[pos:pos2])
+        pos = pos2 + 1
+        pos2 = bytes.index('\n', pos, pos + 14)
+        self._content_length = int(bytes[pos:pos2])
+        pos = pos2 + 1
+        assert len(bytes) == (pos + self._z_content_length)
+        self._z_content = bytes[pos:]
+        assert len(self._z_content) == self._z_content_length
 
     @classmethod
     def from_bytes(cls, bytes):
@@ -331,9 +274,7 @@
             out._compressor_name = 'lzma'
         else:
             raise ValueError('unknown compressor: %r' % (bytes,))
-        out._parse_bytes(bytes)
-        if not _NO_LABELS:
-            out._parse_header()
+        out._parse_bytes(bytes, 6)
         return out
 
     def extract(self, key, start, end, sha1=None):
@@ -392,66 +333,24 @@
         self._content_length = len(content)
         self._content = content
         self._z_content = None
-        self._z_header_length = None
 
     def to_bytes(self):
         """Encode the information into a byte stream."""
         compress = zlib.compress
         if _USE_LZMA:
             compress = pylzma.compress
-        chunks = []
-        for key in sorted(self._entries):
-            entry = self._entries[key]
-            chunk = ('key:%s\n'
-                     'sha1:%s\n'
-                     'type:%s\n'
-                     'start:%s\n'
-                     'length:%s\n'
-                     '\n'
-                     ) % ('\x00'.join(entry.key),
-                          entry.sha1,
-                          entry.type,
-                          entry.start,
-                          entry.length,
-                          )
-            chunks.append(chunk)
-        bytes = ''.join(chunks)
-        info_len = len(bytes)
-        z_header_bytes = compress(bytes)
-        del bytes, chunks
-        z_header_len = len(z_header_bytes)
-        # TODO: we may want to have the header compressed in the same chain
-        #       as the data, or we may not, evaulate it
-        #       having them compressed together is probably a win for
-        #       revisions and the 'inv' portion of chk inventories. As the
-        #       label in the header is duplicated in the text.
-        #       For chk pages and real bytes, I would guess this is not
-        #       true.
-        if _NO_LABELS:
-            z_header_bytes = ''
-            z_header_len = 0
-            info_len = 0
-        if self._z_content is not None:
-            content_len = self._content_length
-            z_content_len = self._z_content_length
-            z_content_bytes = self._z_content
-        else:
+        if self._z_content is None:
             assert self._content is not None
-            content_len = self._content_length
-            z_content_bytes = compress(self._content)
-            self._z_content = z_content_bytes
-            z_content_len = len(z_content_bytes)
-            self._z_content_length = z_content_len
+            self._z_content = compress(self._content)
+            self._z_content_length = len(self._z_content)
         if _USE_LZMA:
             header = self.GCB_LZ_HEADER
         else:
             header = self.GCB_HEADER
         chunks = [header,
-                  '%d\n%d\n%d\n%d\n' % (z_header_len, info_len,
-                                        z_content_len, content_len)
+                  '%d\n%d\n' % (self._z_content_length, self._content_length),
+                  self._z_content,
                  ]
-        chunks.append(z_header_bytes)
-        chunks.append(z_content_bytes)
         return ''.join(chunks)
 
 
@@ -828,10 +727,7 @@
             len_mini_header = 1 + len(enc_length)
             length = len(delta) + len_mini_header
             new_chunks = ['d', enc_length, delta]
-            if _FAST:
-                self._delta_index._source_offset += length
-            else:
-                self._delta_index.add_delta_source(delta, len_mini_header)
+            self._delta_index.add_delta_source(delta, len_mini_header)
         self._block.add_entry(key, type=type, sha1=sha1,
                               start=self.endpoint, length=length)
         start = self.endpoint
@@ -1463,6 +1359,9 @@
                     value = "%d %d %d %d" % (block_start, block_length,
                                              record._start, record._end)
                     nodes = [(record.key, value, (record.parents,))]
+                    # TODO: Consider buffering up many nodes to be added, not
+                    #       sure how much overhead this has, but we're seeing
+                    #       ~23s / 120s in add_records calls
                     self._index.add_records(nodes, random_id=random_id)
                     continue
             try:

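With the label header gone, a serialised block is just a magic string, two decimal lengths, and the compressed body. A round-trip sketch of the new layout (Python 2 strings, as in bzrlib of this era; only the 'gcb1z\n' zlib flavour is shown):

    import zlib

    content = 'this is some content\nthis content will be compressed\n'
    z_content = zlib.compress(content)
    # to_bytes(): magic, z_content length, content length, payload
    block = 'gcb1z\n%d\n%d\n%s' % (len(z_content), len(content), z_content)

    # _parse_bytes(bytes, 6): two newline-terminated decimal lengths
    # starting just after the 6-byte magic, then the payload itself.
    pos = block.index('\n', 6)
    z_len = int(block[6:pos])
    pos2 = block.index('\n', pos + 1)
    c_len = int(block[pos + 1:pos2])
    payload = block[pos2 + 1:]
    assert len(payload) == z_len
    assert zlib.decompress(payload) == content
    assert len(content) == c_len
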
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py	2009-03-27 16:36:50 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py	2009-03-31 09:57:05 +0000
@@ -725,42 +725,6 @@
         return packer.pack(pb)
 
 
-# This format has been disabled for now. It is not expected that this will be a
-# useful next-generation format.
-#
-# class RepositoryFormatPackGCPlain(RepositoryFormatKnitPack6):
-#     """A B+Tree index using pack repository."""
-#
-#     repository_class = GCPackRepository
-#     rich_root_data = False
-#     # Note: We cannot unpack a delta that references a text we haven't
-#     # seen yet. There are 2 options, work in fulltexts, or require
-#     # topological sorting. Using fulltexts is more optimal for local
-#     # operations, because the source can be smart about extracting
-#     # multiple in-a-row (and sharing strings). Topological is better
-#     # for remote, because we access less data.
-#     _fetch_order = 'unordered'
-#     _fetch_uses_deltas = False
-#
-#     def _get_matching_bzrdir(self):
-#         return bzrdir.format_registry.make_bzrdir('gc-no-rich-root')
-#
-#     def _ignore_setting_bzrdir(self, format):
-#         pass
-#
-#     _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
-#
-#     def get_format_string(self):
-#         """See RepositoryFormat.get_format_string()."""
-#         return ("Bazaar development format - btree+gc "
-#             "(needs bzr.dev from 1.13)\n")
-#
-#     def get_format_description(self):
-#         """See RepositoryFormat.get_format_description()."""
-#         return ("Development repository format - btree+groupcompress "
-#             ", interoperates with pack-0.92\n")
-#
-
 class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
     """A hashed CHK+group compress pack repository."""
 
@@ -790,7 +754,7 @@
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return ('Bazaar development format - hash16chk+gc rich-root'
-                ' (needs bzr.dev from 1.13)\n')
+                ' (needs bzr.dev from 1.14)\n')
 
     def get_format_description(self):
         """See RepositoryFormat.get_format_description()."""
@@ -827,7 +791,7 @@
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return ('Bazaar development format - hash255chk+gc rich-root'
-                ' (needs bzr.dev from 1.13)\n')
+                ' (needs bzr.dev from 1.14)\n')
 
     def get_format_description(self):
         """See RepositoryFormat.get_format_description()."""
@@ -872,7 +836,7 @@
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return ('Bazaar development format - hash255chk+gc rich-root bigpage'
-                ' (needs bzr.dev from 1.13)\n')
+                ' (needs bzr.dev from 1.14)\n')
 
     def get_format_description(self):
         """See RepositoryFormat.get_format_description()."""

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-03-31 07:43:43 +0000
+++ b/bzrlib/repository.py	2009-03-31 10:02:41 +0000
@@ -3033,24 +3033,18 @@
     'bzrlib.repofmt.pack_repo',
     'RepositoryFormatPackDevelopment5Hash255',
     )
-# XXX: This format is scheduled for termination
-# format_registry.register_lazy(
-#     'Bazaar development format - btree+gc (needs bzr.dev from 1.13)\n',
-#     'bzrlib.repofmt.groupcompress_repo',
-#     'RepositoryFormatPackGCPlain',
-#     )
 format_registry.register_lazy(
-    'Bazaar development format - hash16chk+gc rich-root (needs bzr.dev from 1.13)\n',
+    'Bazaar development format - hash16chk+gc rich-root (needs bzr.dev from 1.14)\n',
     'bzrlib.repofmt.groupcompress_repo',
     'RepositoryFormatPackGCCHK16',
     )
 format_registry.register_lazy(
-    'Bazaar development format - hash255chk+gc rich-root (needs bzr.dev from 1.13)\n',
+    'Bazaar development format - hash255chk+gc rich-root (needs bzr.dev from 1.14)\n',
     'bzrlib.repofmt.groupcompress_repo',
     'RepositoryFormatPackGCCHK255',
     )
 format_registry.register_lazy(
-    'Bazaar development format - hash255chk+gc rich-root bigpage (needs bzr.dev from 1.13)\n',
+    'Bazaar development format - hash255chk+gc rich-root bigpage (needs bzr.dev from 1.14)\n',
     'bzrlib.repofmt.groupcompress_repo',
     'RepositoryFormatPackGCCHK255Big',
     )

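The '1.13' to '1.14' bump in these strings is deliberate: the format string is both the registry key and the exact bytes stored in the repository's format file, so repositories written with the old labelled blocks can never be opened as the new layout by mistake. An illustrative sketch of that lookup (hypothetical names; the real registration goes through format_registry.register_lazy(), as above):

    formats = {
        'Bazaar development format - hash16chk+gc rich-root'
        ' (needs bzr.dev from 1.14)\n': 'RepositoryFormatPackGCCHK16',
        # ... one entry per registered format string
    }

    def find_format(format_file_bytes):
        """Map the on-disk format marker to a repository format."""
        try:
            return formats[format_file_bytes]
        except KeyError:
            raise KeyError('Unknown format: %r' % (format_file_bytes,))
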
=== modified file 'bzrlib/tests/test__groupcompress_pyx.py'
--- a/bzrlib/tests/test__groupcompress_pyx.py	2009-03-25 07:54:11 +0000
+++ b/bzrlib/tests/test__groupcompress_pyx.py	2009-03-31 09:57:05 +0000
@@ -123,40 +123,40 @@
 
     def test_make_noop_delta(self):
         ident_delta = self.make_delta(_text1, _text1)
-        self.assertEqual('MM\x90M', ident_delta)
+        self.assertEqual('M\x90M', ident_delta)
         ident_delta = self.make_delta(_text2, _text2)
-        self.assertEqual('NN\x90N', ident_delta)
+        self.assertEqual('N\x90N', ident_delta)
         ident_delta = self.make_delta(_text3, _text3)
-        self.assertEqual('\x87\x01\x87\x01\x90\x87', ident_delta)
+        self.assertEqual('\x87\x01\x90\x87', ident_delta)
 
     def test_make_delta(self):
         delta = self.make_delta(_text1, _text2)
-        self.assertEqual('MN\x90/\x1fdiffer from\nagainst other text\n', delta)
+        self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n', delta)
         delta = self.make_delta(_text2, _text1)
-        self.assertEqual('NM\x90/\x1ebe matched\nagainst other text\n', delta)
+        self.assertEqual('M\x90/\x1ebe matched\nagainst other text\n', delta)
         delta = self.make_delta(_text3, _text1)
-        self.assertEqual('\x87\x01M\x90M', delta)
+        self.assertEqual('M\x90M', delta)
         delta = self.make_delta(_text3, _text2)
-        self.assertEqual('\x87\x01N\x90/\x1fdiffer from\nagainst other text\n',
+        self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n',
                          delta)
 
     def test_apply_delta_is_typesafe(self):
-        self.apply_delta(_text1, 'MM\x90M')
-        self.assertRaises(TypeError,
-            self.apply_delta, object(), 'MM\x90M')
-        self.assertRaises(TypeError,
-            self.apply_delta, unicode(_text1), 'MM\x90M')
-        self.assertRaises(TypeError,
-            self.apply_delta, _text1, u'MM\x90M')
+        self.apply_delta(_text1, 'M\x90M')
+        self.assertRaises(TypeError,
+            self.apply_delta, object(), 'M\x90M')
+        self.assertRaises(TypeError,
+            self.apply_delta, unicode(_text1), 'M\x90M')
+        self.assertRaises(TypeError,
+            self.apply_delta, _text1, u'M\x90M')
         self.assertRaises(TypeError,
             self.apply_delta, _text1, object())
 
     def test_apply_delta(self):
         target = self.apply_delta(_text1,
-                    'MN\x90/\x1fdiffer from\nagainst other text\n')
+                    'N\x90/\x1fdiffer from\nagainst other text\n')
         self.assertEqual(_text2, target)
         target = self.apply_delta(_text2,
-                    'NM\x90/\x1ebe matched\nagainst other text\n')
+                    'M\x90/\x1ebe matched\nagainst other text\n')
         self.assertEqual(_text1, target)
 
 
@@ -169,7 +169,7 @@
     def test_make_delta(self):
         di = self._gc_module.DeltaIndex(_text1)
         delta = di.make_delta(_text2)
-        self.assertEqual('MN\x90/\x1fdiffer from\nagainst other text\n', delta)
+        self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n', delta)
 
     def test_delta_against_multiple_sources(self):
         di = self._gc_module.DeltaIndex()
@@ -180,7 +180,7 @@
         delta = di.make_delta(_third_text)
         result = self._gc_module.apply_delta(_first_text + _second_text, delta)
         self.assertEqualDiff(_third_text, result)
-        self.assertEqual('\xac\x01\x85\x01\x90\x14\x0chas some in '
+        self.assertEqual('\x85\x01\x90\x14\x0chas some in '
                          '\x91v6\x03and\x91d"\x91:\n', delta)
 
     def test_delta_with_offsets(self):
@@ -196,7 +196,7 @@
             '12345' + _first_text + '1234567890' + _second_text, delta)
         self.assertIsNot(None, result)
         self.assertEqualDiff(_third_text, result)
-        self.assertEqual('\xbb\x01\x85\x01\x91\x05\x14\x0chas some in '
+        self.assertEqual('\x85\x01\x91\x05\x14\x0chas some in '
                          '\x91\x856\x03and\x91s"\x91?\n', delta)
 
     def test_delta_with_delta_bytes(self):
@@ -205,7 +205,7 @@
         di.add_source(_first_text, 0)
         self.assertEqual(len(_first_text), di._source_offset)
         delta = di.make_delta(_second_text)
-        self.assertEqual('Dh\tsome more\x91\x019'
+        self.assertEqual('h\tsome more\x91\x019'
                          '&previous text\nand has some extra text\n', delta)
         di.add_delta_source(delta, 0)
         source += delta
@@ -218,8 +218,8 @@
         # Note that we don't match the 'common with the', because it isn't long
         # enough to match in the original text, and those bytes are not present
         # in the delta for the second text.
-        self.assertEqual('z\x85\x01\x90\x14\x1chas some in common with the '
-                         '\x91T&\x03and\x91\x18,', second_delta)
+        self.assertEqual('\x85\x01\x90\x14\x1chas some in common with the '
+                         '\x91S&\x03and\x91\x18,', second_delta)
         # Add this delta, and create a new delta for the same text. We should
         # find the remaining text, and only insert the short 'and' text.
         di.add_delta_source(second_delta, 0)
@@ -227,14 +227,14 @@
         third_delta = di.make_delta(_third_text)
         result = self._gc_module.apply_delta(source, third_delta)
         self.assertEqualDiff(_third_text, result)
-        self.assertEqual('\xa6\x01\x85\x01\x90\x14\x91\x80\x1c'
-                         '\x91T&\x03and\x91\x18,', third_delta)
+        self.assertEqual('\x85\x01\x90\x14\x91\x7e\x1c'
+                         '\x91S&\x03and\x91\x18,', third_delta)
         # Now create a delta, which we know won't be able to be 'fit' into the
         # existing index
         fourth_delta = di.make_delta(_fourth_text)
         self.assertEqual(_fourth_text,
                          self._gc_module.apply_delta(source, fourth_delta))
-        self.assertEqual('\xa6\x01\x80\x01'
+        self.assertEqual('\x80\x01'
                          '\x7f123456789012345\nsame rabin hash\n'
                          '123456789012345\nsame rabin hash\n'
                          '123456789012345\nsame rabin hash\n'
@@ -246,4 +246,4 @@
         fifth_delta = di.make_delta(_fourth_text)
         self.assertEqual(_fourth_text,
                          self._gc_module.apply_delta(source, fifth_delta))
-        self.assertEqual('\xac\x02\x80\x01\x91\xab\x7f\x01\n', fifth_delta)
+        self.assertEqual('\x80\x01\x91\xa7\x7f\x01\n', fifth_delta)

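Each expected delta above shrank by exactly its old source-size varint. Decoding one of the updated values by hand makes the layout visible (a sketch; bytes with the high bit clear are insert commands giving a literal length):

    delta = 'N\x90/\x1fdiffer from\nagainst other text\n'
    target_size = ord(delta[0])  # 0x4E = 78; the old form put 'M' (the
                                 # source size, 77) in front of this byte
    op = ord(delta[1])           # 0x90: copy, one length byte follows
    copy_len = ord(delta[2])     # 0x2F = 47 bytes from source offset 0
    insert_len = ord(delta[3])   # 0x1F = 31 literal bytes follow
    assert delta[4:4 + insert_len] == 'differ from\nagainst other text\n'
    assert copy_len + insert_len == target_size
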
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py	2009-03-25 07:54:11 +0000
+++ b/bzrlib/tests/test_groupcompress.py	2009-03-31 09:57:05 +0000
@@ -100,9 +100,9 @@
                                     'different\n'), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x10',
-            # source and target length
-            '\x36\x36',
+            'd\x0f',
+            # target length
+            '\x36',
             # copy the line common
             '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
             # add the line different, and the trailing newline
@@ -130,9 +130,9 @@
             sha1_3)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0c',
-            # source and target length
-            '\x67\x5f'
+            'd\x0b',
+            # target length
+            '\x5f'
             # insert new
             '\x03new',
             # Copy of first parent 'common' range
@@ -229,7 +229,7 @@
 
     def test_from_minimal_bytes(self):
         block = groupcompress.GroupCompressBlock.from_bytes(
-            'gcb1z\n0\n0\n0\n0\n')
+            'gcb1z\n0\n0\n')
         self.assertIsInstance(block, groupcompress.GroupCompressBlock)
         self.assertEqual({}, block._entries)
         self.assertIs(None, block._content)
@@ -239,70 +239,21 @@
         self.assertEqual('', block._z_content)
         block._ensure_content() # Ensure content is safe to call 2x
 
-    def test_from_bytes_with_labels(self):
-        header = ('key:bing\n'
-            'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
-            'type:fulltext\n'
-            'start:100\n'
-            'length:100\n'
-            '\n'
-            'key:foo\x00bar\n'
-            'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
-            'type:fulltext\n'
-            'start:0\n'
-            'length:100\n'
-            '\n')
-        z_header = zlib.compress(header)
+    def test_from_bytes(self):
         content = ('a tiny bit of content\n')
         z_content = zlib.compress(content)
         z_bytes = (
             'gcb1z\n' # group compress block v1 plain
-            '%d\n' # Length of zlib bytes
-            '%d\n' # Length of all meta-info
             '%d\n' # Length of compressed content
             '%d\n' # Length of uncompressed content
-            '%s'   # Compressed header
             '%s'   # Compressed content
-            ) % (len(z_header), len(header),
-                 len(z_content), len(content),
-                 z_header, z_content)
+            ) % (len(z_content), len(content), z_content)
         block = groupcompress.GroupCompressBlock.from_bytes(
             z_bytes)
-        block._parse_header()
-        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
-        self.assertEqual([('bing',), ('foo', 'bar')], sorted(block._entries))
-        bing = block._entries[('bing',)]
-        self.assertEqual(('bing',), bing.key)
-        self.assertEqual('fulltext', bing.type)
-        self.assertEqual('abcd'*10, bing.sha1)
-        self.assertEqual(100, bing.start)
-        self.assertEqual(100, bing.length)
-        foobar = block._entries[('foo', 'bar')]
-        self.assertEqual(('foo', 'bar'), foobar.key)
-        self.assertEqual('fulltext', foobar.type)
-        self.assertEqual('abcd'*10, foobar.sha1)
-        self.assertEqual(0, foobar.start)
-        self.assertEqual(100, foobar.length)
         self.assertEqual(z_content, block._z_content)
         self.assertIs(None, block._content)
-        block._ensure_content()
-        self.assertEqual(z_content, block._z_content)
-        self.assertEqual(content, block._content)
-
-    def test_from_old_bytes(self):
-        # Backwards compatibility, with groups that didn't define content length
-        content = ('a tiny bit of content\n')
-        z_content = zlib.compress(content)
-        z_bytes = (
-            'gcb1z\n' # group compress block v1 plain
-            '0\n' # Length of zlib bytes
-            '0\n' # Length of all meta-info
-            ''    # Compressed header
-            '%s'   # Compressed content
-            ) % (z_content)
-        block = groupcompress.GroupCompressBlock.from_bytes(
-            z_bytes)
-        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
+        self.assertEqual(len(z_content), block._z_content_length)
+        self.assertEqual(len(content), block._content_length)
         block._ensure_content()
         self.assertEqual(z_content, block._z_content)
         self.assertEqual(content, block._content)
@@ -318,38 +269,23 @@
         self.assertEqual(100, e.length)
 
     def test_to_bytes(self):
-        no_labels = groupcompress._NO_LABELS
-        def reset():
-            groupcompress._NO_LABELS = no_labels
-        self.addCleanup(reset)
-        groupcompress._NO_LABELS = False
+        content = ('this is some content\n'
+                   'this content will be compressed\n')
         gcb = groupcompress.GroupCompressBlock()
         gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
         gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
-        gcb.set_content('this is some content\n'
-                        'this content will be compressed\n')
+        gcb.set_content(content)
         bytes = gcb.to_bytes()
+        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
+        self.assertEqual(gcb._content_length, len(content))
         expected_header =('gcb1z\n' # group compress block v1 zlib
-                          '76\n' # Length of compressed bytes
-                          '183\n' # Length of uncompressed meta-info
-                          '50\n' # Length of compressed content
-                          '53\n' # Length of uncompressed content
-                         )
+                          '%d\n' # Length of compressed content
+                          '%d\n' # Length of uncompressed content
+                         ) % (gcb._z_content_length, gcb._content_length)
         self.assertStartsWith(bytes, expected_header)
         remaining_bytes = bytes[len(expected_header):]
         raw_bytes = zlib.decompress(remaining_bytes)
-        self.assertEqualDiff('key:bing\n'
-                             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
-                             'type:fulltext\n'
-                             'start:100\n'
-                             'length:100\n'
-                             '\n'
-                             'key:foo\x00bar\n'
-                             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
-                             'type:fulltext\n'
-                             'start:0\n'
-                             'length:100\n'
-                             '\n', raw_bytes)
+        self.assertEqual(content, raw_bytes)
 
     def test_partial_decomp(self):
         content_chunks = []
@@ -595,7 +531,7 @@
         ('key3',): "yet another text which won't be extracted\n"
                    "with a reasonable amount of compressible bytes\n",
         ('key4',): "this will be extracted\n"
-                   "but references bytes from\n"
+                   "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
                    "with a reasonable amount of compressible bytes\n",
     }
@@ -681,7 +617,7 @@
         self.assertEqualDiff('key1\n'
                              '\n'  # no parents
                              '%d\n' # start offset
-                             '%d\n' # end byte
+                             '%d\n' # end offset
                              'key4\n'
                              '\n'
                              '%d\n'

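The 'd' records in these streams carry a mini-header of their own: a one-byte type tag plus the base-128 encoded length of the delta that follows, which is why the prefix moves from 'd\x10' to 'd\x0f' once the source length disappears. Assembled by hand (a sketch; lengths under 0x80 encode as a single varint byte):

    delta = ('\x36'              # target length, 0x36 = 54
             '\x91\x0a\x2c'      # copy: offset 0x0a, length 0x2c = 44
             '\x0adifferent\n')  # insert 10 literal bytes
    record = 'd' + chr(len(delta)) + delta
    assert record.startswith('d\x0f')  # 15-byte delta; was 16 with the
                                       # source length included
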

