Rev 3894: Remove support for passing None for end in GroupCompressBlock.extract. in http://bazaar.launchpad.net/%7Ebzr/bzr/brisbane-core

John Arbash Meinel john at arbash-meinel.com
Fri Mar 20 15:43:55 GMT 2009


At http://bazaar.launchpad.net/%7Ebzr/bzr/brisbane-core

------------------------------------------------------------
revno: 3894
revision-id: john at arbash-meinel.com-20090320154310-q5ye037radsy052j
parent: john at arbash-meinel.com-20090320032107-bm9wg421rtcacy5i
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: brisbane-core
timestamp: Fri 2009-03-20 10:43:10 -0500
message:
  Remove support for passing None for end in GroupCompressBlock.extract.
  
  I decided the removal of the extra int in wire-bytes and indices was not a worthy
  trade-off versus the ability to _prepare_for_extract and cheaply filter bytes
  during fetch. And it makes the code simpler/easier to maintain.
  
  Also, add support for having an 'empty content' record, which has start=end=0.
  Support costs very little, and simplifies things.
  And now GroupCompressBlock.extract() just returns the bytes. It doesn't try to
  sha the content, nor does it return a GCBEntry. We weren't using it anyway.
  And it can save ~50 seconds of sha-ing all the content during 'bzr pack' of
  a launchpad branch.
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-03-19 03:06:02 +0000
+++ b/bzrlib/groupcompress.py	2009-03-20 15:43:10 +0000
@@ -339,20 +339,11 @@
         :param sha1: TODO (should we validate only when sha1 is supplied?)
         :return: The bytes for the content
         """
-        # Make sure we have enough bytes for this record
-        # TODO: if we didn't want to track the end of this entry, we could
-        #       _ensure_content(start+enough_bytes_for_type_and_length), and
-        #       then decode the entry length, and
-        #       _ensure_content(start+1+length)
-        #       It is 2 calls to _ensure_content(), but we always buffer a bit
-        #       extra anyway, and it means 1 less offset stored in the index,
-        #       and transmitted over the wire
-        if end is None:
-            # it takes 5 bytes to encode 2^32, so we need 1 byte to hold the
-            # 'f' or 'd' declaration, and then 5 more for the record length.
-            self._ensure_content(start + 6)
-        else:
-            self._ensure_content(end)
+        # Handle the 'Empty Content' record, even if we don't always write it
+        # yet.
+        if start == end == 0:
+            return ''
+        self._ensure_content(end)
         # The bytes are 'f' or 'd' for the type, then a variable-length
         # base128 integer for the content size, then the actual content
         # We know that the variable-length integer won't be longer than 5
@@ -368,23 +359,15 @@
         content_len, len_len = decode_base128_int(
                             self._content[start + 1:start + 6])
         content_start = start + 1 + len_len
-        if end is None:
-            end = content_start + content_len
-            self._ensure_content(end)
-        else:
-            if end != content_start + content_len:
-                raise ValueError('end != len according to field header'
-                    ' %s != %s' % (end, content_start + content_len))
-        entry = GroupCompressBlockEntry(key, type, sha1=None,
-                                        start=start, length=end-start)
+        if end != content_start + content_len:
+            raise ValueError('end != len according to field header'
+                ' %s != %s' % (end, content_start + content_len))
         content = self._content[content_start:end]
         if c == 'f':
             bytes = content
         elif c == 'd':
             bytes = _groupcompress_pyx.apply_delta(self._content, content)
-        if entry.sha1 is None:
-            entry.sha1 = sha_string(bytes)
-        return entry, bytes
+        return bytes
 
     def add_entry(self, key, type, sha1, start, length):
         """Add new meta info about an entry.
@@ -515,7 +498,7 @@
         if storage_kind in ('fulltext', 'chunked'):
             self._manager._prepare_for_extract()
             block = self._manager._block
-            _, bytes = block.extract(self.key, self._start, self._end)
+            bytes = block.extract(self.key, self._start, self._end)
             if storage_kind == 'fulltext':
                 return bytes
             else:

=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py	2009-03-19 03:06:02 +0000
+++ b/bzrlib/tests/test_groupcompress.py	2009-03-20 15:43:10 +0000
@@ -329,21 +329,6 @@
                              'length:100\n'
                              '\n', raw_bytes)
 
-    def test_extract_no_end(self):
-        # We should be able to extract a record, even if we only know the start
-        # of the bytes.
-        texts = {
-            ('key1',): 'text for key1\nhas bytes that are common\n',
-            ('key2',): 'text for key2\nhas bytes that are common\n',
-        }
-        entries, block = self.make_block(texts)
-        self.assertEqualDiff('text for key1\nhas bytes that are common\n',
-                             block.extract(('key1',), entries[('key1',)].start,
-                                           end=None)[1])
-        self.assertEqualDiff('text for key2\nhas bytes that are common\n',
-                             block.extract(('key2',), entries[('key2',)].start,
-                                           end=None)[1])
-
     def test_partial_decomp(self):
         content_chunks = []
         # We need a sufficient amount of data so that zlib.decompress has



More information about the bazaar-commits mailing list