Rev 4669: Refactor the check_rebuild code a bit, so that we can potentially in http://bazaar.launchpad.net/~jameinel/bzr/2.1b1-pack-on-the-fly

John Arbash Meinel john at arbash-meinel.com
Tue Sep 1 22:41:33 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1b1-pack-on-the-fly

------------------------------------------------------------
revno: 4669
revision-id: john at arbash-meinel.com-20090901214127-gs4xwl6t5q81k0hh
parent: john at arbash-meinel.com-20090901212729-1vetogsojqg1go75
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1b1-pack-on-the-fly
timestamp: Tue 2009-09-01 16:41:27 -0500
message:
  Refactor the check_rebuild code a bit, so that we can potentially
  not do the actual rebuilding, but just check that we want to.
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-09-01 21:27:29 +0000
+++ b/bzrlib/groupcompress.py	2009-09-01 21:41:27 +0000
@@ -457,7 +457,6 @@
                 # There are code paths that first extract as fulltext, and then
                 # extract as storage_kind (smart fetch). So we don't break the
                 # refcycle here, but instead in manager.get_record_stream()
-                # self._manager = None
             if storage_kind == 'fulltext':
                 return self._bytes
             else:
@@ -546,22 +545,23 @@
         # time (self._block._content) is a little expensive.
         self._block._ensure_content(self._last_byte)
 
-    def _check_rebuild_block(self):
+    def _check_rebuild_action(self):
         """Check to see if our block should be repacked."""
         total_bytes_used = 0
         last_byte_used = 0
         for factory in self._factories:
             total_bytes_used += factory._end - factory._start
-            last_byte_used = max(last_byte_used, factory._end)
-        # If we are using most of the bytes from the block, we have nothing
-        # else to check (currently more than 1/2)
+            if last_byte_used < factory._end:
+                last_byte_used = factory._end
+        # If we are using more than half of the bytes from the block, we have
+        # nothing else to check
         if total_bytes_used * 2 >= self._block._content_length:
-            return
-        # Can we just strip off the trailing bytes? If we are going to be
-        # transmitting more than 50% of the front of the content, go ahead
+            return None, last_byte_used
+        # We are using less than 50% of the content. Is the content we are
+        # using at the beginning of the block? If so, we can just trim the
+        # tail, rather than rebuilding from scratch.
         if total_bytes_used * 2 > last_byte_used:
-            self._trim_block(last_byte_used)
-            return
+            return 'trim', last_byte_used
 
         # We are using a small amount of the data, and it isn't just packed
         # nicely at the front, so rebuild the content.
@@ -574,7 +574,18 @@
         #       expanding many deltas into fulltexts, as well.
         #       If we build a cheap enough 'strip', then we could try a strip,
         #       if that expands the content, we then rebuild.
-        self._rebuild_block()
+        return 'rebuild', last_byte_used
+
+    def _check_rebuild_block(self):
+        action, last_byte_used = self._check_rebuild_action()
+        if action is None:
+            return
+        if action == 'trim':
+            self._trim_block(last_byte_used)
+        elif action == 'rebuild':
+            self._rebuild_block()
+        else:
+            raise ValueError('unknown rebuild action: %r' % (action,))
 
     def _wire_bytes(self):
         """Return a byte stream suitable for transmitting over the wire."""
@@ -1587,6 +1598,7 @@
                 if record.storage_kind == 'groupcompress-block':
                     # Check to see if we really want to re-use this block
                     insert_manager = record._manager
+                    insert_manager._check_rebuild_block()
                     if len(insert_manager._factories) == 1:
                         # This block only has a single record in it
                         # Mark this block to be rebuilt
@@ -1606,6 +1618,11 @@
                                            'groupcompress-block-ref'):
                     if insert_manager is None:
                         raise AssertionError('No insert_manager set')
+                    if insert_manager is not record._manager:
+                        raise AssertionError('insert_manager does not match'
+                            ' the current record, we cannot be positive'
+                            ' that the appropriate content was inserted.'
+                            )
                     value = "%d %d %d %d" % (block_start, block_length,
                                              record._start, record._end)
                     nodes = [(record.key, value, (record.parents,))]



More information about the bazaar-commits mailing list