Rev 4675: Get a test written which exercises the 'trim' code path. in http://bazaar.launchpad.net/~jameinel/bzr/2.1b1-pack-on-the-fly

John Arbash Meinel john at arbash-meinel.com
Thu Sep 3 16:23:56 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1b1-pack-on-the-fly

------------------------------------------------------------
revno: 4675
revision-id: john at arbash-meinel.com-20090903152346-wysd9b9xork5qxs5
parent: john at arbash-meinel.com-20090902204614-33hhdj8dmimdhxw9
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1b1-pack-on-the-fly
timestamp: Thu 2009-09-03 10:23:46 -0500
message:
  Get a test written which exercises the 'trim' code path.
  
  However, getting it exercised exposed that this isn't a code path we
  will ever hit 'in the wild'.
  
  Specifically, the new 'rebuild on the fly' code path schedules any group
  less than 75% utilized to be rebuilt. But the 'trim' code path only
  activates when we are using <50% of the block and the last byte used is
  <2*percent_used of the block size. (So if we are using 30% of the
  block, the last byte used must fall within the first 60% of the block
  for us to trim rather than rebuild.)
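
  As a minimal sketch of the two conditions (illustrative Python; the
  function and threshold names here are made up, not real bzrlib
  attributes, only the 75%/50%/2x figures come from the code):

      def should_rebuild(used_bytes, block_size):
          # Any group less than 75% utilized is scheduled for rebuild.
          return used_bytes < 0.75 * block_size

      def could_trim(used_bytes, last_byte, block_size):
          # Trim only activates when <50% of the block is in use and
          # the last byte referenced is < 2*percent_used of the block.
          percent_used = float(used_bytes) / block_size
          return (percent_used < 0.5
                  and last_byte < 2 * percent_used * block_size)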
  
  Anyway, any condition under which we would trim is actually being
  turned into a rebuild instead. So it is probably best to remove the
  test and the trim code path from insert_record_stream.
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-09-02 20:46:14 +0000
+++ b/bzrlib/groupcompress.py	2009-09-03 15:23:46 +0000
@@ -1662,10 +1662,16 @@
             if reuse_blocks:
                 # If the reuse_blocks flag is set, check to see if we can just
                 # copy a groupcompress block as-is.
+                # We only check on the first record (groupcompress-block) not
+                # on all of the (groupcompress-block-ref) entries.
+                # The reuse_this_block flag is then kept for as long as we
+                # keep pulling records out of the same block.
                 if record.storage_kind == 'groupcompress-block':
                     # Check to see if we really want to re-use this block
                     insert_manager = record._manager
                     reuse_this_block = insert_manager.check_is_well_utilized()
+            else:
+                reuse_this_block = False
             if reuse_this_block:
                 # We still want to reuse this block
                 if record.storage_kind == 'groupcompress-block':

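In outline, the reuse decision made by the hunk above looks roughly like
this (a simplified sketch of the surrounding insert_record_stream loop;
only the decision is shown, not the block copying itself):

    reuse_this_block = reuse_blocks
    for record in stream:
        if reuse_blocks:
            # Only the first record of a block arrives as
            # 'groupcompress-block'; the 'groupcompress-block-ref'
            # records that follow it inherit the decision made here.
            if record.storage_kind == 'groupcompress-block':
                insert_manager = record._manager
                reuse_this_block = insert_manager.check_is_well_utilized()
        else:
            reuse_this_block = False
        if reuse_this_block:
            pass  # copy the compressed block as-is
        else:
            pass  # fall through: extract and recompress the texts
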
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py	2009-09-02 20:46:14 +0000
+++ b/bzrlib/tests/test_groupcompress.py	2009-09-03 15:23:46 +0000
@@ -697,6 +697,8 @@
         vf.insert_record_stream(grouped_stream('abcdefghijkl'))
         vf.writer.end()
         block = manager = None
+        raw_block_bytes = None
+        raw_block_z_bytes = None
         record_order = []
         # Everything should fit in a single block
         for record in vf.get_record_stream([(r,) for r in 'abcdefghijkl'],
@@ -705,11 +707,34 @@
             if block is None:
                 block = record._manager._block
                 manager = record._manager
+                raw_block_z_bytes = block._z_content
+                block._ensure_content(block._content_length)
+                raw_block_bytes = block._content
             else:
                 self.assertIs(block, record._manager._block)
                 self.assertIs(manager, record._manager)
         # 'unordered' fetching will put that in the same order it was inserted
         self.assertEqual([(r,) for r in 'abcdefghijkl'], record_order)
+        # If we fetch enough of the block, but not everything, then it
+        # should simply decompress, truncate, and recompress
+        vf2 = self.make_test_vf(True, dir='target')
+        def small_stream():
+            for record in vf.get_record_stream([(r,) for r in 'acf'],
+                                               'unordered', False):
+                record._manager._full_enough_block_size = 50
+                record._manager._max_cut_fraction = 0.3
+                yield record
+        vf2.insert_record_stream(small_stream())
+
+        vf2.writer.end()
+        record = vf2.get_record_stream([('a',)], 'unordered', False).next()
+        new_block = record._manager._block
+        self.assertIsNot(None, new_block._z_content)
+        self.assertNotEqual(raw_block_z_bytes, new_block._z_content)
+        new_block._ensure_content(new_block._content_length)
+        # The new content is simply the truncation of the old content
+        self.assertStartsWith(raw_block_bytes, new_block._content)
+        self.assertTrue(len(new_block._content) < len(raw_block_bytes))
 
     def test_add_missing_noncompression_parent_unvalidated_index(self):
         unvalidated = self.make_g_index_missing_parent()

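For reference, the 'decompress, truncate, and recompress' step that the
test comment above describes comes down to something like the following
sketch. It assumes the zlib compressor backing _z_content; the
trim_block name and its arguments are illustrative, and the test drives
this path by lowering the manager's _full_enough_block_size and
_max_cut_fraction thresholds:

    import zlib

    def trim_block(z_content, content_length, last_byte):
        # Expand the whole block, keep only the prefix up to the last
        # byte any surviving record references, then recompress.  This
        # is why the test can assert that the new block's content is a
        # strict prefix (assertStartsWith) of the old block's content.
        content = zlib.decompress(z_content)
        assert len(content) == content_length
        return zlib.compress(content[:last_byte])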