Rev 4467: Teach groupcompress repositories to honour pack hints, and also not error when a CHK page is not in the packs being repacked by partial pack operations. in http://people.ubuntu.com/~robertc/baz2.0/pending/autopack-cross-format-fetch

Robert Collins robertc at robertcollins.net
Mon Jun 22 05:56:28 BST 2009


At http://people.ubuntu.com/~robertc/baz2.0/pending/autopack-cross-format-fetch

------------------------------------------------------------
revno: 4467
revision-id: robertc at robertcollins.net-20090622045621-plce53iif067uod1
parent: robertc at robertcollins.net-20090622022509-qn2rjozy7g1hsmpv
committer: Robert Collins <robertc at robertcollins.net>
branch nick: autopack-cross-format-fetch
timestamp: Mon 2009-06-22 14:56:21 +1000
message:
  Teach groupcompress repositories to honour pack hints, and also not error when a CHK page is not in the packs being repacked by partial pack operations.
=== modified file 'NEWS'
--- a/NEWS	2009-06-22 02:25:09 +0000
+++ b/NEWS	2009-06-22 04:56:21 +0000
@@ -42,6 +42,10 @@
   ``BZR_PROGRESS_BAR`` is set to ``none``.
   (Martin Pool, #339385)
 
+* Repositories using CHK pages (which includes the new 2a format) will no
+  longer error during commit or push operations when an autopack operation
+  is triggered. (Robert Collins, #365615)
+
 Internals
 *********
 
@@ -58,6 +62,10 @@
   for files with long ancestry and 'cherrypicked' changes.)
   (John Arbash Meinel)
 
+* ``GroupCompress`` repositories now take advantage of the pack hints
+  parameter to permit cross-format fetching to incrementally pack the
+  converted data. (Robert Collins)
+
 * pack <=> pack fetching is now done via a ``PackStreamSource`` rather
   than the ``Packer`` code. The user visible change is that we now
   properly fetch the minimum number of texts for non-smart fetching.
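
The behaviour described in these two NEWS entries is reachable through the
public ``Repository.pack(hint=...)`` call. A minimal sketch of incremental
packing driven by a hint, based only on the ``_pack_collection.names()`` and
``pack(hint=...)`` calls exercised in the test further down (the 'tree' path
and the commit messages are hypothetical):

# Sketch only: repack just the pack files created since a known baseline,
# leaving older packs untouched.
from bzrlib import workingtree

tree = workingtree.WorkingTree.open('tree')   # hypothetical working tree
repo = tree.branch.repository

baseline = set(repo._pack_collection.names())
tree.commit('2')                              # hypothetical commits; each
tree.commit('3')                              # normally adds a new pack
to_combine = list(set(repo._pack_collection.names()) - baseline)

# Only the packs named in the hint are candidates for combining; packs
# outside the hint are left alone, as the pack_repo.py change below shows.
repo.pack(hint=to_combine)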

=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py	2009-06-19 04:26:02 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py	2009-06-22 04:56:21 +0000
@@ -218,6 +218,7 @@
             p_id_roots_set = set()
             stream = source_vf.get_record_stream(keys, 'groupcompress', True)
             for idx, record in enumerate(stream):
+                # Inventories should always be with revisions; assume success.
                 bytes = record.get_bytes_as('fulltext')
                 chk_inv = inventory.CHKInventory.deserialise(None, bytes,
                                                              record.key)
@@ -294,6 +295,11 @@
                     stream = source_vf.get_record_stream(cur_keys,
                                                          'as-requested', True)
                     for record in stream:
+                        if record.storage_kind == 'absent':
+                            # An absent CHK record: we assume that the missing
+                            # record is in a different pack - e.g. a page not
+                            # altered by the commit we're packing.
+                            continue
                         bytes = record.get_bytes_as('fulltext')
                         # We don't care about search_key_func for this code,
                         # because we only care about external references.
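
The pattern introduced here - tolerating 'absent' records while walking CHK
pages - can be summarised as a small standalone sketch. It assumes only the
``get_record_stream``/``storage_kind``/``get_bytes_as`` API shown in the
diff; the function name is illustrative:

def iter_present_chk_pages(source_vf, keys):
    """Yield (key, fulltext bytes) for CHK pages that are actually present.

    Pages whose records come back as 'absent' live in packs outside the
    current partial repack, so they are skipped rather than treated as an
    error, mirroring the change above.
    """
    stream = source_vf.get_record_stream(keys, 'as-requested', True)
    for record in stream:
        if record.storage_kind == 'absent':
            # Another pack (not part of this operation) holds this page.
            continue
        yield record.key, record.get_bytes_as('fulltext')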

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2009-06-22 02:25:09 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2009-06-22 04:56:21 +0000
@@ -1562,22 +1562,19 @@
         self.ensure_loaded()
         total_packs = len(self._names)
         if self._already_packed():
-            # This is arguably wrong because we might not be optimal, but for
-            # now lets leave it in. (e.g. reconcile -> one pack. But not
-            # optimal.
             return
         total_revisions = self.revision_index.combined_index.key_count()
         # XXX: the following may want to be a class, to pack with a given
         # policy.
         mutter('Packing repository %s, which has %d pack files, '
-            'containing %d revisions into 1 packs.', self, total_packs,
-            total_revisions)
+            'containing %d revisions with hint %r.', self, total_packs,
+            total_revisions, hint)
         # determine which packs need changing
-        pack_distribution = [1]
         pack_operations = [[0, []]]
         for pack in self.all_packs():
-            pack_operations[-1][0] += pack.get_revision_count()
-            pack_operations[-1][1].append(pack)
+            if not hint or pack.name in hint:
+                pack_operations[-1][0] += pack.get_revision_count()
+                pack_operations[-1][1].append(pack)
         self._execute_pack_operations(pack_operations, OptimisingPacker)
 
     def plan_autopack_combinations(self, existing_packs, pack_distribution):
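
In other words, ``RepositoryPackCollection.pack`` now builds a single pack
operation from only the packs named in the hint (or from every pack when no
hint is supplied). The selection logic reduces to roughly the following
sketch, assuming objects exposing the ``.name`` and ``.get_revision_count()``
attributes used above:

def select_packs_for_hint(all_packs, hint=None):
    """Return [[revision_count, packs]] covering only the hinted packs."""
    pack_operations = [[0, []]]
    for pack in all_packs:
        if not hint or pack.name in hint:
            pack_operations[-1][0] += pack.get_revision_count()
            pack_operations[-1][1].append(pack)
    return pack_operations

With an empty or None hint this degenerates to the previous behaviour of
repacking everything into one pack; with a hint it touches only the packs
that the fetch or conversion just created.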

=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py	2009-06-19 04:19:22 +0000
+++ b/bzrlib/tests/test_repository.py	2009-06-22 04:56:21 +0000
@@ -692,6 +692,25 @@
         self.assertEqual(65536,
             inv.parent_id_basename_to_file_id._root_node.maximum_size)
 
+    def test_pack_with_hint(self):
+        tree = self.make_branch_and_tree('tree', format='2a')
+        # 1 commit to leave untouched
+        tree.commit('1')
+        to_keep = tree.branch.repository._pack_collection.names()
+        # 2 to combine
+        tree.commit('2')
+        tree.commit('3')
+        all = tree.branch.repository._pack_collection.names()
+        combine = list(set(all) - set(to_keep))
+        self.assertLength(3, all)
+        self.assertLength(2, combine)
+        tree.branch.repository.pack(hint=combine)
+        final = tree.branch.repository._pack_collection.names()
+        self.assertLength(2, final)
+        self.assertFalse(combine[0] in final)
+        self.assertFalse(combine[1] in final)
+        self.assertSubset(to_keep, final)
+
     def test_stream_source_to_gc(self):
         source = self.make_repository('source', format='2a')
         target = self.make_repository('target', format='2a')



