Rev 2954: Partial support for native reconcile with packs, in http://people.ubuntu.com/~robertc/baz2.0/reconcile

Robert Collins robertc at robertcollins.net
Wed Oct 31 22:34:01 GMT 2007


At http://people.ubuntu.com/~robertc/baz2.0/reconcile

------------------------------------------------------------
revno: 2954
revision-id:robertc at robertcollins.net-20071031223353-rwxukxnvocam1jlb
parent: robertc at robertcollins.net-20071031205323-pbtan7ltoci9slpq
committer: Robert Collins <robertc at robertcollins.net>
branch nick: reconcile
timestamp: Thu 2007-11-01 09:33:53 +1100
message:
  Partial support for native reconcile with packs.
modified:
  bzrlib/reconcile.py            reweave_inventory.py-20051108164726-1e5e0934febac06e
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/repository_implementations/helpers.py helpers.py-20070924032407-m460yl9j5gu5ju85-2
  bzrlib/tests/repository_implementations/test_reconcile.py test_reconcile.py-20060223022332-572ef70a3288e369
=== modified file 'bzrlib/reconcile.py'
--- a/bzrlib/reconcile.py	2007-10-31 20:53:23 +0000
+++ b/bzrlib/reconcile.py	2007-10-31 22:33:53 +0000
@@ -30,6 +30,7 @@
     errors,
     ui,
     repository,
+    repofmt,
     )
 from bzrlib.trace import mutter, note
 from bzrlib.tsort import TopoSorter
@@ -469,28 +470,36 @@
         """Perform the steps to reconcile this repository."""
         if not self.thorough:
             return
-        self.repo.lock_write()
+        collection = self.repo._pack_collection
+        collection.ensure_loaded()
+        collection.lock_names()
         try:
-            self.repo._pack_collection.ensure_loaded()
-            self.repo._pack_collection.lock_names()
-            try:
-                self.repo.start_write_group()
-                try:
-                    self._new_pack = self.repo._pack_collection._new_pack
-                    self._copy_revisions()
-                except:
-                    self.repo.abort_write_group()
-                    raise
-                else:
-                    self.repo.commit_write_group()
-            finally:
-                self.repo._pack_collection._unlock_names()
+            packs = collection.all_packs()
+            all_revisions = self.repo.all_revision_ids()
+            total_inventories = len(list(
+                collection.inventory_index.combined_index.iter_all_entries()))
+            if len(all_revisions):
+                self._packer = repofmt.pack_repo.ReconcilePacker(
+                    collection, packs, ".reconcile", all_revisions)
+                new_pack = self._packer.pack(pb=self.pb)
+                if new_pack is not None:
+                    self._discard_packs(packs)
+            else:
+                # only make a new pack when there is data to copy.
+                self._discard_packs(packs)
+            self.garbage_inventories = total_inventories - len(list(
+                collection.inventory_index.combined_index.iter_all_entries()))
         finally:
-            self.repo.unlock()
-
-    def _copy_revisions(self):
-        """Copy revisions, regenerating the index as we go."""
-
-    def _pack_correcting_data(self):
-        """Perform a total pack, regenerating as much data as possible."""
-        revisions = self.repo.all_revision_ids()
+            collection._unlock_names()
+
+    def _discard_packs(self, packs):
+        """Discard some packs from the repository.
+
+        This removes them from the memory index and renames them into the
+        obsolete packs directory.
+        :param packs: The packs to discard.
+        """
+        for pack in packs:
+            self.repo._pack_collection._remove_pack_from_memory(pack)
+        self.repo._pack_collection._save_pack_names()
+        self.repo._pack_collection._obsolete_packs(packs)

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2007-10-25 01:24:51 +0000
+++ b/bzrlib/remote.py	2007-10-31 22:33:53 +0000
@@ -261,6 +261,9 @@
         # for tests
         self._reconcile_does_inventory_gc = True
         self._reconcile_fixes_text_parents = True
+        # This depends on the actual remote format, so force it off for maximum
+        # compatibility.
+        self._reconcile_backsup_inventory = False
         self.base = self.bzrdir.transport.base
 
     def __str__(self):

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-10-31 20:53:23 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-10-31 22:33:53 +0000
@@ -479,12 +479,20 @@
     """Create a pack from packs."""
 
     def __init__(self, pack_collection, packs, suffix, revision_ids=None):
+        """Create a Packer.
+
+        :param pack_collection: A RepositoryPackCollection object where the
+            new pack is being written to.
+        :param packs: The packs to combine.
+        :param suffix: The suffix to use on the temporary files for the pack.
+        :param revision_ids: Revision ids to limit the pack to.
+        """
         self.packs = packs
         self.suffix = suffix
         self.revision_ids = revision_ids
         self._pack_collection = pack_collection
 
-    def pack(self):
+    def pack(self, pb=None):
         """Create a new pack by reading data from other packs.
 
         This does little more than a bulk copy of data. One key difference
@@ -494,10 +502,8 @@
         source packs are not altered and are not required to be in the current
         pack collection.
 
-        :param packs: An iterable of Packs to combine.
-        :param revision_ids: Either None, to copy all data, or a list
-            of revision_ids to limit the copied data to the data they
-            introduced.
+        :param pb: An optional progress bar to use. A nested bar is created if
+            this is None.
         :return: A Pack object, or None if nothing was copied.
         """
         # open a pack - using the same name as the last temporary file
@@ -513,11 +519,15 @@
                 return None
             else:
                 self.revision_ids = frozenset(self.revision_ids)
-        self.pb = ui.ui_factory.nested_progress_bar()
+        if pb is None:
+            self.pb = ui.ui_factory.nested_progress_bar()
+        else:
+            self.pb = pb
         try:
             return self._create_pack_from_packs()
         finally:
-            self.pb.finished()
+            if pb is None:
+                self.pb.finished()
 
     def open_pack(self):
         """Open a pack for the pack we are creating."""
@@ -763,6 +773,13 @@
                 record_index += 1
 
 
+class ReconcilePacker(Packer):
+    """A packer which regenerates indices etc as it copies.
+    
+    This is used by ``bzr reconcile`` to cause parent text pointers to be
+    regenerated.
+    """
+
 
 class RepositoryPackCollection(object):
     """Management of packs within a repository."""
@@ -1482,8 +1499,9 @@
         self._write_lock_count = 0
         self._transaction = None
         # for tests
-        self._reconcile_does_inventory_gc = False
+        self._reconcile_does_inventory_gc = True
         self._reconcile_fixes_text_parents = False
+        self._reconcile_backsup_inventory = False
 
     def _abort_write_group(self):
         self._pack_collection._abort_write_group()

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-10-31 20:53:23 +0000
+++ b/bzrlib/repository.py	2007-10-31 22:33:53 +0000
@@ -616,6 +616,7 @@
         # for tests
         self._reconcile_does_inventory_gc = True
         self._reconcile_fixes_text_parents = False
+        self._reconcile_backsup_inventory = True
         # not right yet - should be more semantically clear ? 
         # 
         self.control_store = control_store

=== modified file 'bzrlib/tests/repository_implementations/helpers.py'
--- a/bzrlib/tests/repository_implementations/helpers.py	2007-10-17 20:48:21 +0000
+++ b/bzrlib/tests/repository_implementations/helpers.py	2007-10-31 22:33:53 +0000
@@ -16,9 +16,11 @@
 
 """Helper classes for repository implementation tests."""
 
+from cStringIO import StringIO
 
 from bzrlib import (
     inventory,
+    osutils,
     revision as _mod_revision,
     )
 from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
@@ -53,11 +55,24 @@
         try:
             inv = inventory.Inventory(revision_id='revision-id')
             inv.root.revision = 'revision-id'
-            repo.add_inventory('revision-id', inv, [])
+            inv_sha1 = repo.add_inventory('revision-id', inv, [])
+            root_id = inv.root.file_id
+            vf = repo.weave_store.get_weave_or_empty(root_id,
+                repo.get_transaction())
+            vf.add_lines('revision-id', [], [])
             revision = _mod_revision.Revision('revision-id',
                 committer='jrandom at example.com', timestamp=0,
-                inventory_sha1='', timezone=0, message='message', parent_ids=[])
-            repo.add_revision('revision-id',revision, inv)
+                inventory_sha1=inv_sha1, timezone=0, message='message',
+                parent_ids=[])
+            # Manually add the revision text using the RevisionStore API, with
+            # bad parents.
+            rev_tmp = StringIO()
+            repo._revision_store._serializer.write_revision(revision, rev_tmp)
+            rev_tmp.seek(0)
+            repo._revision_store.get_revision_file(repo.get_transaction()
+                ).add_lines_with_ghosts(revision.revision_id,
+                ['incorrect-parent'],
+                osutils.split_lines(rev_tmp.read()))
         except:
             repo.abort_write_group()
             repo.unlock()
@@ -66,15 +81,7 @@
             repo.commit_write_group()
             repo.unlock()
 
-        # Change the knit index's record of the parents for 'revision-id' to
-        # claim it has a parent, 'incorrect-parent', that doesn't exist in this
-        # knit at all.
         repo.lock_write()
         self.addCleanup(repo.unlock)
-        rev_knit = repo._get_revision_vf()
-        index_cache = rev_knit._index._cache
-        cached_index_entry = list(index_cache['revision-id'])
-        cached_index_entry[4] = ['incorrect-parent']
-        index_cache['revision-id'] = tuple(cached_index_entry)
         return repo
 

=== modified file 'bzrlib/tests/repository_implementations/test_reconcile.py'
--- a/bzrlib/tests/repository_implementations/test_reconcile.py	2007-10-17 09:39:41 +0000
+++ b/bzrlib/tests/repository_implementations/test_reconcile.py	2007-10-31 22:33:53 +0000
@@ -69,6 +69,26 @@
         repo.commit_write_group()
         repo.unlock()
 
+        def add_commit(repo, revision_id, parent_ids):
+            repo.lock_write()
+            repo.start_write_group()
+            inv = Inventory(revision_id=revision_id)
+            inv.root.revision = revision_id
+            root_id = inv.root.file_id
+            sha1 = repo.add_inventory(revision_id, inv, parent_ids)
+            vf = repo.weave_store.get_weave_or_empty(root_id,
+                repo.get_transaction())
+            vf.add_lines(revision_id, [], [])
+            rev = bzrlib.revision.Revision(timestamp=0,
+                                           timezone=None,
+                                           committer="Foo Bar <foo at example.com>",
+                                           message="Message",
+                                           inventory_sha1=sha1,
+                                           revision_id=revision_id)
+            rev.parent_ids = parent_ids
+            repo.add_revision(revision_id, rev)
+            repo.commit_write_group()
+            repo.unlock()
         # an empty inventory with no revision for testing with.
         # this is referenced by 'references_missing' to let us test
         # that all the cached data is correctly converted into ghost links
@@ -77,59 +97,21 @@
         repo.lock_write()
         repo.start_write_group()
         repo.add_inventory('missing', inv, [])
-        inv = Inventory(revision_id='references_missing')
-        inv.root.revision = 'references_missing'
-        sha1 = repo.add_inventory('references_missing', inv, ['missing'])
-        rev = Revision(timestamp=0,
-                       timezone=None,
-                       committer="Foo Bar <foo at example.com>",
-                       message="Message",
-                       inventory_sha1=sha1,
-                       revision_id='references_missing')
-        rev.parent_ids = ['missing']
-        repo.add_revision('references_missing', rev)
         repo.commit_write_group()
         repo.unlock()
+        add_commit(repo, 'references_missing', ['missing'])
 
         # a inventory with no parents and the revision has parents..
         # i.e. a ghost.
         repo = self.make_repository('inventory_one_ghost')
-        repo.lock_write()
-        repo.start_write_group()
-        inv = Inventory(revision_id='ghost')
-        inv.root.revision = 'ghost'
-        sha1 = repo.add_inventory('ghost', inv, [])
-        rev = Revision(timestamp=0,
-                       timezone=None,
-                       committer="Foo Bar <foo at example.com>",
-                       message="Message",
-                       inventory_sha1=sha1,
-                       revision_id='ghost')
-        rev.parent_ids = ['the_ghost']
-        repo.add_revision('ghost', rev)
-        repo.commit_write_group()
-        repo.unlock()
+        add_commit(repo, 'ghost', ['the_ghost'])
          
         # a inventory with a ghost that can be corrected now.
         t.copy_tree('inventory_one_ghost', 'inventory_ghost_present')
         bzrdir_url = self.get_url('inventory_ghost_present')
         bzrdir = bzrlib.bzrdir.BzrDir.open(bzrdir_url)
         repo = bzrdir.open_repository()
-        repo.lock_write()
-        repo.start_write_group()
-        inv = Inventory(revision_id='the_ghost')
-        inv.root.revision = 'the_ghost'
-        sha1 = repo.add_inventory('the_ghost', inv, [])
-        rev = Revision(timestamp=0,
-                       timezone=None,
-                       committer="Foo Bar <foo at example.com>",
-                       message="Message",
-                       inventory_sha1=sha1,
-                       revision_id='the_ghost')
-        rev.parent_ids = []
-        repo.add_revision('the_ghost', rev)
-        repo.commit_write_group()
-        repo.unlock()
+        add_commit(repo, 'the_ghost', [])
 
     def checkEmptyReconcile(self, **kwargs):
         """Check a reconcile on an empty repository."""
@@ -212,9 +194,10 @@
                          repo.get_ancestry('references_missing'))
 
     def check_missing_was_removed(self, repo):
-        backup = repo.control_weaves.get_weave('inventory.backup',
-                                               repo.get_transaction())
-        self.assertTrue('missing' in backup.versions())
+        if repo._reconcile_backsup_inventory:
+            backup = repo.control_weaves.get_weave('inventory.backup',
+                                                   repo.get_transaction())
+            self.assertTrue('missing' in backup.versions())
         self.assertRaises(errors.RevisionNotPresent,
                           repo.get_inventory, 'missing')
 
@@ -340,6 +323,10 @@
         repo.start_write_group()
         inv = Inventory(revision_id='wrong-secondary-parent')
         inv.root.revision = 'wrong-secondary-parent'
+        root_id = inv.root.file_id
+        vf = repo.weave_store.get_weave_or_empty(root_id,
+            repo.get_transaction())
+        vf.add_lines('wrong-secondary-parent', [], [])
         sha1 = repo.add_inventory('wrong-secondary-parent', inv, ['1', '3', '2'])
         rev = Revision(timestamp=0,
                        timezone=None,



More information about the bazaar-commits mailing list