Rev 2954: Partial support for native reconcile with packs. in http://people.ubuntu.com/~robertc/baz2.0/reconcile
Robert Collins
robertc at robertcollins.net
Wed Oct 31 22:34:01 GMT 2007
At http://people.ubuntu.com/~robertc/baz2.0/reconcile
------------------------------------------------------------
revno: 2954
revision-id:robertc@robertcollins.net-20071031223353-rwxukxnvocam1jlb
parent: robertc@robertcollins.net-20071031205323-pbtan7ltoci9slpq
committer: Robert Collins <robertc at robertcollins.net>
branch nick: reconcile
timestamp: Thu 2007-11-01 09:33:53 +1100
message:
Partial support for native reconcile with packs.
modified:
bzrlib/reconcile.py reweave_inventory.py-20051108164726-1e5e0934febac06e
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/repository_implementations/helpers.py helpers.py-20070924032407-m460yl9j5gu5ju85-2
bzrlib/tests/repository_implementations/test_reconcile.py test_reconcile.py-20060223022332-572ef70a3288e369
=== modified file 'bzrlib/reconcile.py'
--- a/bzrlib/reconcile.py 2007-10-31 20:53:23 +0000
+++ b/bzrlib/reconcile.py 2007-10-31 22:33:53 +0000
@@ -30,6 +30,7 @@
errors,
ui,
repository,
+ repofmt,
)
from bzrlib.trace import mutter, note
from bzrlib.tsort import TopoSorter
@@ -469,28 +470,36 @@
"""Perform the steps to reconcile this repository."""
if not self.thorough:
return
- self.repo.lock_write()
+ collection = self.repo._pack_collection
+ collection.ensure_loaded()
+ collection.lock_names()
try:
- self.repo._pack_collection.ensure_loaded()
- self.repo._pack_collection.lock_names()
- try:
- self.repo.start_write_group()
- try:
- self._new_pack = self.repo._pack_collection._new_pack
- self._copy_revisions()
- except:
- self.repo.abort_write_group()
- raise
- else:
- self.repo.commit_write_group()
- finally:
- self.repo._pack_collection._unlock_names()
+ packs = collection.all_packs()
+ all_revisions = self.repo.all_revision_ids()
+ total_inventories = len(list(
+ collection.inventory_index.combined_index.iter_all_entries()))
+ if len(all_revisions):
+ self._packer = repofmt.pack_repo.ReconcilePacker(
+ collection, packs, ".reconcile", all_revisions)
+ new_pack = self._packer.pack(pb=self.pb)
+ if new_pack is not None:
+ self._discard_packs(packs)
+ else:
+ # only make a new pack when there is data to copy.
+ self._discard_packs(packs)
+ self.garbage_inventories = total_inventories - len(list(
+ collection.inventory_index.combined_index.iter_all_entries()))
finally:
- self.repo.unlock()
-
- def _copy_revisions(self):
- """Copy revisions, regenerating the index as we go."""
-
- def _pack_correcting_data(self):
- """Perform a total pack, regenerating as much data as possible."""
- revisions = self.repo.all_revision_ids()
+ collection._unlock_names()
+
+ def _discard_packs(self, packs):
+ """Discard some packs from the repository.
+
+ This removes them from the memory index and renames them into the
+ obsolete packs directory.
+ :param packs: The packs to discard.
+ """
+ for pack in packs:
+ self.repo._pack_collection._remove_pack_from_memory(pack)
+ self.repo._pack_collection._save_pack_names()
+ self.repo._pack_collection._obsolete_packs(packs)
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2007-10-25 01:24:51 +0000
+++ b/bzrlib/remote.py 2007-10-31 22:33:53 +0000
@@ -261,6 +261,9 @@
# for tests
self._reconcile_does_inventory_gc = True
self._reconcile_fixes_text_parents = True
+ # This depends on the actual remote format, so force it off for maximum
+ # compatibility.
+ self._reconcile_backsup_inventory = False
self.base = self.bzrdir.transport.base
def __str__(self):
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2007-10-31 20:53:23 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2007-10-31 22:33:53 +0000
@@ -479,12 +479,20 @@
"""Create a pack from packs."""
def __init__(self, pack_collection, packs, suffix, revision_ids=None):
+ """Create a Packer.
+
+ :param pack_collection: A RepositoryPackCollection object where the
+ new pack is being written to.
+ :param packs: The packs to combine.
+ :param suffix: The suffix to use on the temporary files for the pack.
+ :param revision_ids: Revision ids to limit the pack to.
+ """
self.packs = packs
self.suffix = suffix
self.revision_ids = revision_ids
self._pack_collection = pack_collection
- def pack(self):
+ def pack(self, pb=None):
"""Create a new pack by reading data from other packs.
This does little more than a bulk copy of data. One key difference
@@ -494,10 +502,8 @@
source packs are not altered and are not required to be in the current
pack collection.
- :param packs: An iterable of Packs to combine.
- :param revision_ids: Either None, to copy all data, or a list
- of revision_ids to limit the copied data to the data they
- introduced.
+ :param pb: An optional progress bar to use. A nested bar is created if
+ this is None.
:return: A Pack object, or None if nothing was copied.
"""
# open a pack - using the same name as the last temporary file
@@ -513,11 +519,15 @@
return None
else:
self.revision_ids = frozenset(self.revision_ids)
- self.pb = ui.ui_factory.nested_progress_bar()
+ if pb is None:
+ self.pb = ui.ui_factory.nested_progress_bar()
+ else:
+ self.pb = pb
try:
return self._create_pack_from_packs()
finally:
- self.pb.finished()
+ if pb is None:
+ self.pb.finished()
def open_pack(self):
"""Open a pack for the pack we are creating."""
@@ -763,6 +773,13 @@
record_index += 1
+class ReconcilePacker(Packer):
+ """A packer which regenerates indices etc as it copies.
+
+ This is used by ``bzr reconcile`` to cause parent text pointers to be
+ regenerated.
+ """
+
class RepositoryPackCollection(object):
"""Management of packs within a repository."""
@@ -1482,8 +1499,9 @@
self._write_lock_count = 0
self._transaction = None
# for tests
- self._reconcile_does_inventory_gc = False
+ self._reconcile_does_inventory_gc = True
self._reconcile_fixes_text_parents = False
+ self._reconcile_backsup_inventory = False
def _abort_write_group(self):
self._pack_collection._abort_write_group()
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-10-31 20:53:23 +0000
+++ b/bzrlib/repository.py 2007-10-31 22:33:53 +0000
@@ -616,6 +616,7 @@
# for tests
self._reconcile_does_inventory_gc = True
self._reconcile_fixes_text_parents = False
+ self._reconcile_backsup_inventory = True
# not right yet - should be more semantically clear ?
#
self.control_store = control_store
=== modified file 'bzrlib/tests/repository_implementations/helpers.py'
--- a/bzrlib/tests/repository_implementations/helpers.py 2007-10-17 20:48:21 +0000
+++ b/bzrlib/tests/repository_implementations/helpers.py 2007-10-31 22:33:53 +0000
@@ -16,9 +16,11 @@
"""Helper classes for repository implementation tests."""
+from cStringIO import StringIO
from bzrlib import (
inventory,
+ osutils,
revision as _mod_revision,
)
from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
@@ -53,11 +55,24 @@
try:
inv = inventory.Inventory(revision_id='revision-id')
inv.root.revision = 'revision-id'
- repo.add_inventory('revision-id', inv, [])
+ inv_sha1 = repo.add_inventory('revision-id', inv, [])
+ root_id = inv.root.file_id
+ vf = repo.weave_store.get_weave_or_empty(root_id,
+ repo.get_transaction())
+ vf.add_lines('revision-id', [], [])
revision = _mod_revision.Revision('revision-id',
committer='jrandom at example.com', timestamp=0,
- inventory_sha1='', timezone=0, message='message', parent_ids=[])
- repo.add_revision('revision-id',revision, inv)
+ inventory_sha1=inv_sha1, timezone=0, message='message',
+ parent_ids=[])
+ # Manually add the revision text using the RevisionStore API, with
+ # bad parents.
+ rev_tmp = StringIO()
+ repo._revision_store._serializer.write_revision(revision, rev_tmp)
+ rev_tmp.seek(0)
+ repo._revision_store.get_revision_file(repo.get_transaction()
+ ).add_lines_with_ghosts(revision.revision_id,
+ ['incorrect-parent'],
+ osutils.split_lines(rev_tmp.read()))
except:
repo.abort_write_group()
repo.unlock()
@@ -66,15 +81,7 @@
repo.commit_write_group()
repo.unlock()
- # Change the knit index's record of the parents for 'revision-id' to
- # claim it has a parent, 'incorrect-parent', that doesn't exist in this
- # knit at all.
repo.lock_write()
self.addCleanup(repo.unlock)
- rev_knit = repo._get_revision_vf()
- index_cache = rev_knit._index._cache
- cached_index_entry = list(index_cache['revision-id'])
- cached_index_entry[4] = ['incorrect-parent']
- index_cache['revision-id'] = tuple(cached_index_entry)
return repo
=== modified file 'bzrlib/tests/repository_implementations/test_reconcile.py'
--- a/bzrlib/tests/repository_implementations/test_reconcile.py 2007-10-17 09:39:41 +0000
+++ b/bzrlib/tests/repository_implementations/test_reconcile.py 2007-10-31 22:33:53 +0000
@@ -69,6 +69,26 @@
repo.commit_write_group()
repo.unlock()
+ def add_commit(repo, revision_id, parent_ids):
+ repo.lock_write()
+ repo.start_write_group()
+ inv = Inventory(revision_id=revision_id)
+ inv.root.revision = revision_id
+ root_id = inv.root.file_id
+ sha1 = repo.add_inventory(revision_id, inv, parent_ids)
+ vf = repo.weave_store.get_weave_or_empty(root_id,
+ repo.get_transaction())
+ vf.add_lines(revision_id, [], [])
+ rev = bzrlib.revision.Revision(timestamp=0,
+ timezone=None,
+ committer="Foo Bar <foo at example.com>",
+ message="Message",
+ inventory_sha1=sha1,
+ revision_id=revision_id)
+ rev.parent_ids = parent_ids
+ repo.add_revision(revision_id, rev)
+ repo.commit_write_group()
+ repo.unlock()
# an empty inventory with no revision for testing with.
# this is referenced by 'references_missing' to let us test
# that all the cached data is correctly converted into ghost links
@@ -77,59 +97,21 @@
repo.lock_write()
repo.start_write_group()
repo.add_inventory('missing', inv, [])
- inv = Inventory(revision_id='references_missing')
- inv.root.revision = 'references_missing'
- sha1 = repo.add_inventory('references_missing', inv, ['missing'])
- rev = Revision(timestamp=0,
- timezone=None,
- committer="Foo Bar <foo at example.com>",
- message="Message",
- inventory_sha1=sha1,
- revision_id='references_missing')
- rev.parent_ids = ['missing']
- repo.add_revision('references_missing', rev)
repo.commit_write_group()
repo.unlock()
+ add_commit(repo, 'references_missing', ['missing'])
# a inventory with no parents and the revision has parents..
# i.e. a ghost.
repo = self.make_repository('inventory_one_ghost')
- repo.lock_write()
- repo.start_write_group()
- inv = Inventory(revision_id='ghost')
- inv.root.revision = 'ghost'
- sha1 = repo.add_inventory('ghost', inv, [])
- rev = Revision(timestamp=0,
- timezone=None,
- committer="Foo Bar <foo at example.com>",
- message="Message",
- inventory_sha1=sha1,
- revision_id='ghost')
- rev.parent_ids = ['the_ghost']
- repo.add_revision('ghost', rev)
- repo.commit_write_group()
- repo.unlock()
+ add_commit(repo, 'ghost', ['the_ghost'])
# a inventory with a ghost that can be corrected now.
t.copy_tree('inventory_one_ghost', 'inventory_ghost_present')
bzrdir_url = self.get_url('inventory_ghost_present')
bzrdir = bzrlib.bzrdir.BzrDir.open(bzrdir_url)
repo = bzrdir.open_repository()
- repo.lock_write()
- repo.start_write_group()
- inv = Inventory(revision_id='the_ghost')
- inv.root.revision = 'the_ghost'
- sha1 = repo.add_inventory('the_ghost', inv, [])
- rev = Revision(timestamp=0,
- timezone=None,
- committer="Foo Bar <foo at example.com>",
- message="Message",
- inventory_sha1=sha1,
- revision_id='the_ghost')
- rev.parent_ids = []
- repo.add_revision('the_ghost', rev)
- repo.commit_write_group()
- repo.unlock()
+ add_commit(repo, 'the_ghost', [])
def checkEmptyReconcile(self, **kwargs):
"""Check a reconcile on an empty repository."""
@@ -212,9 +194,10 @@
repo.get_ancestry('references_missing'))
def check_missing_was_removed(self, repo):
- backup = repo.control_weaves.get_weave('inventory.backup',
- repo.get_transaction())
- self.assertTrue('missing' in backup.versions())
+ if repo._reconcile_backsup_inventory:
+ backup = repo.control_weaves.get_weave('inventory.backup',
+ repo.get_transaction())
+ self.assertTrue('missing' in backup.versions())
self.assertRaises(errors.RevisionNotPresent,
repo.get_inventory, 'missing')
@@ -340,6 +323,10 @@
repo.start_write_group()
inv = Inventory(revision_id='wrong-secondary-parent')
inv.root.revision = 'wrong-secondary-parent'
+ root_id = inv.root.file_id
+ vf = repo.weave_store.get_weave_or_empty(root_id,
+ repo.get_transaction())
+ vf.add_lines('wrong-secondary-parent', [], [])
sha1 = repo.add_inventory('wrong-secondary-parent', inv, ['1', '3', '2'])
rev = Revision(timestamp=0,
timezone=None,
More information about the bazaar-commits
mailing list