Rev 5377: (spiv) Add hidden option 'bzr reconcile --canonicalize-chks' to repair repos in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Mon Aug 16 07:46:19 BST 2010
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 5377 [merge]
revision-id: pqm at pqm.ubuntu.com-20100816064617-wizstoapjbffkj05
parent: pqm at pqm.ubuntu.com-20100811024926-irizklc3vvd9h45p
parent: andrew.bennetts at canonical.com-20100812041907-lgauj5jm4voyjxbm
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2010-08-16 07:46:17 +0100
message:
(spiv) Add hidden option 'bzr reconcile --canonicalize-chks' to repair repos
affected by bug 522637. (Andrew Bennetts)
modified:
bzrlib/builtins.py builtins.py-20050830033751-fc01482b9ca23183
bzrlib/reconcile.py reweave_inventory.py-20051108164726-1e5e0934febac06e
bzrlib/repofmt/groupcompress_repo.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
=== modified file 'bzrlib/builtins.py'
--- a/bzrlib/builtins.py 2010-08-07 20:31:22 +0000
+++ b/bzrlib/builtins.py 2010-08-11 05:48:09 +0000
@@ -1592,11 +1592,17 @@
_see_also = ['check']
takes_args = ['branch?']
+ takes_options = [
+ Option('canonicalize-chks',
+ help='Make sure CHKs are in canonical form (repairs '
+ 'bug 522637).',
+ hidden=True),
+ ]
- def run(self, branch="."):
+ def run(self, branch=".", canonicalize_chks=False):
from bzrlib.reconcile import reconcile
dir = bzrdir.BzrDir.open(branch)
- reconcile(dir)
+ reconcile(dir, canonicalize_chks=canonicalize_chks)
class cmd_revision_history(Command):
=== modified file 'bzrlib/reconcile.py'
--- a/bzrlib/reconcile.py 2010-05-20 18:23:17 +0000
+++ b/bzrlib/reconcile.py 2010-08-12 04:19:07 +0000
@@ -36,7 +36,7 @@
from bzrlib.versionedfile import AdapterFactory, FulltextContentFactory
-def reconcile(dir, other=None):
+def reconcile(dir, canonicalize_chks=False):
"""Reconcile the data in dir.
Currently this is limited to a inventory 'reweave'.
@@ -46,18 +46,19 @@
Directly using Reconciler is recommended for library users that
desire fine grained control or analysis of the found issues.
- :param other: another bzrdir to reconcile against.
+ :param canonicalize_chks: Make sure CHKs are in canonical form.
"""
- reconciler = Reconciler(dir, other=other)
+ reconciler = Reconciler(dir, canonicalize_chks=canonicalize_chks)
reconciler.reconcile()
class Reconciler(object):
"""Reconcilers are used to reconcile existing data."""
- def __init__(self, dir, other=None):
+ def __init__(self, dir, other=None, canonicalize_chks=False):
"""Create a Reconciler."""
self.bzrdir = dir
+ self.canonicalize_chks = canonicalize_chks
def reconcile(self):
"""Perform reconciliation.
@@ -98,7 +99,15 @@
ui.ui_factory.note('Reconciling repository %s' %
self.repo.user_url)
self.pb.update("Reconciling repository", 0, 1)
- repo_reconciler = self.repo.reconcile(thorough=True)
+ if self.canonicalize_chks:
+ try:
+ self.repo.reconcile_canonicalize_chks
+ except AttributeError:
+ raise errors.BzrError(
+ "%s cannot canonicalize CHKs." % (self.repo,))
+ repo_reconciler = self.repo.reconcile_canonicalize_chks()
+ else:
+ repo_reconciler = self.repo.reconcile(thorough=True)
self.inconsistent_parents = repo_reconciler.inconsistent_parents
self.garbage_inventories = repo_reconciler.garbage_inventories
if repo_reconciler.aborted:
@@ -496,6 +505,12 @@
# - unlock the names list
# https://bugs.launchpad.net/bzr/+bug/154173
+ def __init__(self, repo, other=None, thorough=False,
+ canonicalize_chks=False):
+ super(PackReconciler, self).__init__(repo, other=other,
+ thorough=thorough)
+ self.canonicalize_chks = canonicalize_chks
+
def _reconcile_steps(self):
"""Perform the steps to reconcile this repository."""
if not self.thorough:
@@ -509,8 +524,12 @@
total_inventories = len(list(
collection.inventory_index.combined_index.iter_all_entries()))
if len(all_revisions):
- new_pack = self.repo._reconcile_pack(collection, packs,
- ".reconcile", all_revisions, self.pb)
+ if self.canonicalize_chks:
+ reconcile_meth = self.repo._canonicalize_chks_pack
+ else:
+ reconcile_meth = self.repo._reconcile_pack
+ new_pack = reconcile_meth(collection, packs, ".reconcile",
+ all_revisions, self.pb)
if new_pack is not None:
self._discard_and_save(packs)
else:
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py 2010-05-14 13:25:05 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py 2010-08-11 06:14:09 +0000
@@ -32,11 +32,13 @@
revision as _mod_revision,
trace,
ui,
+ versionedfile,
)
from bzrlib.btree_index import (
BTreeGraphIndex,
BTreeBuilder,
)
+from bzrlib.decorators import needs_write_lock
from bzrlib.groupcompress import (
_GCGraphIndex,
GroupCompressVersionedFiles,
@@ -425,8 +427,11 @@
self._copy_stream(source_vf, target_vf, inventory_keys,
'inventories', self._get_filtered_inv_stream, 2)
+ def _get_chk_vfs_for_copy(self):
+ return self._build_vfs('chk', False, False)
+
def _copy_chk_texts(self):
- source_vf, target_vf = self._build_vfs('chk', False, False)
+ source_vf, target_vf = self._get_chk_vfs_for_copy()
# TODO: This is technically spurious... if it is a performance issue,
# remove it
total_keys = source_vf.keys()
@@ -578,6 +583,111 @@
return new_pack.data_inserted() and self._data_changed
+class GCCHKCanonicalizingPacker(GCCHKPacker):
+ """A packer that ensures inventories have canonical-form CHK maps.
+
+ Ideally this would be part of reconcile, but it's very slow and rarely
+ needed. (It repairs repositories affected by
+ https://bugs.launchpad.net/bzr/+bug/522637).
+ """
+
+ def __init__(self, *args, **kwargs):
+ super(GCCHKCanonicalizingPacker, self).__init__(*args, **kwargs)
+ self._data_changed = False
+
+ def _exhaust_stream(self, source_vf, keys, message, vf_to_stream, pb_offset):
+ """Create and exhaust a stream, but don't insert it.
+
+ This is useful to get the side-effects of generating a stream.
+ """
+ self.pb.update('scanning %s' % (message,), pb_offset)
+ child_pb = ui.ui_factory.nested_progress_bar()
+ try:
+ list(vf_to_stream(source_vf, keys, message, child_pb))
+ finally:
+ child_pb.finished()
+
+ def _copy_inventory_texts(self):
+ source_vf, target_vf = self._build_vfs('inventory', True, True)
+ source_chk_vf, target_chk_vf = self._get_chk_vfs_for_copy()
+ inventory_keys = source_vf.keys()
+ # First, copy the existing CHKs on the assumption that most of them
+ # will be correct. This will save us from having to reinsert (and
+ # recompress) these records later at the cost of perhaps preserving a
+ # few unused CHKs.
+ # (Iterate but don't insert _get_filtered_inv_stream to populate the
+ # variables needed by GCCHKPacker._copy_chk_texts.)
+ self._exhaust_stream(source_vf, inventory_keys, 'inventories',
+ self._get_filtered_inv_stream, 2)
+ GCCHKPacker._copy_chk_texts(self)
+ # Now copy and fix the inventories, and any regenerated CHKs.
+ def chk_canonicalizing_inv_stream(source_vf, keys, message, pb=None):
+ return self._get_filtered_canonicalizing_inv_stream(
+ source_vf, keys, message, pb, source_chk_vf, target_chk_vf)
+ self._copy_stream(source_vf, target_vf, inventory_keys,
+ 'inventories', chk_canonicalizing_inv_stream, 4)
+
+ def _copy_chk_texts(self):
+ # No-op; in this class this happens during _copy_inventory_texts.
+ pass
+
+ def _get_filtered_canonicalizing_inv_stream(self, source_vf, keys, message,
+ pb=None, source_chk_vf=None, target_chk_vf=None):
+ """Filter the texts of inventories, regenerating CHKs to make sure they
+ are canonical.
+ """
+ total_keys = len(keys)
+ target_chk_vf = versionedfile.NoDupeAddLinesDecorator(target_chk_vf)
+ def _filtered_inv_stream():
+ stream = source_vf.get_record_stream(keys, 'groupcompress', True)
+ search_key_name = None
+ for idx, record in enumerate(stream):
+ # Inventories should always be with revisions; assume success.
+ bytes = record.get_bytes_as('fulltext')
+ chk_inv = inventory.CHKInventory.deserialise(
+ source_chk_vf, bytes, record.key)
+ if pb is not None:
+ pb.update('inv', idx, total_keys)
+ chk_inv.id_to_entry._ensure_root()
+ if search_key_name is None:
+ # Find the name corresponding to the search_key_func
+ search_key_reg = chk_map.search_key_registry
+ for search_key_name, func in search_key_reg.iteritems():
+ if func == chk_inv.id_to_entry._search_key_func:
+ break
+ canonical_inv = inventory.CHKInventory.from_inventory(
+ target_chk_vf, chk_inv,
+ maximum_size=chk_inv.id_to_entry._root_node._maximum_size,
+ search_key_name=search_key_name)
+ if chk_inv.id_to_entry.key() != canonical_inv.id_to_entry.key():
+ trace.mutter(
+ 'Non-canonical CHK map for id_to_entry of inv: %s '
+ '(root is %s, should be %s)' % (chk_inv.revision_id,
+ chk_inv.id_to_entry.key()[0],
+ canonical_inv.id_to_entry.key()[0]))
+ self._data_changed = True
+ p_id_map = chk_inv.parent_id_basename_to_file_id
+ p_id_map._ensure_root()
+ canon_p_id_map = canonical_inv.parent_id_basename_to_file_id
+ if p_id_map.key() != canon_p_id_map.key():
+ trace.mutter(
+ 'Non-canonical CHK map for parent_id_to_basename of '
+ 'inv: %s (root is %s, should be %s)'
+ % (chk_inv.revision_id, p_id_map.key()[0],
+ canon_p_id_map.key()[0]))
+ self._data_changed = True
+ yield versionedfile.ChunkedContentFactory(record.key,
+ record.parents, record.sha1,
+ canonical_inv.to_lines())
+ # We have finished processing all of the inventory records, we
+ # don't need these sets anymore
+ return _filtered_inv_stream()
+
+ def _use_pack(self, new_pack):
+ """Override _use_pack to check for reconcile having changed content."""
+ return new_pack.data_inserted() and self._data_changed
+
+
class GCRepositoryPackCollection(RepositoryPackCollection):
pack_factory = GCPack
@@ -999,10 +1109,24 @@
finally:
pb.finished()
+ @needs_write_lock
+ def reconcile_canonicalize_chks(self):
+ """Reconcile this repository to make sure all CHKs are in canonical
+ form.
+ """
+ from bzrlib.reconcile import PackReconciler
+ reconciler = PackReconciler(self, thorough=True, canonicalize_chks=True)
+ reconciler.reconcile()
+ return reconciler
+
def _reconcile_pack(self, collection, packs, extension, revs, pb):
packer = GCCHKReconcilePacker(collection, packs, extension)
return packer.pack(pb)
+ def _canonicalize_chks_pack(self, collection, packs, extension, revs, pb):
+ packer = GCCHKCanonicalizingPacker(collection, packs, extension, revs)
+ return packer.pack(pb)
+
def _get_source(self, to_format):
"""Return a source for streaming from this repository."""
if self._format._serializer == to_format._serializer:
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2010-08-11 01:27:46 +0000
+++ b/bzrlib/versionedfile.py 2010-08-16 06:46:17 +0000
@@ -1725,6 +1725,46 @@
yield (l, key)
+class NoDupeAddLinesDecorator(object):
+ """Decorator for a VersionedFiles that skips doing an add_lines if the key
+ is already present.
+ """
+
+ def __init__(self, store):
+ self._store = store
+
+ def add_lines(self, key, parents, lines, parent_texts=None,
+ left_matching_blocks=None, nostore_sha=None, random_id=False,
+ check_content=True):
+ """See VersionedFiles.add_lines.
+
+ This implementation may return None as the third element of the return
+ value when the original store wouldn't.
+ """
+ if nostore_sha:
+ raise NotImplementedError(
+ "NoDupeAddLinesDecorator.add_lines does not implement the "
+ "nostore_sha behaviour.")
+ if key[-1] is None:
+ sha1 = osutils.sha_strings(lines)
+ key = ("sha1:" + sha1,)
+ else:
+ sha1 = None
+ if key in self._store.get_parent_map([key]):
+ # This key has already been inserted, so don't do it again.
+ if sha1 is None:
+ sha1 = osutils.sha_strings(lines)
+ return sha1, sum(map(len, lines)), None
+ return self._store.add_lines(key, parents, lines,
+ parent_texts=parent_texts,
+ left_matching_blocks=left_matching_blocks,
+ nostore_sha=nostore_sha, random_id=random_id,
+ check_content=check_content)
+
+ def __getattr__(self, name):
+ return getattr(self._store, name)
+
+
def network_bytes_to_kind_and_offset(network_bytes):
"""Strip of a record kind from the front of network_bytes.
More information about the bazaar-commits
mailing list