Rev 3070: (robertc) Fix an ordering insertion issue with reconcile of pack in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Mon Dec 3 21:03:49 GMT 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 3070
revision-id:pqm at pqm.ubuntu.com-20071203210338-3w0ryakegm0xopp0
parent: pqm at pqm.ubuntu.com-20071203202639-842vhwu1asbujc95
parent: robertc at robertcollins.net-20071203192640-4sv1t0zsup9p8x8t
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2007-12-03 21:03:38 +0000
message:
(robertc) Fix an ordering insertion issue with reconcile of pack
repositories. (Robert Collins)
modified:
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
------------------------------------------------------------
revno: 3063.2.2
revision-id:robertc at robertcollins.net-20071203192640-4sv1t0zsup9p8x8t
parent: robertc at robertcollins.net-20071203061605-3hw1lhioz72nmm4v
committer: Robert Collins <robertc at robertcollins.net>
branch nick: reconcile.toposort
timestamp: Tue 2007-12-04 06:26:40 +1100
message:
Review feedback.
modified:
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
------------------------------------------------------------
revno: 3063.2.1
revision-id:robertc at robertcollins.net-20071203061605-3hw1lhioz72nmm4v
parent: pqm at pqm.ubuntu.com-20071201001053-zi6k6s2817c1p97s
committer: Robert Collins <robertc at robertcollins.net>
branch nick: reconcile.toposort
timestamp: Mon 2007-12-03 17:16:05 +1100
message:
Solve reconciling erroring when multiple portions of a single delta chain are being reinserted.
modified:
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2007-12-01 00:10:53 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2007-12-03 19:26:40 +0000
@@ -38,6 +38,7 @@
from bzrlib.osutils import rand_chars
from bzrlib.pack import ContainerWriter
from bzrlib.store import revision
+from bzrlib import tsort
""")
from bzrlib import (
bzrdir,
@@ -924,7 +925,10 @@
# we have three major tasks here:
# 1) generate the ideal index
repo = self._pack_collection.repo
- ideal_index = repo._generate_text_key_index(self._text_refs)
+ ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
+ _1, key, _2, refs in
+ self.new_pack.revision_index.iter_all_entries()])
+ ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
# 2) generate a text_nodes list that contains all the deltas that can
# be used as-is, with corrected parents.
ok_nodes = []
@@ -966,7 +970,20 @@
# 3) bulk copy the ok data
list(self._copy_nodes_graph(ok_nodes, text_index_map,
self.new_pack._writer, self.new_pack.text_index))
- # 3) adhoc copy all the other texts.
+ # 4) adhoc copy all the other texts.
+ # We have to topologically insert all texts otherwise we can fail to
+ # reconcile when parts of a single delta chain are preserved intact,
+ # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
+ # reinserted, and if d3 has incorrect parents it will also be
+ # reinserted. If we insert d3 first, d2 is present (as it was bulk
+ # copied), so we will try to delta, but d2 is not currently able to be
+ # extracted because it's basis d1 is not present. Topologically sorting
+ # addresses this. The following generates a sort for all the texts that
+ # are being inserted without having to reference the entire text key
+ # space (we only topo sort the revisions, which is smaller).
+ topo_order = tsort.topo_sort(ancestors)
+ rev_order = dict(zip(topo_order, range(len(topo_order))))
+ bad_texts.sort(key=lambda key:rev_order[key[0][1]])
transaction = repo.get_transaction()
file_id_index = GraphIndexPrefixAdapter(
self.new_pack.text_index,
@@ -1006,7 +1023,7 @@
knit_index._add_callback = file_id_index.add_nodes
output_knit.add_lines_with_ghosts(
key[1], parents, text_lines, random_id=True, check_content=False)
- # 4) check that nothing inserted has a reference outside the keyspace.
+ # 5) check that nothing inserted has a reference outside the keyspace.
missing_text_keys = self.new_pack._external_compression_parents_of_texts()
if missing_text_keys:
raise errors.BzrError('Reference to missing compression parents %r'
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-11-30 22:40:29 +0000
+++ b/bzrlib/repository.py 2007-12-03 06:16:05 +0000
@@ -1242,7 +1242,8 @@
raise errors.NoSuchIdInRepository(self, file_id)
yield callable_data, weave.get_lines(revision_id)
- def _generate_text_key_index(self, text_key_references=None):
+ def _generate_text_key_index(self, text_key_references=None,
+ ancestors=None):
"""Generate a new text key index for the repository.
This is an expensive function that will take considerable time to run.
@@ -1252,8 +1253,9 @@
the parents list will be [NULL_REVISION].
"""
# All revisions, to find inventory parents.
- revision_graph = self.get_revision_graph_with_ghosts()
- ancestors = revision_graph.get_ancestors()
+ if ancestors is None:
+ revision_graph = self.get_revision_graph_with_ghosts()
+ ancestors = revision_graph.get_ancestors()
if text_key_references is None:
text_key_references = self.find_text_key_references()
pb = ui.ui_factory.nested_progress_bar()
More information about the bazaar-commits
mailing list