Rev 3064: Solve reconciling erroring when multiple portions of a single delta chain are being reinserted. in http://people.ubuntu.com/~robertc/baz2.0/reconcile.toposort

Robert Collins robertc at robertcollins.net
Mon Dec 3 06:16:23 GMT 2007


At http://people.ubuntu.com/~robertc/baz2.0/reconcile.toposort

------------------------------------------------------------
revno: 3064
revision-id:robertc at robertcollins.net-20071203061605-3hw1lhioz72nmm4v
parent: pqm at pqm.ubuntu.com-20071201001053-zi6k6s2817c1p97s
committer: Robert Collins <robertc at robertcollins.net>
branch nick: reconcile.toposort
timestamp: Mon 2007-12-03 17:16:05 +1100
message:
  Solve reconciling erroring when multiple portions of a single delta chain are being reinserted.
modified:
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-12-01 00:10:53 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-12-03 06:16:05 +0000
@@ -38,6 +38,7 @@
 from bzrlib.osutils import rand_chars
 from bzrlib.pack import ContainerWriter
 from bzrlib.store import revision
+from bzrlib import tsort
 """)
 from bzrlib import (
     bzrdir,
@@ -924,7 +925,10 @@
         # we have three major tasks here:
         # 1) generate the ideal index
         repo = self._pack_collection.repo
-        ideal_index = repo._generate_text_key_index(self._text_refs)
+        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
+            _, key, _2, refs in 
+            self.new_pack.revision_index.iter_all_entries()])
+        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
         # 2) generate a text_nodes list that contains all the deltas that can
         #    be used as-is, with corrected parents.
         ok_nodes = []
@@ -963,10 +967,23 @@
         # we're finished with some data.
         del ideal_index
         del text_nodes
-        # 3) bulk copy the ok data
+        # 4) bulk copy the ok data
         list(self._copy_nodes_graph(ok_nodes, text_index_map,
             self.new_pack._writer, self.new_pack.text_index))
-        # 3) adhoc copy all the other texts.
+        # 4) adhoc copy all the other texts.
+        # We have to topologically insert all texts otherwise we can fail to
+        # reconcile when parts of a single delta chain are preserved intact,
+        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
+        # reinserted, and if d3 has incorrect parents it will also be
+        # reinserted. If we insert d3 first, d2 is present (as it was bulk
+        # copied), so we will try to delta, but d2 is not currently able to be
+        # extracted because it's basis d1 is not present. Topologically sorting
+        # addresses this. The following generates a sort for all the texts that
+        # are being inserted without having to reference the entire text key
+        # space (we only topo sort the revisions, which is smaller).
+        topo_order = tsort.topo_sort(ancestors)
+        rev_order = dict(zip(topo_order, range(len(topo_order))))
+        bad_texts.sort(key=lambda key:rev_order[key[0][1]])
         transaction = repo.get_transaction()
         file_id_index = GraphIndexPrefixAdapter(
             self.new_pack.text_index,

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-11-30 22:40:29 +0000
+++ b/bzrlib/repository.py	2007-12-03 06:16:05 +0000
@@ -1242,7 +1242,8 @@
                 raise errors.NoSuchIdInRepository(self, file_id)
             yield callable_data, weave.get_lines(revision_id)
 
-    def _generate_text_key_index(self, text_key_references=None):
+    def _generate_text_key_index(self, text_key_references=None,
+        ancestors=None):
         """Generate a new text key index for the repository.
 
         This is an expensive function that will take considerable time to run.
@@ -1252,8 +1253,9 @@
             the parents list will be [NULL_REVISION].
         """
         # All revisions, to find inventory parents.
-        revision_graph = self.get_revision_graph_with_ghosts()
-        ancestors = revision_graph.get_ancestors()
+        if ancestors is None:
+            revision_graph = self.get_revision_graph_with_ghosts()
+            ancestors = revision_graph.get_ancestors()
         if text_key_references is None:
             text_key_references = self.find_text_key_references()
         pb = ui.ui_factory.nested_progress_bar()



More information about the bazaar-commits mailing list