Rev 3070: (robertc) Fix an ordering insertion issue with reconcile of pack in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Mon Dec 3 21:03:49 GMT 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 3070
revision-id:pqm at pqm.ubuntu.com-20071203210338-3w0ryakegm0xopp0
parent: pqm at pqm.ubuntu.com-20071203202639-842vhwu1asbujc95
parent: robertc at robertcollins.net-20071203192640-4sv1t0zsup9p8x8t
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2007-12-03 21:03:38 +0000
message:
  (robertc) Fix an ordering insertion issue with reconcile of pack
  	repositories. (Robert Collins)
modified:
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
    ------------------------------------------------------------
    revno: 3063.2.2
    revision-id:robertc at robertcollins.net-20071203192640-4sv1t0zsup9p8x8t
    parent: robertc at robertcollins.net-20071203061605-3hw1lhioz72nmm4v
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: reconcile.toposort
    timestamp: Tue 2007-12-04 06:26:40 +1100
    message:
      Review feedback.
    modified:
      bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
    ------------------------------------------------------------
    revno: 3063.2.1
    revision-id:robertc at robertcollins.net-20071203061605-3hw1lhioz72nmm4v
    parent: pqm at pqm.ubuntu.com-20071201001053-zi6k6s2817c1p97s
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: reconcile.toposort
    timestamp: Mon 2007-12-03 17:16:05 +1100
    message:
      Solve reconciling erroring when multiple portions of a single delta chain are being reinserted.
    modified:
      bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-12-01 00:10:53 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-12-03 19:26:40 +0000
@@ -38,6 +38,7 @@
 from bzrlib.osutils import rand_chars
 from bzrlib.pack import ContainerWriter
 from bzrlib.store import revision
+from bzrlib import tsort
 """)
 from bzrlib import (
     bzrdir,
@@ -924,7 +925,10 @@
         # we have three major tasks here:
         # 1) generate the ideal index
         repo = self._pack_collection.repo
-        ideal_index = repo._generate_text_key_index(self._text_refs)
+        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
+            _1, key, _2, refs in 
+            self.new_pack.revision_index.iter_all_entries()])
+        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
         # 2) generate a text_nodes list that contains all the deltas that can
         #    be used as-is, with corrected parents.
         ok_nodes = []
@@ -966,7 +970,20 @@
         # 3) bulk copy the ok data
         list(self._copy_nodes_graph(ok_nodes, text_index_map,
             self.new_pack._writer, self.new_pack.text_index))
-        # 3) adhoc copy all the other texts.
+        # 4) adhoc copy all the other texts.
+        # We have to topologically insert all texts otherwise we can fail to
+        # reconcile when parts of a single delta chain are preserved intact,
+        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
+        # reinserted, and if d3 has incorrect parents it will also be
+        # reinserted. If we insert d3 first, d2 is present (as it was bulk
+        # copied), so we will try to delta, but d2 is not currently able to be
+        # extracted because it's basis d1 is not present. Topologically sorting
+        # addresses this. The following generates a sort for all the texts that
+        # are being inserted without having to reference the entire text key
+        # space (we only topo sort the revisions, which is smaller).
+        topo_order = tsort.topo_sort(ancestors)
+        rev_order = dict(zip(topo_order, range(len(topo_order))))
+        bad_texts.sort(key=lambda key:rev_order[key[0][1]])
         transaction = repo.get_transaction()
         file_id_index = GraphIndexPrefixAdapter(
             self.new_pack.text_index,
@@ -1006,7 +1023,7 @@
             knit_index._add_callback = file_id_index.add_nodes
             output_knit.add_lines_with_ghosts(
                 key[1], parents, text_lines, random_id=True, check_content=False)
-        # 4) check that nothing inserted has a reference outside the keyspace.
+        # 5) check that nothing inserted has a reference outside the keyspace.
         missing_text_keys = self.new_pack._external_compression_parents_of_texts()
         if missing_text_keys:
             raise errors.BzrError('Reference to missing compression parents %r'

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-11-30 22:40:29 +0000
+++ b/bzrlib/repository.py	2007-12-03 06:16:05 +0000
@@ -1242,7 +1242,8 @@
                 raise errors.NoSuchIdInRepository(self, file_id)
             yield callable_data, weave.get_lines(revision_id)
 
-    def _generate_text_key_index(self, text_key_references=None):
+    def _generate_text_key_index(self, text_key_references=None,
+        ancestors=None):
         """Generate a new text key index for the repository.
 
         This is an expensive function that will take considerable time to run.
@@ -1252,8 +1253,9 @@
             the parents list will be [NULL_REVISION].
         """
         # All revisions, to find inventory parents.
-        revision_graph = self.get_revision_graph_with_ghosts()
-        ancestors = revision_graph.get_ancestors()
+        if ancestors is None:
+            revision_graph = self.get_revision_graph_with_ghosts()
+            ancestors = revision_graph.get_ancestors()
         if text_key_references is None:
             text_key_references = self.find_text_key_references()
         pb = ui.ui_factory.nested_progress_bar()




More information about the bazaar-commits mailing list