Rev 15: Instead of always copying nodes and only fixing the index, in lp:///~jameinel/bzr/fix277537

John Arbash Meinel john at arbash-meinel.com
Thu Jan 29 23:06:45 GMT 2009


At lp:///~jameinel/bzr/fix277537

------------------------------------------------------------
revno: 15
revision-id: john at arbash-meinel.com-20090129230636-76yx5n2ri0i4n7av
parent: v.ladeuil+lp at free.fr-20081127073130-781r36ya03vwhxui
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: fix277537
timestamp: Thu 2009-01-29 17:06:36 -0600
message:
  Instead of always copying nodes and only fixing the index,
  change the code to re-insert the texts that would have deltas pointing at
  non-left-hand parents.
  
  'bzr reconcile' was written to do just that, but it somehow got removed
  in the new code.
-------------- next part --------------
=== modified file 'reconcile.py'
--- a/reconcile.py	2008-11-27 07:31:30 +0000
+++ b/reconcile.py	2009-01-29 23:06:36 +0000
@@ -29,6 +29,7 @@
     revision as _mod_revision,
     trace,
     tsort,
+    ui,
     )
 
 from bzrlib.repofmt import (
@@ -64,9 +65,10 @@
         ok_nodes = []
         NULL_REVISION = _mod_revision.NULL_REVISION
         text_index_map, text_nodes = self._get_text_nodes()
+        fixed_nodes = {}
         for node in text_nodes:
             # 0 - index
-            # 1 - key 
+            # 1 - key
             # 2 - value
             # 3 - refs
             key = node[1]
@@ -89,10 +91,23 @@
                 else:
                     wrong_parents = wrong_index[key]
                     if actual_parents == wrong_parents:
-                        # Preserve the representation but fix the parents
-                        ok_nodes.append((node[0], node[1], node[2],
-                                         (ideal_parents, node[3][1])))
-                        # Reinsert this text completely
+                        if not node[3][1] or node[3][1] == (ideal_parents[0],):
+                            # There is no delta compression, or it is already
+                            # pointing at the ideal_parent, so we can just copy
+                            # the data across as-is, and just update the index
+                            # appropriately.
+                            # Preserve the representation but fix the parents
+                            ok_nodes.append((node[0], node[1], node[2],
+                                             (ideal_parents, node[3][1])))
+                        elif node[3][1] == (wrong_parents[0],):
+                            # We have a compression parent pointing at a
+                            # left-hand parent which we will be changing.
+                            # We will copy this as a fulltext
+                            # Reinsert this text completely
+                            fixed_nodes[node[1]] = (wrong_parents,
+                                                    ideal_parents)
+                        else:
+                            import pdb; pdb.set_trace()
                         self._data_changed = True
                         self.wrong_parents_fixed += 1
                     else:
@@ -105,12 +120,56 @@
         del text_nodes
         del wrong_index
         del ideal_index
-        # 3) bulk copy the data
+        # 3) bulk copy the good data
         total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
         list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
             self.new_pack.text_index, readv_group_iter, total_items))
+        # 4) Now copy the fixed nodes
+        self._copy_fixed_nodes(fixed_nodes)
         self._log_copied_texts()
 
+    def _copy_fixed_nodes(self, fixed_nodes):
+        """Copy the texts that were marked "fix needed" via fulltext."""
+        if not fixed_nodes:
+            # Nothing to copy
+            return
+        # Copy the "fixed_nodes" by using get_record_stream() to extract
+        # them back to fulltexts, and re-insert them so they become new deltas
+        repo = self._pack_collection.repo
+        data_access = knit._DirectPackAccess(
+                {self.new_pack.text_index:self.new_pack.access_tuple()})
+        data_access.set_writer(self.new_pack._writer,
+                               self.new_pack.text_index,
+                               self.new_pack.access_tuple())
+        output_texts = knit.KnitVersionedFiles(
+            knit._KnitGraphIndex(self.new_pack.text_index,
+                add_callback=self.new_pack.text_index.add_nodes,
+                deltas=True, parents=True, is_locked=repo.is_locked),
+            data_access=data_access, max_delta_chain=200)
+        pb = ui.ui_factory.nested_progress_bar()
+        try:
+            num_texts = len(fixed_nodes)
+            pb.update('Copying fulltext', 0, num_texts)
+            node_to_parents = dict((k, [p for p in v[1] if p in fixed_nodes])
+                                   for k, v in fixed_nodes.iteritems())
+            sorted_nodes = tsort.topo_sort(node_to_parents)
+            for start in xrange(0, num_texts, 100):
+                batch = sorted_nodes[start:start+100]
+                for idx, record in enumerate(repo.texts.get_record_stream(
+                                                  batch, 'topological', True)):
+                    # Flush the new_pack, so that deltas can be written against
+                    # the just-written data
+                    self.new_pack.flush()
+                    pb.update('Copying fulltext', start+idx, num_texts)
+                    wrong_parents, ideal_parents = fixed_nodes[record.key]
+                    assert record.parents == wrong_parents
+                    lines = osutils.chunks_to_lines(
+                                record.get_bytes_as('chunked'))
+                    output_texts.add_lines(record.key, ideal_parents, lines,
+                                           random_id=True, check_content=False)
+        finally:
+            pb.finished()
+
 
 class InventoryAncestryReconciler(reconcile.PackReconciler):
 



More information about the bazaar-commits mailing list