Rev 15: Instead of always copying nodes and only fixing the index, in lp:///~jameinel/bzr/fix277537
John Arbash Meinel
john at arbash-meinel.com
Thu Jan 29 23:06:45 GMT 2009
At lp:///~jameinel/bzr/fix277537
------------------------------------------------------------
revno: 15
revision-id: john at arbash-meinel.com-20090129230636-76yx5n2ri0i4n7av
parent: v.ladeuil+lp at free.fr-20081127073130-781r36ya03vwhxui
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: fix277537
timestamp: Thu 2009-01-29 17:06:36 -0600
message:
Instead of always copying nodes and only fixing the index,
change the code to re-insert the texts that would have deltas pointing at
non-left-hand parents.
'bzr reconcile' was written to do just that, but it somehow got removed
in the new code.
-------------- next part --------------
=== modified file 'reconcile.py'
--- a/reconcile.py 2008-11-27 07:31:30 +0000
+++ b/reconcile.py 2009-01-29 23:06:36 +0000
@@ -29,6 +29,7 @@
revision as _mod_revision,
trace,
tsort,
+ ui,
)
from bzrlib.repofmt import (
@@ -64,9 +65,10 @@
ok_nodes = []
NULL_REVISION = _mod_revision.NULL_REVISION
text_index_map, text_nodes = self._get_text_nodes()
+ fixed_nodes = {}
for node in text_nodes:
# 0 - index
- # 1 - key
+ # 1 - key
# 2 - value
# 3 - refs
key = node[1]
@@ -89,10 +91,23 @@
else:
wrong_parents = wrong_index[key]
if actual_parents == wrong_parents:
- # Preserve the representation but fix the parents
- ok_nodes.append((node[0], node[1], node[2],
- (ideal_parents, node[3][1])))
- # Reinsert this text completely
+ if not node[3][1] or node[3][1] == (ideal_parents[0],):
+ # There is no delta compression, or it is already
+ # pointing at the ideal_parent, so we can just copy
+ # the data across as-is, and just update the index
+ # appropriately.
+ # Preserve the representation but fix the parents
+ ok_nodes.append((node[0], node[1], node[2],
+ (ideal_parents, node[3][1])))
+ elif node[3][1] == (wrong_parents[0],):
+ # We have a compression parent pointing at a
+ # left-hand parent which we will be changing.
+ # We will copy this as a fulltext
+ # Reinsert this text completely
+ fixed_nodes[node[1]] = (wrong_parents,
+ ideal_parents)
+ else:
+ import pdb; pdb.set_trace()
self._data_changed = True
self.wrong_parents_fixed += 1
else:
@@ -105,12 +120,56 @@
del text_nodes
del wrong_index
del ideal_index
- # 3) bulk copy the data
+ # 3) bulk copy the good data
total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
self.new_pack.text_index, readv_group_iter, total_items))
+ # 4) Now copy the fixed nodes
+ self._copy_fixed_nodes(fixed_nodes)
self._log_copied_texts()
+ def _copy_fixed_nodes(self, fixed_nodes):
+ """Copy the texts that were marked "fix needed" via fulltext."""
+ if not fixed_nodes:
+ # Nothing to copy
+ return
+ # Copy the "fixed_nodes" by using get_record_stream() to extract
+ # them back to fulltexts, and re-insert them so they become new deltas
+ repo = self._pack_collection.repo
+ data_access = knit._DirectPackAccess(
+ {self.new_pack.text_index:self.new_pack.access_tuple()})
+ data_access.set_writer(self.new_pack._writer,
+ self.new_pack.text_index,
+ self.new_pack.access_tuple())
+ output_texts = knit.KnitVersionedFiles(
+ knit._KnitGraphIndex(self.new_pack.text_index,
+ add_callback=self.new_pack.text_index.add_nodes,
+ deltas=True, parents=True, is_locked=repo.is_locked),
+ data_access=data_access, max_delta_chain=200)
+ pb = ui.ui_factory.nested_progress_bar()
+ try:
+ num_texts = len(fixed_nodes)
+ pb.update('Copying fulltext', 0, num_texts)
+ node_to_parents = dict((k, [p for p in v[1] if p in fixed_nodes])
+ for k, v in fixed_nodes.iteritems())
+ sorted_nodes = tsort.topo_sort(node_to_parents)
+ for start in xrange(0, num_texts, 100):
+ batch = sorted_nodes[start:start+100]
+ for idx, record in enumerate(repo.texts.get_record_stream(
+ batch, 'topological', True)):
+ # Flush the new_pack, so that deltas can be written against
+ # the just-written data
+ self.new_pack.flush()
+ pb.update('Copying fulltext', start+idx, num_texts)
+ wrong_parents, ideal_parents = fixed_nodes[record.key]
+ assert record.parents == wrong_parents
+ lines = osutils.chunks_to_lines(
+ record.get_bytes_as('chunked'))
+ output_texts.add_lines(record.key, ideal_parents, lines,
+ random_id=True, check_content=False)
+ finally:
+ pb.finished()
+
class InventoryAncestryReconciler(reconcile.PackReconciler):
More information about the bazaar-commits
mailing list