Rev 2970: * ``pack-0.92`` repositories can now be reconciled. in http://people.ubuntu.com/~robertc/baz2.0/reconcile
Robert Collins
robertc at robertcollins.net
Thu Nov 29 04:14:28 GMT 2007
At http://people.ubuntu.com/~robertc/baz2.0/reconcile
------------------------------------------------------------
revno: 2970
revision-id:robertc at robertcollins.net-20071129041406-4zzhzzpihmspiu9k
parent: robertc at robertcollins.net-20071129014512-qwfvultxzo9w6ot4
committer: Robert Collins <robertc at robertcollins.net>
branch nick: reconcile.packs
timestamp: Thu 2007-11-29 15:14:06 +1100
message:
* ``pack-0.92`` repositories can now be reconciled.
(Robert Collins, #154173)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
=== modified file 'NEWS'
--- a/NEWS 2007-11-28 01:35:49 +0000
+++ b/NEWS 2007-11-29 04:14:06 +0000
@@ -31,6 +31,9 @@
tree roots that's recorded for all other directories.
(Aaron Bentley, #164639)
+ * ``pack-0.92`` repositories can now be reconciled.
+ (Robert Collins, #154173)
+
* ``switch`` command added for changing the branch a lightweight checkout
is associated with and updating the tree to reflect the latest content
accordingly. This command was previously part of the BzrTools plug-in.
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2007-11-29 01:45:12 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2007-11-29 04:14:06 +0000
@@ -373,6 +373,14 @@
self.pack_transport, self.name,
time.time() - self.start_time)
+ def flush(self):
+ """Flush any current data."""
+ if self._buffer[1]:
+ bytes = ''.join(self._buffer[0])
+ self.write_stream.write(bytes)
+ self._hash.update(bytes)
+ self._buffer[:] = [[], 0]
+
def index_name(self, index_type, name):
"""Get the disk name of an index type for pack name 'name'."""
return name + NewPack.index_definitions[index_type][0]
@@ -531,6 +539,10 @@
# What text keys to copy. None for 'all texts'. This is set by
# _copy_inventory_texts
self._text_filter = None
+ self._extra_init()
+
+ def _extra_init(self):
+ """A template hook to allow extending the constructor trivially."""
def pack(self, pb=None):
"""Create a new pack by reading data from other packs.
@@ -621,49 +633,21 @@
inv_lines = self._copy_nodes_graph(inv_nodes, inventory_index_map,
self.new_pack._writer, self.new_pack.inventory_index, output_lines=True)
if self.revision_ids:
- fileid_revisions = self._pack_collection.repo._find_file_ids_from_xml_inventory_lines(
- inv_lines, self.revision_ids)
- text_filter = []
- for fileid, file_revids in fileid_revisions.iteritems():
- text_filter.extend(
- [(fileid, file_revid) for file_revid in file_revids])
+ self._process_inventory_lines(inv_lines)
else:
# eat the iterator to cause it to execute.
list(inv_lines)
- text_filter = None
+ self._text_filter = None
if 'pack' in debug.debug_flags:
mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
time.ctime(), self._pack_collection._upload_transport.base,
self.new_pack.random_name,
self.new_pack.inventory_index.key_count(),
time.time() - new_pack.start_time)
- self._text_filter = text_filter
- def _create_pack_from_packs(self):
- self.pb.update("Opening pack", 0, 5)
- self.new_pack = self.open_pack()
- new_pack = self.new_pack
- # buffer data - we won't be reading-back during the pack creation and
- # this makes a significant difference on sftp pushes.
- new_pack.set_write_cache_size(1024*1024)
- if 'pack' in debug.debug_flags:
- plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
- for a_pack in self.packs]
- if self.revision_ids is not None:
- rev_count = len(self.revision_ids)
- else:
- rev_count = 'all'
- mutter('%s: create_pack: creating pack from source packs: '
- '%s%s %s revisions wanted %s t=0',
- time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
- plain_pack_list, rev_count)
- self._copy_revision_texts()
- self._copy_inventory_texts()
+ def _copy_text_texts(self):
# select text keys
- text_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
- self.packs, 'text_index')[0]
- text_nodes = self._pack_collection._index_contents(text_index_map,
- self._text_filter)
+ text_index_map, text_nodes = self._get_text_nodes()
if self._text_filter is not None:
# We could return the keys copied as part of the return value from
# _copy_nodes_graph but this doesn't work all that well with the
@@ -683,12 +667,30 @@
# copy text keys and adjust values
self.pb.update("Copying content texts", 3)
list(self._copy_nodes_graph(text_nodes, text_index_map,
- new_pack._writer, new_pack.text_index))
+ self.new_pack._writer, self.new_pack.text_index))
+ self._log_copied_texts()
+
+ def _create_pack_from_packs(self):
+ self.pb.update("Opening pack", 0, 5)
+ self.new_pack = self.open_pack()
+ new_pack = self.new_pack
+ # buffer data - we won't be reading-back during the pack creation and
+ # this makes a significant difference on sftp pushes.
+ new_pack.set_write_cache_size(1024*1024)
if 'pack' in debug.debug_flags:
- mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
+ plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
+ for a_pack in self.packs]
+ if self.revision_ids is not None:
+ rev_count = len(self.revision_ids)
+ else:
+ rev_count = 'all'
+ mutter('%s: create_pack: creating pack from source packs: '
+ '%s%s %s revisions wanted %s t=0',
time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
- new_pack.text_index.key_count(),
- time.time() - new_pack.start_time)
+ plain_pack_list, rev_count)
+ self._copy_revision_texts()
+ self._copy_inventory_texts()
+ self._copy_text_texts()
# select signature keys
signature_filter = self._revision_keys # same keyspace
signature_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
@@ -832,6 +834,38 @@
pb.update("Copied record", record_index)
record_index += 1
+ def _external_compression_parents_of_new_texts(self):
+ keys = set()
+ refs = set()
+ for node in self.new_pack.text_index.iter_all_entries():
+ keys.add(node[1])
+ refs.update(node[3][1])
+ return refs - keys
+
+ def _get_text_nodes(self):
+ text_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
+ self.packs, 'text_index')[0]
+ return text_index_map, self._pack_collection._index_contents(text_index_map,
+ self._text_filter)
+
+ def _log_copied_texts(self):
+ if 'pack' in debug.debug_flags:
+ mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
+ time.ctime(), self._pack_collection._upload_transport.base,
+ self.new_pack.random_name,
+ self.new_pack.text_index.key_count(),
+ time.time() - self.new_pack.start_time)
+
+ def _process_inventory_lines(self, inv_lines):
+ """Use up the inv_lines generator and setup a text key filter."""
+ repo = self._pack_collection.repo
+ fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
+ inv_lines, self.revision_ids)
+ text_filter = []
+ for fileid, file_revids in fileid_revisions.iteritems():
+ text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
+ self._text_filter = text_filter
+
def _use_pack(self, new_pack):
"""Return True if new_pack should be used.
@@ -848,9 +882,120 @@
regenerated.
"""
+ def _extra_init(self):
+ self._data_changed = False
+
+ def _process_inventory_lines(self, inv_lines):
+ """Generate a text key reference map rather for reconciling with."""
+ repo = self._pack_collection.repo
+ refs = repo._find_text_key_references_from_xml_inventory_lines(
+ inv_lines)
+ self._text_refs = refs
+ # during reconcile we:
+ # - convert unreferenced texts to full texts
+ # - correct texts which reference a text not copied to be full texts
+ # - copy all others as-is but with corrected parents.
+ # - so at this point we don't know enough to decide what becomes a full
+ # text.
+ self._text_filter = None
+
+ def _copy_text_texts(self):
+ """generate what texts we should have and then copy."""
+ self.pb.update("Copying content texts", 3)
+ # we have three major tasks here:
+ # 1) generate the ideal index
+ repo = self._pack_collection.repo
+ ideal_index = repo._generate_text_key_index(self._text_refs)
+ # 2) generate a text_nodes list that contains all the deltas that can
+ # be used as-is, with corrected parents.
+ ok_nodes = []
+ bad_texts = []
+ discarded_nodes = []
+ NULL_REVISION = _mod_revision.NULL_REVISION
+ text_index_map, text_nodes = self._get_text_nodes()
+ for node in text_nodes:
+ # 0 - index
+ # 1 - key
+ # 2 - value
+ # 3 - refs
+ try:
+ ideal_parents = tuple(ideal_index[node[1]])
+ except KeyError:
+ discarded_nodes.append(node)
+ self._data_changed = True
+ else:
+ if ideal_parents == (NULL_REVISION,):
+ ideal_parents = ()
+ if ideal_parents == node[3][0]:
+ # no change needed.
+ ok_nodes.append(node)
+ elif ideal_parents[0:1] == node[3][0][0:1]:
+ # the left most parent is the same, or there are no parents
+ # today. Either way, we can preserve the representation as
+ # long as we change the refs to be inserted.
+ self._data_changed = True
+ ok_nodes.append((node[0], node[1], node[2],
+ (ideal_parents, node[3][1])))
+ self._data_changed = True
+ else:
+ # Reinsert this text completely
+ bad_texts.append((node[1], ideal_parents))
+ self._data_changed = True
+ # we're finished with some data.
+ del ideal_index
+ del text_nodes
+ # 3) bulk copy the ok data
+ list(self._copy_nodes_graph(ok_nodes, text_index_map,
+ self.new_pack._writer, self.new_pack.text_index))
+ # 3) adhoc copy all the other texts.
+ transaction = repo.get_transaction()
+ file_id_index = GraphIndexPrefixAdapter(
+ self.new_pack.text_index,
+ ('blank', ), 1,
+ add_nodes_callback=self.new_pack.text_index.add_nodes)
+ knit_index = KnitGraphIndex(file_id_index,
+ add_callback=file_id_index.add_nodes,
+ deltas=True, parents=True)
+ output_knit = knit.KnitVersionedFile('reconcile-texts',
+ self._pack_collection.transport,
+ None,
+ index=knit_index,
+ access_method=_PackAccess(
+ {self.new_pack.text_index:self.new_pack.access_tuple()},
+ (self.new_pack._writer, self.new_pack.text_index)),
+ factory=knit.KnitPlainFactory())
+ for key, parent_keys in bad_texts:
+ # We refer to the new pack to delta data being output.
+ # A possible improvement would be to catch errors on short reads
+ # and only flush then.
+ self.new_pack.flush()
+ parents = []
+ for parent_key in parent_keys:
+ if parent_key[0] != key[0]:
+ # Graph parents must match the fileid
+ raise errors.BzrError('Mismatched key parent %r:%r' %
+ (key, parent_keys))
+ parents.append(parent_key[1])
+ source_weave = repo.weave_store.get_weave(key[0], transaction)
+ text_lines = source_weave.get_lines(key[1])
+ # adapt the 'knit' to the current file_id.
+ file_id_index = GraphIndexPrefixAdapter(
+ self.new_pack.text_index,
+ (key[0], ), 1,
+ add_nodes_callback=self.new_pack.text_index.add_nodes)
+ knit_index._graph_index = file_id_index
+ knit_index._add_callback = file_id_index.add_nodes
+ output_knit.add_lines_with_ghosts(
+ key[1], parents, text_lines, random_id=True, check_content=False)
+ # 4) check that nothing inserted has a reference outside the keyspace.
+ missing_text_keys = self._external_compression_parents_of_new_texts()
+ if missing_text_keys:
+ raise errors.BzrError('Reference to missing compression parents %r'
+ % (refs - keys,))
+ self._log_copied_texts()
+
def _use_pack(self, new_pack):
"""Override _use_pack to check for reconcile having changed content."""
- self._data_changed = False
# XXX: we might be better checking this at the copy time.
original_inventory_keys = set()
inv_index = self._pack_collection.inventory_index.combined_index
@@ -1590,7 +1735,7 @@
self._transaction = None
# for tests
self._reconcile_does_inventory_gc = True
- self._reconcile_fixes_text_parents = False
+ self._reconcile_fixes_text_parents = True
self._reconcile_backsup_inventory = False
def _abort_write_group(self):
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-11-28 01:35:49 +0000
+++ b/bzrlib/repository.py 2007-11-29 04:14:06 +0000
@@ -1242,7 +1242,7 @@
raise errors.NoSuchIdInRepository(self, file_id)
yield callable_data, weave.get_lines(revision_id)
- def _generate_text_key_index(self):
+ def _generate_text_key_index(self, text_key_references=None):
"""Generate a new text key index for the repository.
This is an expensive function that will take considerable time to run.
@@ -1254,7 +1254,8 @@
# All revisions, to find inventory parents.
revision_graph = self.get_revision_graph_with_ghosts()
ancestors = revision_graph.get_ancestors()
- text_key_references = self.find_text_key_references()
+ if text_key_references is None:
+ text_key_references = self.find_text_key_references()
pb = ui.ui_factory.nested_progress_bar()
try:
return self._do_generate_text_key_index(ancestors,
More information about the bazaar-commits
mailing list