Rev 4441: Add a possible fix by thunking over to GroupCHKStreamSource for passing chk pages. in http://bazaar.launchpad.net/~jameinel/bzr/1.17-bug387294
John Arbash Meinel
john at arbash-meinel.com
Mon Jun 15 18:10:30 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.17-bug387294
------------------------------------------------------------
revno: 4441
revision-id: john at arbash-meinel.com-20090615171011-u7uruzpm6go01dmf
parent: john at arbash-meinel.com-20090615161127-9qexcwnyh8k2472b
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-bug387294
timestamp: Mon 2009-06-15 12:10:11 -0500
message:
Add a possible fix by thunking over to GroupCHKStreamSource for passing chk pages.
Possibly this is an inversion, especially since GroupCHKStreamSource inherits
from StreamSource. However, GCHK also maintains some state that doesn't seem
quite right on StreamSource. Needs investigation.
-------------- next part --------------
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-06-12 01:11:00 +0000
+++ b/bzrlib/repository.py 2009-06-15 17:10:11 +0000
@@ -4294,6 +4294,7 @@
self.to_format.rich_root_data)
def _get_inventory_stream(self, revision_ids):
+ import pdb; pdb.set_trace()
from_format = self.from_repository._format
if (from_format.supports_chks and self.to_format.supports_chks
and (from_format._serializer == self.to_format._serializer)):
@@ -4323,70 +4324,17 @@
not self.delta_on_metadata()))
def _get_chk_inventory_stream(self, revision_ids):
- """Fetch the inventory texts, along with the associated chk maps."""
- # We want an inventory outside of the search set, so that we can filter
- # out uninteresting chk pages. For now we use
- # _find_revision_outside_set, but if we had a Search with cut_revs, we
- # could use that instead.
- start_rev_id = self.from_repository._find_revision_outside_set(
- revision_ids)
- start_rev_key = (start_rev_id,)
- inv_keys_to_fetch = [(rev_id,) for rev_id in revision_ids]
- if start_rev_id != _mod_revision.NULL_REVISION:
- inv_keys_to_fetch.append((start_rev_id,))
- # Any repo that supports chk_bytes must also support out-of-order
- # insertion. At least, that is how we expect it to work
- # We use get_record_stream instead of iter_inventories because we want
- # to be able to insert the stream as well. We could instead fetch
- # allowing deltas, and then iter_inventories, but we don't know whether
- # source or target is more 'local' anway.
- inv_stream = self.from_repository.inventories.get_record_stream(
- inv_keys_to_fetch, 'unordered',
- True) # We need them as full-texts so we can find their references
- uninteresting_chk_roots = set()
- interesting_chk_roots = set()
- def filter_inv_stream(inv_stream):
- for idx, record in enumerate(inv_stream):
- ### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
- bytes = record.get_bytes_as('fulltext')
- chk_inv = inventory.CHKInventory.deserialise(
- self.from_repository.chk_bytes, bytes, record.key)
- if record.key == start_rev_key:
- uninteresting_chk_roots.add(chk_inv.id_to_entry.key())
- p_id_map = chk_inv.parent_id_basename_to_file_id
- if p_id_map is not None:
- uninteresting_chk_roots.add(p_id_map.key())
- else:
- yield record
- interesting_chk_roots.add(chk_inv.id_to_entry.key())
- p_id_map = chk_inv.parent_id_basename_to_file_id
- if p_id_map is not None:
- interesting_chk_roots.add(p_id_map.key())
- ### pb.update('fetch inventory', 0, 2)
- yield ('inventories', filter_inv_stream(inv_stream))
- # Now that we have worked out all of the interesting root nodes, grab
- # all of the interesting pages and insert them
- ### pb.update('fetch inventory', 1, 2)
- interesting = chk_map.iter_interesting_nodes(
- self.from_repository.chk_bytes, interesting_chk_roots,
- uninteresting_chk_roots)
- def to_stream_adapter():
- """Adapt the iter_interesting_nodes result to a single stream.
-
- iter_interesting_nodes returns records as it processes them, along
- with keys. However, we only want to return the records themselves.
- """
- for record, items in interesting:
- if record is not None:
- yield record
- # XXX: We could instead call get_record_stream(records.keys())
- # ATM, this will always insert the records as fulltexts, and
- # requires that you can hang on to records once you have gone
- # on to the next one. Further, it causes the target to
- # recompress the data. Testing shows it to be faster than
- # requesting the records again, though.
- yield ('chk_bytes', to_stream_adapter())
- ### pb.update('fetch inventory', 2, 2)
+ # All the code for how to read the inventory pages, and determine the
+ # chk pages is already present in GroupCHKStreamSource, so just thunk
+ # over to that StreamSource to do the heavy lifting here
+ from bzrlib.repofmt.groupcompress_repo import GroupCHKStreamSource
+ alt_source = GroupCHKStreamSource(self.from_repository, self.to_format)
+ revision_keys = [(r,) for r in revision_ids]
+ yield alt_source._get_inventory_stream(revision_keys)
+ from_repo = self.from_repository
+ parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
+ for stream_info in alt_source._get_filtered_chk_streams(parent_ids):
+ yield stream_info
def _get_convertable_inventory_stream(self, revision_ids):
# XXX: One of source or target is using chks, and they don't have
More information about the bazaar-commits
mailing list