Rev 4521: Rework the _stream_invs_as_deltas code a bit. in http://bazaar.launchpad.net/~jameinel/bzr/1.18-inventory-delta
John Arbash Meinel
john at arbash-meinel.com
Tue Jul 28 22:30:35 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.18-inventory-delta
------------------------------------------------------------
revno: 4521
revision-id: john at arbash-meinel.com-20090728213029-astk1w5u08va5ayj
parent: john at arbash-meinel.com-20090728205431-ygav9b3cjp2pbqzp
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.18-inventory-delta
timestamp: Tue 2009-07-28 16:30:29 -0500
message:
Rework the _stream_invs_as_deltas code a bit.
Add a cache, make it a bit clearer when we are building a delta from scratch
rather than from a parent.
Don't create a delta to the NULL_REVISION for *every* inventory, just for
ones without a viable parent.
-------------- next part --------------
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-07-28 20:54:31 +0000
+++ b/bzrlib/repository.py 2009-07-28 21:30:29 +0000
@@ -3511,6 +3511,7 @@
# This is redundant with format.check_conversion_target(), however that
# raises an exception, and we just want to say "False" as in we won't
# support converting between these formats.
+ return False
if source.supports_rich_root() and not target.supports_rich_root():
return False
if (source._format.supports_tree_reference
@@ -4234,7 +4235,7 @@
# Any time we switch serializations, we want to use an
# inventory-delta based approach.
return self._get_convertable_inventory_stream(revision_ids,
- fulltexts=missing)
+ delta_versus_null=missing)
def _get_simple_inventory_stream(self, revision_ids, missing=False):
# NB: This currently reopens the inventory weave in source;
@@ -4248,16 +4249,26 @@
[(rev_id,) for rev_id in revision_ids],
self.inventory_fetch_order(), delta_closure))
- def _get_convertable_inventory_stream(self, revision_ids, fulltexts=False):
+ def _get_convertable_inventory_stream(self, revision_ids,
+ delta_versus_null=False):
# The source is using CHKs, but the target either doesn't or has a
# different serializer. The StreamSink code expects to be able to
# convert on the target, so we need to put bytes-on-the-wire that can
# be converted. That means inventory deltas (if the remote is <1.18,
# RemoteStreamSink will fallback to VFS to insert the deltas).
yield ('inventories',
- self._stream_invs_as_deltas(revision_ids, fulltexts=fulltexts))
-
- def _stream_invs_as_deltas(self, revision_ids, fulltexts=False):
+ self._stream_invs_as_deltas(revision_ids,
+ delta_versus_null=delta_versus_null))
+
+ def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
+ """Return a stream of inventory-deltas for the given rev ids.
+
+ :param revision_ids: The list of inventories to transmit
+ :param delta_versus_null: Don't try to find a minimal delta for this
+ entry, instead compute the delta versus the NULL_REVISION. This
+ effectively streams a complete inventory. Used for stuff like
+ filling in missing parents, etc.
+ """
from_repo = self.from_repository
revision_keys = [(rev_id,) for rev_id in revision_ids]
parent_map = from_repo.inventories.get_parent_map(revision_keys)
@@ -4271,39 +4282,42 @@
format = from_repo._format
flags = (format.rich_root_data, format.supports_tree_reference)
invs_sent_so_far = set([_mod_revision.NULL_REVISION])
+ inventory_cache = lru_cache.LRUCache(50)
+ null_inventory = Inventory(None)
for inv in inventories:
key = (inv.revision_id,)
- parents = parent_map.get(key, ())
- if fulltexts or parents == ():
- # Either the caller asked for fulltexts, or there is no parent,
- # so, stream as a delta from null:.
- basis_id = _mod_revision.NULL_REVISION
- parent_inv = Inventory(None)
- delta = inv._make_delta(parent_inv)
- else:
- # Make a delta against each parent so that we can find the
- # smallest.
- best_delta = None
- parent_ids = [parent_key[0] for parent_key in parents]
- parent_ids.append(_mod_revision.NULL_REVISION)
+ parent_keys = parent_map.get(key, ())
+ delta = None
+ if not delta_versus_null and parent_keys:
+ # The caller did not ask for complete inventories and we have
+ # some parents that we can delta against. Make a delta against
+ # each parent so that we can find the smallest.
+ parent_ids = [parent_key[0] for parent_key in parent_keys]
for parent_id in parent_ids:
if parent_id not in invs_sent_so_far:
# We don't know that the remote side has this basis, so
# we can't use it.
continue
if parent_id == _mod_revision.NULL_REVISION:
- parent_inv = Inventory(None)
+ parent_inv = null_inventory
else:
- parent_inv = from_repo.get_inventory(parent_id)
+ parent_inv = inventory_cache.get(parent_id, None)
+ if parent_inv is None:
+ parent_inv = from_repo.get_inventory(parent_id)
candidate_delta = inv._make_delta(parent_inv)
- if (best_delta is None or
- len(best_delta) > len(candidate_delta)):
- best_delta = candidate_delta
+ if (delta is None or
+ len(delta) > len(candidate_delta)):
+ delta = candidate_delta
basis_id = parent_id
- delta = best_delta
- invs_sent_so_far.add(basis_id)
+ if delta is None:
+ # Either none of the parents ended up being suitable, or we
+ # were asked to delta against NULL
+ basis_id = _mod_revision.NULL_REVISION
+ delta = inv._make_delta(null_inventory)
+ invs_sent_so_far.add(inv.revision_id)
+ inventory_cache[inv.revision_id] = inv
yield versionedfile.InventoryDeltaContentFactory(
- key, parents, None, delta, basis_id, flags, from_repo)
+ key, parent_keys, None, delta, basis_id, flags, from_repo)
def _iter_for_revno(repo, partial_history_cache, stop_index=None,
More information about the bazaar-commits
mailing list