Rev 3824: Change the code to use an LRUCache. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/generic_fetch_ordering
John Arbash Meinel
john at arbash-meinel.com
Wed Feb 18 17:40:55 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/generic_fetch_ordering
------------------------------------------------------------
revno: 3824
revision-id: john at arbash-meinel.com-20090218174043-p6k4m0d4x9qchyxt
parent: john at arbash-meinel.com-20090218171343-4l9rgozo4hbj3l2h
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: generic_fetch_ordering
timestamp: Wed 2009-02-18 11:40:43 -0600
message:
Change the code to use an LRUCache.
Seems to work just fine for what we need, and saves some headaches
with passing around the various dicts.
-------------- next part --------------
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-02-18 17:13:43 +0000
+++ b/bzrlib/repository.py 2009-02-18 17:40:43 +0000
@@ -3278,43 +3278,34 @@
return False
return True
- def _get_delta_for_revision(self, basis_id, basis_tree, last_batch,
- new_batch, tree, parent_ids):
+ def _get_delta_for_revision(self, tree, parent_ids, basis_id, cache):
"""Get the best delta and base for this revision.
- :return: (basis_id, basis_tree, delta)
+ :return: (basis_id, delta)
"""
- possible_trees = []
- for parent_id in parent_ids:
- if parent_id == basis_id:
- possible_trees.append((basis_id, basis_tree))
- elif parent_id in new_batch:
- possible_trees.append((parent_id, new_batch[parent_id]))
- elif parent_id in last_batch:
- possible_trees.append((parent_id, last_batch[parent_id]))
+ possible_trees = [(parent_id, cache[parent_id])
+ for parent_id in parent_ids
+ if parent_id in cache]
if len(possible_trees) == 0:
# There either aren't any parents, or the parents aren't in the
- # caches, so just use the last converted tree
- possible_trees.append((basis_id, basis_tree))
+ # cache, so just use the last converted tree
+ possible_trees.append((basis_id, cache[basis_id]))
deltas = []
for basis_id, basis_tree in possible_trees:
delta = tree.inventory._make_delta(basis_tree.inventory)
- deltas.append((len(delta), basis_id, basis_tree, delta))
+ deltas.append((len(delta), basis_id, delta))
deltas.sort()
return deltas[0][1:]
- def _fetch_batch(self, revision_ids, basis_id, basis_tree, last_batch):
+ def _fetch_batch(self, revision_ids, basis_id, cache):
"""Fetch across a few revisions.
:param revision_ids: The revisions to copy
- :param basis_id: The revision_id of basis_tree
- :param basis_tree: A tree that is not in revision_ids which should
- already exist in the target.
- :param last_batch: A dict of {revid: rev_tree}. Used as potential basis
- trees for computing a new delta.
- :return: (basis_id, basis_tree, new_batch) A new basis to use now that
- these trees have been copied, new_batch is a dict that can be
- passed back in via last_batch
+ :param basis_id: The revision_id of a tree that must be in cache, used
+ as a basis for delta when no other base is available
+ :param cache: A cache of RevisionTrees that we can use.
+ :return: The revision_id of the last converted tree. The RevisionTree
+ for it will be in cache
"""
# Walk though all revisions; get inventory deltas, copy referenced
# texts that delta references, insert the delta, revision and
@@ -3322,14 +3313,12 @@
text_keys = set()
pending_deltas = []
pending_revisions = []
- new_batch = {}
parent_map = self.source.get_parent_map(revision_ids)
for tree in self.source.revision_trees(revision_ids):
current_revision_id = tree.get_revision_id()
- new_batch[current_revision_id] = tree
parent_ids = parent_map.get(current_revision_id, ())
- basis_id, basis_tree, delta = self._get_delta_for_revision(
- basis_id, basis_tree, last_batch, new_batch, tree, parent_ids)
+ basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
+ basis_id, cache)
# Find text entries that need to be copied
for old_path, new_path, file_id, entry in delta:
if new_path is not None:
@@ -3345,8 +3334,8 @@
pending_deltas.append((basis_id, delta,
current_revision_id, revision.parent_ids))
pending_revisions.append(revision)
+ cache[current_revision_id] = tree
basis_id = current_revision_id
- basis_tree = tree
# Copy file texts
from_texts = self.source.texts
to_texts = self.target.texts
@@ -3366,7 +3355,7 @@
except errors.NoSuchRevision:
pass
self.target.add_revision(revision.revision_id, revision)
- return basis_id, basis_tree, new_batch
+ return basis_id
def _fetch_all_revisions(self, revision_ids, pb):
"""Fetch everything for the list of revisions.
@@ -3378,15 +3367,16 @@
"""
basis_id, basis_tree = self._get_basis(revision_ids[0])
batch_size = 100
- last_batch = {}
+ cache = lru_cache.LRUCache(100)
+ cache[basis_id] = basis_tree
+ del basis_tree # We don't want to hang on to it here
for offset in range(0, len(revision_ids), batch_size):
self.target.start_write_group()
try:
pb.update('Transferring revisions', offset,
len(revision_ids))
batch = revision_ids[offset:offset+batch_size]
- basis_id, basis_tree, last_batch = self._fetch_batch(batch,
- basis_id, basis_tree, last_batch)
+ basis_id = self._fetch_batch(batch, basis_id, cache)
except:
self.target.abort_write_group()
raise
More information about the bazaar-commits
mailing list