Rev 3835: Keep the last 100 revisions when we process the next 100 revisions. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack
John Arbash Meinel
john at arbash-meinel.com
Tue Jan 6 19:59:36 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack
------------------------------------------------------------
revno: 3835
revision-id: john at arbash-meinel.com-20090106195920-yo9p1al17e1mqzcv
parent: john at arbash-meinel.com-20090106193841-9dv2gx4k10ongrmw
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hack
timestamp: Tue 2009-01-06 13:59:20 -0600
message:
Keep the last 100 revisions when we process the next 100 revisions.
This gives us a better hit rate, and is probably a reasonable trade-off point.
We could decrease the batch size if we wanted to hold memory consumption
at its previous level.
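
The change amounts to a two-level cache: the trees extracted for the current
batch of ~100 revisions, plus the trees kept over from the batch before it, so
a parent that falls just across a batch boundary is still a cache hit. A
minimal sketch of the shape of that loop follows; get_tree and parent_map are
hypothetical stand-ins for the repository calls used in the real patch, not
bzrlib APIs.

    BATCH_SIZE = 100

    def fetch_in_batches(revision_ids, parent_map, get_tree):
        last_batch = {}                      # trees from the previous batch
        for offset in range(0, len(revision_ids), BATCH_SIZE):
            new_cache = {}                   # trees extracted in this batch
            for rev_id in revision_ids[offset:offset + BATCH_SIZE]:
                tree = get_tree(rev_id)
                new_cache[rev_id] = tree
                for parent_id in parent_map.get(rev_id, ()):
                    if parent_id in new_cache:
                        parent_tree = new_cache[parent_id]   # same batch
                    elif parent_id in last_batch:
                        parent_tree = last_batch[parent_id]  # previous batch
                    else:
                        parent_tree = get_tree(parent_id)    # cache miss
                    # ... compute the inventory delta against parent_tree ...
            last_batch = new_cache           # keep only the last ~100 trees

Memory holds at most two batches of trees at a time, which is why the commit
message notes that shrinking the batch size would bring consumption back down.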
-------------- next part --------------
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2008-12-24 21:48:06 +0000
+++ b/bzrlib/repository.py 2009-01-06 19:59:20 +0000
@@ -3277,15 +3277,17 @@
return False
return True
- def _fetch_batch(self, revision_ids, basis_id, basis_tree, counter):
+ def _fetch_batch(self, revision_ids, basis_id, basis_tree, counter,
+ last_batch):
"""Fetch across a few revisions.
:param revision_ids: The revisions to copy
:param basis_id: The revision_id of basis_tree
:param basis_tree: A tree that is not in revision_ids which should
already exist in the target.
- :return: (basis_id, basis_tree) A new basis to use now that these trees
- have been copied.
+ :param last_batch: A dict of {revision_id: tree} extracted by the
+ previous batch
+ :return: (basis_id, basis_tree, new_cache) A new basis to use now that
+ these trees have been copied, and the trees extracted by this batch.
"""
# Walk through all revisions; get inventory deltas, copy referenced
# texts that delta references, insert the delta, revision and
@@ -3293,11 +3295,11 @@
text_keys = set()
pending_deltas = []
pending_revisions = []
- recent_cache = {basis_id: basis_tree}
+ new_cache = {}
parent_map = self.source.get_parent_map(revision_ids)
for tree in self.source.revision_trees(revision_ids):
current_revision_id = tree.get_revision_id()
- recent_cache[current_revision_id] = tree
+ new_cache[current_revision_id] = tree
parent_ids = parent_map.get(current_revision_id, [])
if len(parent_ids) == 0:
# No parents to compare against, we just use the current basis
@@ -3309,11 +3311,15 @@
# The basis is the one parent, just use it
counter[0] += 1
else:
- if parent_id in recent_cache:
+ if parent_id in new_cache:
# the one parent is already cached
counter[1] += 1
basis_id = parent_id
- basis_tree = recent_cache[basis_id]
+ basis_tree = new_cache[basis_id]
+ elif parent_id in last_batch:
+ counter[5] += 1
+ basis_id = parent_id
+ basis_tree = last_batch[basis_id]
else:
# one parent, but it isn't in the cache
counter[2] += 1
@@ -3321,9 +3327,13 @@
else: # More than one parent
deltas = []
for idx, parent_id in enumerate(parent_ids):
- if parent_id not in recent_cache:
+ if parent_id in new_cache:
+ parent_tree = new_cache[parent_id]
+ elif parent_id in last_batch:
+ parent_tree = last_batch[parent_id]
+ else:
+ # This parent is not in one of our caches
continue
- parent_tree = recent_cache[parent_id]
delta = tree.inventory._make_delta(parent_tree.inventory)
deltas.append((len(delta), idx, parent_id, parent_tree, delta))
deltas.sort()
@@ -3383,7 +3393,7 @@
except errors.NoSuchRevision:
pass
self.target.add_revision(revision.revision_id, revision)
- return basis_id, basis_tree
+ return basis_id, basis_tree, new_cache
def _fetch_all_revisions(self, revision_ids, pb):
"""Fetch everything for the list of revisions.
@@ -3395,25 +3405,26 @@
"""
basis_id, basis_tree = self._get_basis(revision_ids[0])
batch_size = 100
- total_counter = [0, 0, 0, 0, 0]
+ last_batch = {}
+ total_counter = [0, 0, 0, 0, 0, 0]
for offset in range(0, len(revision_ids), batch_size):
self.target.start_write_group()
try:
- counter = [0, 0, 0, 0, 0]
+ counter = [0, 0, 0, 0, 0, 0]
pb.update('Transferring revisions', offset,
len(revision_ids))
batch = revision_ids[offset:offset+batch_size]
- basis_id, basis_tree = self._fetch_batch(batch,
- basis_id, basis_tree, counter)
+ basis_id, basis_tree, last_batch = self._fetch_batch(batch,
+ basis_id, basis_tree, counter, last_batch)
if total_counter[3] > 0:
avg_parent = float(total_counter[4]) / total_counter[3]
else:
avg_parent = 0
for idx in xrange(len(counter)):
total_counter[idx] += counter[idx]
- note('Parent basis:%d cache:%d random:%d, multi: %d'
+ note('Parent basis:%d cache:%d last: %d, random:%d, multi: %d'
' avg: %.3f, %s',
- counter[0], counter[1], counter[2], counter[3],
+ counter[0], counter[1], counter[5], counter[2], counter[3],
avg_parent, total_counter)
except:
self.target.abort_write_group()
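
For reading the note() output above, the counter slots appear to map as
follows. Slots 0, 1, 5 and 2 are incremented in the hunks shown; slots 3 and
4 are incremented outside this diff, so their meanings are inferred from the
format string and the average computation, not confirmed by the patch.

    COUNTER_FIELDS = {
        0: 'basis',    # single parent that is already the current basis
        1: 'cache',    # single parent found in this batch (new_cache)
        5: 'last',     # single parent found in the previous batch (last_batch)
        2: 'random',   # single parent not cached; basis re-extracted
        3: 'multi',    # revisions with more than one parent (inferred)
        4: 'parents',  # running parent total used for the average (inferred)
    }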