Rev 3835: Keep the last 100 revisions when we process the next 100 revisions. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack

John Arbash Meinel john at arbash-meinel.com
Tue Jan 6 19:59:36 GMT 2009


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack

------------------------------------------------------------
revno: 3835
revision-id: john at arbash-meinel.com-20090106195920-yo9p1al17e1mqzcv
parent: john at arbash-meinel.com-20090106193841-9dv2gx4k10ongrmw
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hack
timestamp: Tue 2009-01-06 13:59:20 -0600
message:
  Keep the last 100 revisions when we process the next 100 revisions.
  This gives us a better cache hit rate when looking up parent trees, and is
  probably a reasonable trade-off point. We could probably decrease the batch
  size if we wanted to keep memory consumption at its previous level.
-------------- next part --------------
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-12-24 21:48:06 +0000
+++ b/bzrlib/repository.py	2009-01-06 19:59:20 +0000
@@ -3277,15 +3277,17 @@
             return False
         return True
 
-    def _fetch_batch(self, revision_ids, basis_id, basis_tree, counter):
+    def _fetch_batch(self, revision_ids, basis_id, basis_tree, counter,
+                     last_batch):
         """Fetch across a few revisions.
 
         :param revision_ids: The revisions to copy
         :param basis_id: The revision_id of basis_tree
         :param basis_tree: A tree that is not in revision_ids which should
             already exist in the target.
-        :return: (basis_id, basis_tree) A new basis to use now that these trees
-            have been copied.
+        :param last_batch: The last revisions that we extracted
+        :return: (basis_id, basis_tree, new_batch) A new basis to use now that these trees
+            have been copied, and the new revisions we extracted.
         """
         # Walk though all revisions; get inventory deltas, copy referenced
         # texts that delta references, insert the delta, revision and
@@ -3293,11 +3295,11 @@
         text_keys = set()
         pending_deltas = []
         pending_revisions = []
-        recent_cache = {basis_id: basis_tree}
+        new_cache = {}
         parent_map = self.source.get_parent_map(revision_ids)
         for tree in self.source.revision_trees(revision_ids):
             current_revision_id = tree.get_revision_id()
-            recent_cache[current_revision_id] = tree
+            new_cache[current_revision_id] = tree
             parent_ids = parent_map.get(current_revision_id, [])
             if len(parent_ids) == 0:
                 # No parents to compare against, we just use the current basis
@@ -3309,11 +3311,15 @@
                     # The basis is the one parent, just use it
                     counter[0] += 1
                 else:
-                    if parent_id in recent_cache:
+                    if parent_id in new_cache:
                         # the one parent is already cached
                         counter[1] += 1
                         basis_id = parent_id
-                        basis_tree = recent_cache[basis_id]
+                        basis_tree = new_cache[basis_id]
+                    elif parent_id in last_batch:
+                        counter[5] += 1
+                        basis_id = parent_id
+                        basis_tree = last_batch[basis_id]
                     else:
                         # one parent, but it isn't in the cache
                         counter[2] += 1
@@ -3321,9 +3327,13 @@
             else: # More than one parent
                 deltas = []
                 for idx, parent_id in enumerate(parent_ids):
-                    if parent_id not in recent_cache:
+                    if parent_id in new_cache:
+                        parent_tree = new_cache[parent_id]
+                    elif parent_id in last_batch:
+                        parent_tree = last_batch[parent_id]
+                    else:
+                        # This parent is not in one of our caches
                         continue
-                    parent_tree = recent_cache[parent_id]
                     delta = tree.inventory._make_delta(parent_tree.inventory)
                     deltas.append((len(delta), idx, parent_id, parent_tree, delta))
                 deltas.sort()
@@ -3383,7 +3393,7 @@
             except errors.NoSuchRevision:
                 pass
             self.target.add_revision(revision.revision_id, revision)
-        return basis_id, basis_tree
+        return basis_id, basis_tree, new_cache
 
     def _fetch_all_revisions(self, revision_ids, pb):
         """Fetch everything for the list of revisions.
@@ -3395,25 +3405,26 @@
         """
         basis_id, basis_tree = self._get_basis(revision_ids[0])
         batch_size = 100
-        total_counter = [0, 0, 0, 0, 0]
+        last_batch = {}
+        total_counter = [0, 0, 0, 0, 0, 0]
         for offset in range(0, len(revision_ids), batch_size):
             self.target.start_write_group()
             try:
-                counter = [0, 0, 0, 0, 0]
+                counter = [0, 0, 0, 0, 0, 0]
                 pb.update('Transferring revisions', offset,
                           len(revision_ids))
                 batch = revision_ids[offset:offset+batch_size]
-                basis_id, basis_tree = self._fetch_batch(batch,
-                    basis_id, basis_tree, counter)
+                basis_id, basis_tree, last_batch = self._fetch_batch(batch,
+                    basis_id, basis_tree, counter, last_batch)
                 if total_counter[3] > 0:
                     avg_parent = float(total_counter[4]) / total_counter[3]
                 else:
                     avg_parent = 0
                 for idx in xrange(len(counter)):
                     total_counter[idx] += counter[idx]
-                note('Parent basis:%d cache:%d random:%d, multi: %d'
+                note('Parent basis:%d cache:%d last: %d, random:%d, multi: %d'
                      ' avg: %.3f, %s',
-                     counter[0], counter[1], counter[2], counter[3],
+                     counter[0], counter[1], counter[5], counter[2], counter[3],
                      avg_parent, total_counter)
             except:
                 self.target.abort_write_group()



More information about the bazaar-commits mailing list