Rev 4042: (jam) Tweak the delta base selection for InterDifferingSerializer in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Tue Feb 24 22:19:14 GMT 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4042
revision-id: pqm at pqm.ubuntu.com-20090224221910-i352mbfn0sa3bq4z
parent: pqm at pqm.ubuntu.com-20090224214044-953waog3od1tvlfr
parent: john at arbash-meinel.com-20090218182834-lvlzcj1q88bl269r
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2009-02-24 22:19:10 +0000
message:
  (jam) Tweak the delta base selection for InterDifferingSerializer
  	conversions.
modified:
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
    ------------------------------------------------------------
    revno: 4017.4.1
    revision-id: john at arbash-meinel.com-20090218182834-lvlzcj1q88bl269r
    parent: pqm at pqm.ubuntu.com-20090218132708-okubrahz9exvae9r
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: generic_fetch_delta_selection
    timestamp: Wed 2009-02-18 12:28:34 -0600
    message:
      Change the generic fetch logic to improve delta selection.
      
      The code was using an arbitrary base as the basis for its delta;
      instead we now cache a few revision trees and use the one that most
      closely matches the final tree we want to end up with.

      For some conversions (chk) this has a rather large impact, as it
      means there are fewer redundant delta entries to apply.
    modified:
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
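
For readers skimming the change, here is a minimal, self-contained sketch of
the selection idea described in the commit message above (illustrative only,
not bzrlib code): an "inventory" is modelled as a plain dict, and the basis is
chosen from whichever cached parent tree yields the smallest delta against the
target tree, falling back to the last converted tree when no parent is cached.
The names make_delta and pick_basis and the dict representation are assumptions
made for this example; the real implementation is _get_delta_for_revision in
the diff below, where the delta is built by Inventory._make_delta and the cache
holds RevisionTrees.

    def make_delta(basis, target):
        # Every key whose value differs between the two "inventories".
        keys = set(basis) | set(target)
        return [(key, basis.get(key), target.get(key))
                for key in keys if basis.get(key) != target.get(key)]

    def pick_basis(target, parent_ids, fallback_id, cache):
        # Prefer parents that are still cached; otherwise fall back to the
        # last converted tree as the only candidate.
        candidates = [(pid, cache[pid]) for pid in parent_ids if pid in cache]
        if not candidates:
            candidates = [(fallback_id, cache[fallback_id])]
        # The basis producing the smallest delta wins.
        deltas = [(len(make_delta(tree, target)), pid, tree)
                  for pid, tree in candidates]
        deltas.sort(key=lambda entry: entry[0])
        size, pid, tree = deltas[0]
        return pid, make_delta(tree, target)

    cache = {'rev-a': {'f1': 1, 'f2': 2},
             'rev-b': {'f1': 1, 'f2': 2, 'f3': 3}}
    target = {'f1': 1, 'f2': 2, 'f3': 3, 'f4': 4}
    basis_id, delta = pick_basis(target, ['rev-a', 'rev-b'], 'rev-a', cache)
    # basis_id == 'rev-b': only one entry differs from the target, so the
    # delta to apply is as small as possible.
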
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-02-24 08:09:17 +0000
+++ b/bzrlib/repository.py	2009-02-24 22:19:10 +0000
@@ -3232,15 +3232,34 @@
             return False
         return True
 
-    def _fetch_batch(self, revision_ids, basis_id, basis_tree):
+    def _get_delta_for_revision(self, tree, parent_ids, basis_id, cache):
+        """Get the best delta and base for this revision.
+
+        :return: (basis_id, delta)
+        """
+        possible_trees = [(parent_id, cache[parent_id])
+                          for parent_id in parent_ids
+                           if parent_id in cache]
+        if len(possible_trees) == 0:
+            # There either aren't any parents, or the parents aren't in the
+            # cache, so just use the last converted tree
+            possible_trees.append((basis_id, cache[basis_id]))
+        deltas = []
+        for basis_id, basis_tree in possible_trees:
+            delta = tree.inventory._make_delta(basis_tree.inventory)
+            deltas.append((len(delta), basis_id, delta))
+        deltas.sort()
+        return deltas[0][1:]
+
+    def _fetch_batch(self, revision_ids, basis_id, cache):
         """Fetch across a few revisions.
 
         :param revision_ids: The revisions to copy
-        :param basis_id: The revision_id of basis_tree
-        :param basis_tree: A tree that is not in revision_ids which should
-            already exist in the target.
-        :return: (basis_id, basis_tree) A new basis to use now that these trees
-            have been copied.
+        :param basis_id: The revision_id of a tree that must be in cache, used
+            as a basis for delta when no other base is available
+        :param cache: A cache of RevisionTrees that we can use.
+        :return: The revision_id of the last converted tree. The RevisionTree
+            for it will be in cache
         """
         # Walk though all revisions; get inventory deltas, copy referenced
         # texts that delta references, insert the delta, revision and
@@ -3248,15 +3267,18 @@
         text_keys = set()
         pending_deltas = []
         pending_revisions = []
+        parent_map = self.source.get_parent_map(revision_ids)
         for tree in self.source.revision_trees(revision_ids):
             current_revision_id = tree.get_revision_id()
-            delta = tree.inventory._make_delta(basis_tree.inventory)
+            parent_ids = parent_map.get(current_revision_id, ())
+            basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
+                                                           basis_id, cache)
+            # Find text entries that need to be copied
             for old_path, new_path, file_id, entry in delta:
                 if new_path is not None:
                     if not (new_path or self.target.supports_rich_root()):
-                        # We leave the inventory delta in, because that
-                        # will have the deserialised inventory root
-                        # pointer.
+                        # We don't copy the text for the root node unless the
+                        # target supports_rich_root.
                         continue
                     # TODO: Do we need:
                     #       "if entry.revision == current_revision_id" ?
@@ -3266,8 +3288,8 @@
             pending_deltas.append((basis_id, delta,
                 current_revision_id, revision.parent_ids))
             pending_revisions.append(revision)
+            cache[current_revision_id] = tree
             basis_id = current_revision_id
-            basis_tree = tree
         # Copy file texts
         from_texts = self.source.texts
         to_texts = self.target.texts
@@ -3287,7 +3309,7 @@
             except errors.NoSuchRevision:
                 pass
             self.target.add_revision(revision.revision_id, revision)
-        return basis_id, basis_tree
+        return basis_id
 
     def _fetch_all_revisions(self, revision_ids, pb):
         """Fetch everything for the list of revisions.
@@ -3299,14 +3321,16 @@
         """
         basis_id, basis_tree = self._get_basis(revision_ids[0])
         batch_size = 100
+        cache = lru_cache.LRUCache(100)
+        cache[basis_id] = basis_tree
+        del basis_tree # We don't want to hang on to it here
         for offset in range(0, len(revision_ids), batch_size):
             self.target.start_write_group()
             try:
                 pb.update('Transferring revisions', offset,
                           len(revision_ids))
                 batch = revision_ids[offset:offset+batch_size]
-                basis_id, basis_tree = self._fetch_batch(batch,
-                    basis_id, basis_tree)
+                basis_id = self._fetch_batch(batch, basis_id, cache)
             except:
                 self.target.abort_write_group()
                 raise
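
One design note on the outer loop: the patch replaces the single basis_tree
that used to be threaded through _fetch_batch with an lru_cache.LRUCache(100),
seeded with the initial basis. A rough stand-in for that behaviour (an
assumption for illustration, not the bzrlib LRUCache implementation) is
sketched below; the point is that each converted tree is added to the cache,
older trees are evicted, and memory stays bounded no matter how many batches
are processed.

    from collections import OrderedDict

    class BoundedCache(OrderedDict):
        # Evicts the oldest entries once max_size is exceeded.  The real
        # bzrlib LRUCache also tracks recency of access; this stand-in only
        # tracks insertion order, which is enough for the example.
        def __init__(self, max_size=100):
            OrderedDict.__init__(self)
            self.max_size = max_size

        def __setitem__(self, key, value):
            OrderedDict.__setitem__(self, key, value)
            if len(self) > self.max_size:
                self.popitem(last=False)

    cache = BoundedCache(100)
    cache['basis-rev'] = 'basis tree'      # seeded before the first batch
    for index in range(500):               # every converted revision lands here
        cache['rev-%d' % index] = 'tree %d' % index
    assert len(cache) == 100               # bounded, unlike an ordinary dict
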



