Rev 4679: implement CHKInventoryRepository.get_deltas_for_revisions in http://bazaar.launchpad.net/~jameinel/bzr/2.0.1-faster-get-deltas-bug374730

John Arbash Meinel john at arbash-meinel.com
Thu Sep 24 22:32:54 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.0.1-faster-get-deltas-bug374730

------------------------------------------------------------
revno: 4679
revision-id: john at arbash-meinel.com-20090924213240-o6rpr4xv64ruht98
parent: john at arbash-meinel.com-20090924211450-c7y2v6ppian0d92u
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.0.1-faster-get-deltas-bug374730
timestamp: Thu 2009-09-24 16:32:40 -0500
message:
  implement CHKInventoryRepository.get_deltas_for_revisions
  
  This first implementation just maps file_ids => paths, and then calls
  tree.changes_from().
  This isn't ideal for a few reasons, but it should be better than what we've
  had.
-------------- next part --------------
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py	2009-09-24 20:03:43 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py	2009-09-24 21:32:40 +0000
@@ -23,6 +23,7 @@
     chk_map,
     chk_serializer,
     debug,
+    delta as _mod_delta,
     errors,
     index as _mod_index,
     inventory,
@@ -991,6 +992,65 @@
         finally:
             pb.finished()
 
+    def get_deltas_for_revisions(self, revisions, specific_fileids=None):
+        """Produce a generator of revision deltas.
+
+        Note that the input is a sequence of REVISIONS, not revision_ids.
+        Trees will be held in memory until the generator exits.
+        Each delta is relative to the revision's lefthand predecessor.
+
+        :param specific_fileids: if not None, the result is filtered
+          so that only those file-ids, their parents and their
+          children are included.
+        """
+        # Get the revision-ids of interest
+        required_trees = set()
+        for revision in revisions:
+            required_trees.add(revision.revision_id)
+            required_trees.update(revision.parent_ids[:1])
+
+        trees = dict((t.get_revision_id(), t) for
+            t in self.revision_trees(required_trees))
+
+        # Calculate the deltas
+        for revision in revisions:
+            if not revision.parent_ids:
+                old_tree = self.revision_tree(_mod_revision.NULL_REVISION)
+            else:
+                old_tree = trees[revision.parent_ids[0]]
+            tree = trees[revision.revision_id]
+            if specific_fileids is None:
+                yield tree.changes_from(old_tree)
+                continue
+            # Unfortunately, the iter_changes interface is passed
+            # specific_files (paths), but internally RevisionTrees do all the
+            # work in terms of file-ids... :(
+            paths = set()
+            for file_id in specific_fileids:
+                try:
+                    path = old_tree.id2path(file_id)
+                except errors.NoSuchId:
+                    pass # not present in this tree
+                else:
+                    paths.add(path)
+                try:
+                    path = tree.id2path(file_id)
+                except errors.NoSuchId:
+                    pass # not present in this tree
+                else:
+                    paths.add(path)
+            if not paths:
+                # None of the requested file-ids are versioned in either tree,
+                # and tree.changes_from() treats an empty specific_files as
+                # meaning compare *all* changes... :(
+                # So, following InterTree.compare(), just yield an empty
+                # delta here; otherwise _compare_trees would think we want a
+                # complete delta.
+                yield _mod_delta.TreeDelta()
+            else:
+                yield tree.changes_from(old_tree, specific_files=paths)
+
+
     def _reconcile_pack(self, collection, packs, extension, revs, pb):
         packer = GCCHKReconcilePacker(collection, packs, extension)
         return packer.pack(pb)

=== modified file 'bzrlib/tests/per_repository/test_get_deltas_for_revisions.py'
--- a/bzrlib/tests/per_repository/test_get_deltas_for_revisions.py	2009-09-24 21:14:50 +0000
+++ b/bzrlib/tests/per_repository/test_get_deltas_for_revisions.py	2009-09-24 21:32:40 +0000
@@ -83,12 +83,8 @@
         deltas = list(repo.get_deltas_for_revisions(revisions,
                       specific_fileids=['subfile-id']))
         # Only modified in rev1 and rev5
-        self.assertTrue(deltas[0].has_changed())
-        self.assertFalse(deltas[1].has_changed())
-        self.assertFalse(deltas[2].has_changed())
-        self.assertFalse(deltas[3].has_changed())
-        self.assertTrue(deltas[4].has_changed())
-        self.assertFalse(deltas[5].has_changed())
+        self.assertEqual([True, False, False, False, True, False],
+                         [d.has_changed() for d in deltas])
 
     def test_filtered_dirs_include_children(self):
         repo = self.make_repo_with_history()
@@ -96,10 +92,6 @@
                      ['rev1', 'rev2', 'rev3', 'rev4', 'rev5', 'rev6']]
         deltas = list(repo.get_deltas_for_revisions(revisions,
                       specific_fileids=['dir-id']))
-        # child modified in all but rev2
-        self.assertTrue(deltas[0].has_changed())
-        self.assertFalse(deltas[1].has_changed())
-        self.assertTrue(deltas[2].has_changed())
-        self.assertTrue(deltas[3].has_changed())
-        self.assertTrue(deltas[4].has_changed())
-        self.assertTrue(deltas[5].has_changed())
+        # a child was modified in all but rev2
+        self.assertEqual([True, False, True, True, True, True],
+                         [d.has_changed() for d in deltas])



More information about the bazaar-commits mailing list