Rev 4669: We have iteration to parents working, need to find children now. in http://bazaar.launchpad.net/~jameinel/bzr/2.0.1-faster-log-dir-bug374730
John Arbash Meinel
john at arbash-meinel.com
Wed Sep 23 22:43:19 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.0.1-faster-log-dir-bug374730
------------------------------------------------------------
revno: 4669
revision-id: john at arbash-meinel.com-20090923214306-o0ecifj3bffgfrcr
parent: john at arbash-meinel.com-20090923211425-84kg8vwdz68mwvg3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.0.1-faster-log-dir-bug374730
timestamp: Wed 2009-09-23 16:43:06 -0500
message:
We have iteration to parents working, need to find children now.
-------------- next part --------------
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2009-08-30 22:02:45 +0000
+++ b/bzrlib/inventory.py 2009-09-23 21:43:06 +0000
@@ -1546,6 +1546,119 @@
else:
raise ValueError("unknown kind %r" % entry.kind)
    def _expand_fileids_to_parents_and_children(self, file_ids):
        """Give a more holistic view starting with the given file_ids.

        For any file_id which maps to a directory, we will include all children
        of that directory. We will also include all directories which are
        parents of the given file_ids, but we will not include their children.

        eg:
          /        # TREE_ROOT
          foo/     # foo-id
            baz    # baz-id
            frob/  # frob-id
              fringle  # fringle-id
          bar/     # bar-id
            bing   # bing-id

        if given [foo-id] we will include
          TREE_ROOT as interesting parents
        and
          foo-id, baz-id, frob-id, fringle-id
        As interesting ids.

        :param file_ids: iterable of file ids to expand.
        :return: (interesting_parents, file_ids) where interesting_parents is
            a set of ancestor directory ids (root's None parent removed).
            NOTE(review): child expansion is not implemented yet -- the second
            element is just the file_ids passed in, unchanged.
        """
        # Seed with None so a root entry's parent_id (None) is absorbed
        # without a special case; it is discarded again before returning.
        interesting_parents = set([None])
        # TODO: Pre-pass over the list of fileids to see if anything is already
        #       deserialized in self._fileid_to_entry_cache

        # NOTE(review): directories_to_expand and children_of_parent_id are
        # populated but never read here yet -- scaffolding for the pending
        # child-expansion step.
        directories_to_expand = set()
        children_of_parent_id = {}
        # It is okay if some of the fileids are missing
        for entry in self._getitems(file_ids):
            if entry.kind == 'directory':
                directories_to_expand.add(entry.file_id)
            interesting_parents.add(entry.parent_id)
            children_of_parent_id.setdefault(entry.parent_id, []).append(entry)

        # Now, interesting_parents has all of the direct parents, but not the
        # parents of those parents. It also may have some duplicates with
        # specific_fileids
        remaining_parents = interesting_parents.difference(file_ids)
        # Walk up the tree until every ancestor has been seen; the difference
        # against already-seen parents guarantees termination.
        while remaining_parents:
            next_parents = set()
            for entry in self._getitems(remaining_parents):
                next_parents.add(entry.parent_id)
            # Remove any search tips we've already processed
            remaining_parents = next_parents.difference(interesting_parents)
            interesting_parents.update(remaining_parents)
        # We should probably also .difference(directories_to_expand)
        interesting_parents.discard(None)
        return interesting_parents, file_ids
+
+ def filter(self, specific_fileids):
+ """Get an inventory view filtered against a set of file-ids.
+
+ Children of directories and parents are included.
+
+ The result may or may not reference the underlying inventory
+ so it should be treated as immutable.
+ """
+ interesting_parents = set()
+ # TODO: Pre-pass over the list of fileids to see if anything is already
+ # deserialized in self._fileid_to_entry_cache
+
+ directories_to_expand = set()
+ children_of_parent_id = {}
+ # It is okay if some of the fileids are missing
+ bytes_to_entry = self._bytes_to_entry
+ for file_id, value in self.id_to_entry.iteritems(specific_fileids):
+ entry = bytes_to_entry(bytes)
+ if entry.kind == 'directory':
+ directories_to_expand.add(file_id)
+ interesting_parents.add(entry.parent_id)
+ children_of_parent_id.setdefault(entry.parent_id, []).append(entry)
+ # Now, interesting_parents has all of the direct parents, but not the
+ # parents of those parents. It also may have some duplicates with
+ # specific_fileids
+ remaining_parents = interesting_parents.difference(specific_fileids)
+ while remaining_parents:
+ next_parents = set()
+ for file_id, value in self.id_to_entry.iteritems(remaining_parents):
+ entry = bytes_to_entry(bytes)
+ next_parents.add(entry.parent_id)
+ # Remove any search tips we've already processed
+ remaining_parents = next_parents.difference(interesting_parents)
+
+ # So at this point, we know all the specific_fileids that are
+ # directories, and we know all of the parent ids which need to be
+ # included, but not recursed
+ for fileid in specific_fileids:
+ ie = self.id_to_entry
+ try:
+ interesting_parents.update(self.get_idpath(fileid))
+ except errors.NoSuchId:
+ # This fileid is not in the inventory - that's ok
+ pass
+ entries = self.iter_entries()
+ # TODO: ???
+ if self.root is None:
+ return Inventory(root_id=None)
+ other = Inventory(entries.next()[1].file_id)
+ other.root.revision = self.root.revision
+ other.revision_id = self.revision_id
+ directories_to_expand = set()
+ for path, entry in entries:
+ file_id = entry.file_id
+ if (file_id in specific_fileids
+ or entry.parent_id in directories_to_expand):
+ if entry.kind == 'directory':
+ directories_to_expand.add(file_id)
+ elif file_id not in interesting_parents:
+ continue
+ other.add(entry.copy())
+ return other
+
@staticmethod
def _bytes_to_utf8name_key(bytes):
"""Get the file_id, revision_id key out of bytes."""
@@ -1885,6 +1998,27 @@
# really we're passing an inventory, not a tree...
raise errors.NoSuchId(self, file_id)
+ def _getitems(self, file_ids):
+ """Similar to __getitem__, but lets you query for multiple.
+
+ The returned order is undefined. And currently if an item doesn't
+ exist, it isn't included in the output.
+ """
+ result = []
+ remaining = []
+ for file_id in file_ids:
+ entry = self._fileid_to_entry_cache.get(file_id, None)
+ if entry is None:
+ remaining.append(file_id)
+ else:
+ result.append(entry)
+ file_keys = [(f,) for f in remaining]
+ for file_key, value in self.id_to_entry.iteritems(file_keys):
+ entry = self._bytes_to_entry(value)
+ result.append(entry)
+ self._fileid_to_entry_cache[entry.file_id] = entry
+ return result
+
def has_id(self, file_id):
# Perhaps have an explicit 'contains' method on CHKMap ?
if self._fileid_to_entry_cache.get(file_id, None) is not None:
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py 2009-09-08 05:51:36 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py 2009-09-23 21:43:06 +0000
@@ -991,6 +991,35 @@
finally:
pb.finished()
    def get_deltas_for_revisions(self, revisions, specific_fileids=None):
        """Produce a generator of revision deltas.

        Note that the input is a sequence of REVISIONS, not revision_ids.
        Trees will be held in memory until the generator exits.
        Each delta is relative to the revision's lefthand predecessor.

        :param specific_fileids: if not None, the result is filtered
            so that only those file-ids, their parents and their
            children are included.

        NOTE(review): specific_fileids is accepted but never applied below --
        the yielded deltas are unfiltered.  TODO: filter the trees (or the
        resulting deltas) when specific_fileids is not None.
        """
        # Get the revision-ids of interest
        required_trees = set()
        for revision in revisions:
            required_trees.add(revision.revision_id)
            # Only the lefthand (first) parent is needed as the delta base.
            required_trees.update(revision.parent_ids[:1])

        # Materialize all needed trees up front so each is read only once.
        trees = dict((t.get_revision_id(), t) for
                     t in self.revision_trees(required_trees))

        # Calculate the deltas
        for revision in revisions:
            if not revision.parent_ids:
                # Ghost-free root revision: delta against the empty tree.
                old_tree = self.revision_tree(_mod_revision.NULL_REVISION)
            else:
                old_tree = trees[revision.parent_ids[0]]
            yield trees[revision.revision_id].changes_from(old_tree)
+
+
def _reconcile_pack(self, collection, packs, extension, revs, pb):
packer = GCCHKReconcilePacker(collection, packs, extension)
return packer.pack(pb)
=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py 2009-09-23 21:14:25 +0000
+++ b/bzrlib/tests/test_inv.py 2009-09-23 21:43:06 +0000
@@ -1141,7 +1141,7 @@
inv.add(inv.make_entry('directory', name, parent_id, name + '-id'))
    def make_file(self, inv, name, parent_id, content='content\n'):
        # Build a 'file' entry (file_id = name + '-id') under parent_id, with
        # text_sha1/text_size computed from *content*, and add it to inv.
        ie = inv.make_entry('file', name, parent_id, name + '-id')
        ie.text_sha1 = osutils.sha_string(content)
        ie.text_size = len(content)
        inv.add(ie)
@@ -1168,9 +1168,21 @@
self.make_file(inv, 'subsub-file1', 'sub-dir1-id')
self.make_file(inv, 'sub2-file1', 'dir2-id')
chk_bytes = self.get_chk_bytes()
- return CHKInventory.from_inventory(chk_bytes, inv)
-
- def test_create_simple_inventory(self):
+ chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
+ bytes = ''.join(chk_inv.to_lines())
+ return CHKInventory.deserialise(chk_bytes, bytes, ("revid",))
+
+ def assert_Getitems(self, expected_fileids, inv, file_ids):
+ self.assertEqual(sorted(expected_fileids),
+ sorted([ie.file_id for ie in inv._getitems(file_ids)]))
+
+ def assertExpand(self, parent_ids, other_ids, inv, file_ids):
+ val_parents, val_other = inv._expand_fileids_to_parents_and_children(
+ file_ids)
+ self.assertEqual(set(parent_ids), val_parents)
+ self.assertEqual(other_ids, val_other)
+
+ def test_make_simple_inventory(self):
inv = self.make_simple_inventory()
layout = []
for path, entry in inv.iter_entries_by_dir():
@@ -1186,3 +1198,26 @@
('dir1/sub-dir1/subsub-file1', 'subsub-file1-id'),
('dir2/sub2-file1', 'sub2-file1-id'),
], layout)
+
    def test__getitems(self):
        # _getitems should read uncached entries from disk, populate the
        # entry cache as a side effect, and serve later hits from the cache.
        inv = self.make_simple_inventory()
        # Reading from disk
        self.assert_Getitems(['dir1-id'], inv, ['dir1-id'])
        self.assertTrue('dir1-id' in inv._fileid_to_entry_cache)
        self.assertFalse('sub-file2-id' in inv._fileid_to_entry_cache)
        # From cache
        self.assert_Getitems(['dir1-id'], inv, ['dir1-id'])
        # Mixed
        self.assert_Getitems(['dir1-id', 'sub-file2-id'], inv,
                             ['dir1-id', 'sub-file2-id'])
        self.assertTrue('dir1-id' in inv._fileid_to_entry_cache)
        self.assertTrue('sub-file2-id' in inv._fileid_to_entry_cache)
+
    def test_single_file(self):
        # A plain top-level file expands to just the root as parent and
        # the file itself.
        inv = self.make_simple_inventory()
        self.assertExpand(['TREE_ROOT'], ['top-id'], inv, ['top-id'])
+
    def test_get_all_parents(self):
        # A deeply nested file pulls in every directory on the path up to
        # (and including) the root.
        inv = self.make_simple_inventory()
        self.assertExpand(['TREE_ROOT', 'dir1-id', 'sub-dir1-id'],
                          ['subsub-file1-id'], inv, ['subsub-file1-id'])
More information about the bazaar-commits
mailing list