Rev 4669: We have iteration to parents working, need to find children now. in http://bazaar.launchpad.net/~jameinel/bzr/2.0.1-faster-log-dir-bug374730
John Arbash Meinel
john at arbash-meinel.com
Wed Sep 23 22:43:19 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.0.1-faster-log-dir-bug374730
------------------------------------------------------------
revno: 4669
revision-id: john at arbash-meinel.com-20090923214306-o0ecifj3bffgfrcr
parent: john at arbash-meinel.com-20090923211425-84kg8vwdz68mwvg3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.0.1-faster-log-dir-bug374730
timestamp: Wed 2009-09-23 16:43:06 -0500
message:
We have iteration to parents working, need to find children now.
-------------- next part --------------
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2009-08-30 22:02:45 +0000
+++ b/bzrlib/inventory.py 2009-09-23 21:43:06 +0000
@@ -1546,6 +1546,119 @@
else:
raise ValueError("unknown kind %r" % entry.kind)
    def _expand_fileids_to_parents_and_children(self, file_ids):
        """Give a more holistic view starting with the given file_ids.

        For any file_id which maps to a directory, we will include all children
        of that directory. We will also include all directories which are
        parents of the given file_ids, but we will not include their children.

        eg:
          /        # TREE_ROOT
          foo/     # foo-id
            baz    # baz-id
            frob/  # frob-id
              fringle  # fringle-id
          bar/     # bar-id
            bing   # bing-id

        if given [foo-id] we will include
          TREE_ROOT as interesting parents
        and
          foo-id, baz-id, frob-id, fringle-id
        As interesting ids.

        :param file_ids: iterable of file ids to expand.
        :return: (interesting_parents, file_ids) where interesting_parents is
            a set of ancestor directory ids (root's None parent removed).
            NOTE(review): child expansion is not implemented yet -- the second
            element is just the file_ids passed in, unchanged.
        """
        # Seed with None so a root entry's parent_id (None) is absorbed
        # without a special case; it is discarded again before returning.
        interesting_parents = set([None])
        # TODO: Pre-pass over the list of fileids to see if anything is already
        #       deserialized in self._fileid_to_entry_cache

        # NOTE(review): directories_to_expand and children_of_parent_id are
        # populated but never read here yet -- scaffolding for the pending
        # child-expansion step.
        directories_to_expand = set()
        children_of_parent_id = {}
        # It is okay if some of the fileids are missing
        for entry in self._getitems(file_ids):
            if entry.kind == 'directory':
                directories_to_expand.add(entry.file_id)
            interesting_parents.add(entry.parent_id)
            children_of_parent_id.setdefault(entry.parent_id, []).append(entry)

        # Now, interesting_parents has all of the direct parents, but not the
        # parents of those parents. It also may have some duplicates with
        # specific_fileids
        remaining_parents = interesting_parents.difference(file_ids)
        # Walk up the tree until every ancestor has been seen; the difference
        # against already-seen parents guarantees termination.
        while remaining_parents:
            next_parents = set()
            for entry in self._getitems(remaining_parents):
                next_parents.add(entry.parent_id)
            # Remove any search tips we've already processed
            remaining_parents = next_parents.difference(interesting_parents)
            interesting_parents.update(remaining_parents)
        # We should probably also .difference(directories_to_expand)
        interesting_parents.discard(None)
        return interesting_parents, file_ids
+
+ def filter(self, specific_fileids):
+ """Get an inventory view filtered against a set of file-ids.
+
+ Children of directories and parents are included.
+
+ The result may or may not reference the underlying inventory
+ so it should be treated as immutable.
+ """
+ interesting_parents = set()
+ # TODO: Pre-pass over the list of fileids to see if anything is already
+ # deserialized in self._fileid_to_entry_cache
+
+ directories_to_expand = set()
+ children_of_parent_id = {}
+ # It is okay if some of the fileids are missing
+ bytes_to_entry = self._bytes_to_entry
+ for file_id, value in self.id_to_entry.iteritems(specific_fileids):
+ entry = bytes_to_entry(bytes)
+ if entry.kind == 'directory':
+ directories_to_expand.add(file_id)
+ interesting_parents.add(entry.parent_id)
+ children_of_parent_id.setdefault(entry.parent_id, []).append(entry)
+ # Now, interesting_parents has all of the direct parents, but not the
+ # parents of those parents. It also may have some duplicates with
+ # specific_fileids
+ remaining_parents = interesting_parents.difference(specific_fileids)
+ while remaining_parents:
+ next_parents = set()
+ for file_id, value in self.id_to_entry.iteritems(remaining_parents):
+ entry = bytes_to_entry(bytes)
+ next_parents.add(entry.parent_id)
+ # Remove any search tips we've already processed
+ remaining_parents = next_parents.difference(interesting_parents)
+
+ # So at this point, we know all the specific_fileids that are
+ # directories, and we know all of the parent ids which need to be
+ # included, but not recursed
+ for fileid in specific_fileids:
+ ie = self.id_to_entry
+ try:
+ interesting_parents.update(self.get_idpath(fileid))
+ except errors.NoSuchId:
+ # This fileid is not in the inventory - that's ok
+ pass
+ entries = self.iter_entries()
+ # TODO: ???
+ if self.root is None:
+ return Inventory(root_id=None)
+ other = Inventory(entries.next()[1].file_id)
+ other.root.revision = self.root.revision
+ other.revision_id = self.revision_id
+ directories_to_expand = set()
+ for path, entry in entries:
+ file_id = entry.file_id
+ if (file_id in specific_fileids
+ or entry.parent_id in directories_to_expand):
+ if entry.kind == 'directory':
+ directories_to_expand.add(file_id)
+ elif file_id not in interesting_parents:
+ continue
+ other.add(entry.copy())
+ return other
+
@staticmethod
def _bytes_to_utf8name_key(bytes):
"""Get the file_id, revision_id key out of bytes."""
@@ -1885,6 +1998,27 @@
# really we're passing an inventory, not a tree...
raise errors.NoSuchId(self, file_id)
+ def _getitems(self, file_ids):
+ """Similar to __getitem__, but lets you query for multiple.
+
+ The returned order is undefined. And currently if an item doesn't
+ exist, it isn't included in the output.
+ """
+ result = []
+ remaining = []
+ for file_id in file_ids:
+ entry = self._fileid_to_entry_cache.get(file_id, None)
+ if entry is None:
+ remaining.append(file_id)
+ else:
+ result.append(entry)
+ file_keys = [(f,) for f in remaining]
+ for file_key, value in self.id_to_entry.iteritems(file_keys):
+ entry = self._bytes_to_entry(value)
+ result.append(entry)
+ self._fileid_to_entry_cache[entry.file_id] = entry
+ return result
+
def has_id(self, file_id):
# Perhaps have an explicit 'contains' method on CHKMap ?
if self._fileid_to_entry_cache.get(file_id, None) is not None:
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py 2009-09-08 05:51:36 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py 2009-09-23 21:43:06 +0000
@@ -991,6 +991,35 @@
finally:
pb.finished()
    def get_deltas_for_revisions(self, revisions, specific_fileids=None):
        """Produce a generator of revision deltas.

        Note that the input is a sequence of REVISIONS, not revision_ids.
        Trees will be held in memory until the generator exits.
        Each delta is relative to the revision's lefthand predecessor.

        :param specific_fileids: if not None, the result is filtered
            so that only those file-ids, their parents and their
            children are included.

        NOTE(review): specific_fileids is accepted but never applied below --
        the yielded deltas are unfiltered.  TODO: filter the trees (or the
        resulting deltas) when specific_fileids is not None.
        """
        # Get the revision-ids of interest
        required_trees = set()
        for revision in revisions:
            required_trees.add(revision.revision_id)
            # Only the lefthand (first) parent is needed as the delta base.
            required_trees.update(revision.parent_ids[:1])

        # Materialize all needed trees up front so each is read only once.
        trees = dict((t.get_revision_id(), t) for
                     t in self.revision_trees(required_trees))

        # Calculate the deltas
        for revision in revisions:
            if not revision.parent_ids:
                # Ghost-free root revision: delta against the empty tree.
                old_tree = self.revision_tree(_mod_revision.NULL_REVISION)
            else:
                old_tree = trees[revision.parent_ids[0]]
            yield trees[revision.revision_id].changes_from(old_tree)
+
+
def _reconcile_pack(self, collection, packs, extension, revs, pb):
packer = GCCHKReconcilePacker(collection, packs, extension)
return packer.pack(pb)
=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py 2009-09-23 21:14:25 +0000
+++ b/bzrlib/tests/test_inv.py 2009-09-23 21:43:06 +0000
@@ -1141,7 +1141,7 @@
inv.add(inv.make_entry('directory', name, parent_id, name + '-id'))
    def make_file(self, inv, name, parent_id, content='content\n'):
        # Build a 'file' entry (file_id = name + '-id') under parent_id, with
        # text_sha1/text_size computed from *content*, and add it to inv.
        ie = inv.make_entry('file', name, parent_id, name + '-id')
        ie.text_sha1 = osutils.sha_string(content)
        ie.text_size = len(content)
        inv.add(ie)
@@ -1168,9 +1168,21 @@
self.make_file(inv, 'subsub-file1', 'sub-dir1-id')
self.make_file(inv, 'sub2-file1', 'dir2-id')
chk_bytes = self.get_chk_bytes()
- return CHKInventory.from_inventory(chk_bytes, inv)
-
- def test_create_simple_inventory(self):
+ chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
+ bytes = ''.join(chk_inv.to_lines())
+ return CHKInventory.deserialise(chk_bytes, bytes, ("revid",))
+
+ def assert_Getitems(self, expected_fileids, inv, file_ids):
+ self.assertEqual(sorted(expected_fileids),
+ sorted([ie.file_id for ie in inv._getitems(file_ids)]))
+
+ def assertExpand(self, parent_ids, other_ids, inv, file_ids):
+ val_parents, val_other = inv._expand_fileids_to_parents_and_children(
+ file_ids)
+ self.assertEqual(set(parent_ids), val_parents)
+ self.assertEqual(other_ids, val_other)
+
+ def test_make_simple_inventory(self):
inv = self.make_simple_inventory()
layout = []
for path, entry in inv.iter_entries_by_dir():
@@ -1186,3 +1198,26 @@
('dir1/sub-dir1/subsub-file1', 'subsub-file1-id'),
('dir2/sub2-file1', 'sub2-file1-id'),
], layout)
+
    def test__getitems(self):
        # _getitems should read uncached entries from disk, populate the
        # entry cache as a side effect, and serve later hits from the cache.
        inv = self.make_simple_inventory()
        # Reading from disk
        self.assert_Getitems(['dir1-id'], inv, ['dir1-id'])
        self.assertTrue('dir1-id' in inv._fileid_to_entry_cache)
        self.assertFalse('sub-file2-id' in inv._fileid_to_entry_cache)
        # From cache
        self.assert_Getitems(['dir1-id'], inv, ['dir1-id'])
        # Mixed
        self.assert_Getitems(['dir1-id', 'sub-file2-id'], inv,
                             ['dir1-id', 'sub-file2-id'])
        self.assertTrue('dir1-id' in inv._fileid_to_entry_cache)
        self.assertTrue('sub-file2-id' in inv._fileid_to_entry_cache)
+
    def test_single_file(self):
        # A plain top-level file expands to just the root as parent and
        # the file itself.
        inv = self.make_simple_inventory()
        self.assertExpand(['TREE_ROOT'], ['top-id'], inv, ['top-id'])
+
    def test_get_all_parents(self):
        # A deeply nested file pulls in every directory on the path up to
        # (and including) the root.
        inv = self.make_simple_inventory()
        self.assertExpand(['TREE_ROOT', 'dir1-id', 'sub-dir1-id'],
                          ['subsub-file1-id'], inv, ['subsub-file1-id'])
More information about the bazaar-commits
mailing list