Rev 4181: filtered deltas (Ian Clatworthy) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Sun Mar 22 02:16:23 GMT 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4181
revision-id: pqm at pqm.ubuntu.com-20090322021619-onz77khivwa932zn
parent: pqm at pqm.ubuntu.com-20090321041623-7boodzsp5z8v22lu
parent: ian.clatworthy at canonical.com-20090322013044-bjfkjggp576hwmwd
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Sun 2009-03-22 02:16:19 +0000
message:
filtered deltas (Ian Clatworthy)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/inventory_implementations/basics.py basics.py-20070903044446-kdjwbiu1p1zi9phs-1
bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
------------------------------------------------------------
revno: 4180.1.1
revision-id: ian.clatworthy at canonical.com-20090322013044-bjfkjggp576hwmwd
parent: pqm at pqm.ubuntu.com-20090321041623-7boodzsp5z8v22lu
parent: ian.clatworthy at canonical.com-20090322012543-1s63ca9a95jda5wq
committer: Ian Clatworthy <ian.clatworthy at canonical.com>
branch nick: ianc-integration
timestamp: Sun 2009-03-22 11:30:44 +1000
message:
filtered deltas (Ian Clatworthy)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/inventory_implementations/basics.py basics.py-20070903044446-kdjwbiu1p1zi9phs-1
bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
------------------------------------------------------------
revno: 4137.3.3
revision-id: ian.clatworthy at canonical.com-20090322012543-1s63ca9a95jda5wq
parent: ian.clatworthy at canonical.com-20090313015944-6qcysjxyy0yun84d
committer: Ian Clatworthy <ian.clatworthy at canonical.com>
branch nick: bzr.filtered-deltas
timestamp: Sun 2009-03-22 11:25:43 +1000
message:
tweak comments to set immutable expectation
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
------------------------------------------------------------
revno: 4137.3.2
revision-id: ian.clatworthy at canonical.com-20090313015944-6qcysjxyy0yun84d
parent: ian.clatworthy at canonical.com-20090313002941-0y35d4gxh0ck3pdc
committer: Ian Clatworthy <ian.clatworthy at canonical.com>
branch nick: bzr.filtered-deltas
timestamp: Fri 2009-03-13 11:59:44 +1000
message:
Repository.get_deltas_for_revisions() now supports file-id filtering
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
------------------------------------------------------------
revno: 4137.3.1
revision-id: ian.clatworthy at canonical.com-20090313002941-0y35d4gxh0ck3pdc
parent: pqm at pqm.ubuntu.com-20090312152735-08tf1fguf6b7mpaw
committer: Ian Clatworthy <ian.clatworthy at canonical.com>
branch nick: bzr.filtered-inventories
timestamp: Fri 2009-03-13 10:29:41 +1000
message:
Inventory.filter() API with tests
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
bzrlib/tests/inventory_implementations/basics.py basics.py-20070903044446-kdjwbiu1p1zi9phs-1
=== modified file 'NEWS'
--- a/NEWS 2009-03-21 02:36:10 +0000
+++ b/NEWS 2009-03-22 01:30:44 +0000
@@ -121,10 +121,20 @@
API Changes
***********
+* New API ``Inventory.filter()`` added that filters an inventory by
+ a set of file-ids so that only those file-ids, their parents and
+ their children are included. (Ian Clatworthy)
+
* New sort order for ``get_record_stream`` ``groupcompress`` which
sorts optimally for use with groupcompress compressors. (John Arbash
Meinel, Robert Collins)
+* Repository APIs ``get_deltas_for_revisions()`` and
+ ``get_revision_delta()`` now support an optional ``specific_fileids``
+ parameter. If provided, the deltas are filtered so that only those
+ file-ids, their parents and their children are included.
+ (Ian Clatworthy)
+
* The ``get_credentials`` and ``set_credentials`` methods of
``AuthenticationConfig`` now accept an optional realm argument.
(Jean-Francois Roy)
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2009-03-16 22:22:06 +0000
+++ b/bzrlib/inventory.py 2009-03-22 01:30:44 +0000
@@ -1338,6 +1338,39 @@
def is_root(self, file_id):
return self.root is not None and file_id == self.root.file_id
+ def filter(self, specific_fileids):
+ """Get an inventory view filtered against a set of file-ids.
+
+ Parents of the given file-ids, and all descendants of any given directories, are also included.
+
+ The result may or may not reference the underlying inventory
+ so it should be treated as immutable.
+ """
+ interesting_parents = set()
+ for fileid in specific_fileids:
+ try:
+ interesting_parents.update(self.get_idpath(fileid))
+ except errors.NoSuchId:
+ # This fileid is not in the inventory - that's ok
+ pass
+ entries = self.iter_entries()
+ if self.root is None:
+ return Inventory(root_id=None)
+ other = Inventory(entries.next()[1].file_id)
+ other.root.revision = self.root.revision
+ other.revision_id = self.revision_id
+ directories_to_expand = set()
+ for path, entry in entries:
+ file_id = entry.file_id
+ if (file_id in specific_fileids
+ or entry.parent_id in directories_to_expand):
+ if entry.kind == 'directory':
+ directories_to_expand.add(file_id)
+ elif file_id not in interesting_parents:
+ continue
+ other.add(entry.copy())
+ return other
+
entry_factory = {
'directory': InventoryDirectory,
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2009-03-18 01:27:58 +0000
+++ b/bzrlib/remote.py 2009-03-22 01:30:44 +0000
@@ -1315,14 +1315,16 @@
return self._real_repository.all_revision_ids()
@needs_read_lock
- def get_deltas_for_revisions(self, revisions):
+ def get_deltas_for_revisions(self, revisions, specific_fileids=None):
self._ensure_real()
- return self._real_repository.get_deltas_for_revisions(revisions)
+ return self._real_repository.get_deltas_for_revisions(revisions,
+ specific_fileids=specific_fileids)
@needs_read_lock
- def get_revision_delta(self, revision_id):
+ def get_revision_delta(self, revision_id, specific_fileids=None):
self._ensure_real()
- return self._real_repository.get_revision_delta(revision_id)
+ return self._real_repository.get_revision_delta(revision_id,
+ specific_fileids=specific_fileids)
@needs_read_lock
def revision_trees(self, revision_ids):
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-03-18 01:27:58 +0000
+++ b/bzrlib/repository.py 2009-03-22 01:30:44 +0000
@@ -1336,19 +1336,38 @@
rev_tmp.seek(0)
return rev_tmp.getvalue()
- def get_deltas_for_revisions(self, revisions):
+ def get_deltas_for_revisions(self, revisions, specific_fileids=None):
"""Produce a generator of revision deltas.
Note that the input is a sequence of REVISIONS, not revision_ids.
Trees will be held in memory until the generator exits.
Each delta is relative to the revision's lefthand predecessor.
+
+ :param specific_fileids: if not None, the result is filtered
+ so that only those file-ids, their parents and their
+ children are included.
"""
+ # Get the revision-ids of interest
required_trees = set()
for revision in revisions:
required_trees.add(revision.revision_id)
required_trees.update(revision.parent_ids[:1])
- trees = dict((t.get_revision_id(), t) for
- t in self.revision_trees(required_trees))
+
+ # Get the matching filtered trees. Note that it's more
+ # efficient to pass filtered trees to changes_from() rather
+ # than doing the filtering afterwards. changes_from() could
+ # arguably do the filtering itself but it's path-based, not
+ # file-id based, so filtering before or afterwards is
+ # currently easier.
+ if specific_fileids is None:
+ trees = dict((t.get_revision_id(), t) for
+ t in self.revision_trees(required_trees))
+ else:
+ trees = dict((t.get_revision_id(), t) for
+ t in self._filtered_revision_trees(required_trees,
+ specific_fileids))
+
+ # Calculate the deltas
for revision in revisions:
if not revision.parent_ids:
old_tree = self.revision_tree(_mod_revision.NULL_REVISION)
@@ -1357,14 +1376,19 @@
yield trees[revision.revision_id].changes_from(old_tree)
@needs_read_lock
- def get_revision_delta(self, revision_id):
+ def get_revision_delta(self, revision_id, specific_fileids=None):
"""Return the delta for one revision.
The delta is relative to the left-hand predecessor of the
revision.
+
+ :param specific_fileids: if not None, the result is filtered
+ so that only those file-ids, their parents and their
+ children are included.
"""
r = self.get_revision(revision_id)
- return list(self.get_deltas_for_revisions([r]))[0]
+ return list(self.get_deltas_for_revisions([r],
+ specific_fileids=specific_fileids))[0]
@needs_write_lock
def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
@@ -1882,13 +1906,31 @@
return RevisionTree(self, inv, revision_id)
def revision_trees(self, revision_ids):
- """Return Tree for a revision on this branch.
+ """Return Trees for revisions in this repository.
- `revision_id` may not be None or 'null:'"""
+ :param revision_ids: a sequence of revision-ids;
+ a revision-id may not be None or 'null:'
+ """
inventories = self.iter_inventories(revision_ids)
for inv in inventories:
yield RevisionTree(self, inv, inv.revision_id)
+ def _filtered_revision_trees(self, revision_ids, file_ids):
+ """Return Trees for revisions in this repository with only some files.
+
+ :param revision_ids: a sequence of revision-ids;
+ a revision-id may not be None or 'null:'
+ :param file_ids: if not None, the result is filtered
+ so that only those file-ids, their parents and their
+ children are included.
+ """
+ inventories = self.iter_inventories(revision_ids)
+ for inv in inventories:
+ # Should we introduce a FilteredRevisionTree class rather
+ # than pre-filter the inventory here?
+ filtered_inv = inv.filter(file_ids)
+ yield RevisionTree(self, filtered_inv, filtered_inv.revision_id)
+
@needs_read_lock
def get_ancestry(self, revision_id, topo_sorted=True):
"""Return a list of revision-ids integrated by a revision.
=== modified file 'bzrlib/tests/inventory_implementations/basics.py'
--- a/bzrlib/tests/inventory_implementations/basics.py 2009-03-12 08:12:18 +0000
+++ b/bzrlib/tests/inventory_implementations/basics.py 2009-03-13 00:29:41 +0000
@@ -42,6 +42,20 @@
def make_inventory(self, root_id):
return self.inventory_class(root_id=root_id)
+ def prepare_inv_with_nested_dirs(self):
+ inv = self.make_inventory('tree-root')
+ for args in [('src', 'directory', 'src-id'),
+ ('doc', 'directory', 'doc-id'),
+ ('src/hello.c', 'file', 'hello-id'),
+ ('src/bye.c', 'file', 'bye-id'),
+ ('zz', 'file', 'zz-id'),
+ ('src/sub/', 'directory', 'sub-id'),
+ ('src/zz.c', 'file', 'zzc-id'),
+ ('src/sub/a', 'file', 'a-id'),
+ ('Makefile', 'file', 'makefile-id')]:
+ inv.add_path(*args)
+ return inv
+
class TestInventoryUpdates(TestInventory):
@@ -209,17 +223,7 @@
], [(path, ie.file_id) for path, ie in inv.iter_entries()])
def test_iter_entries_by_dir(self):
- inv = self.make_inventory('tree-root')
- for args in [('src', 'directory', 'src-id'),
- ('doc', 'directory', 'doc-id'),
- ('src/hello.c', 'file', 'hello-id'),
- ('src/bye.c', 'file', 'bye-id'),
- ('zz', 'file', 'zz-id'),
- ('src/sub/', 'directory', 'sub-id'),
- ('src/zz.c', 'file', 'zzc-id'),
- ('src/sub/a', 'file', 'a-id'),
- ('Makefile', 'file', 'makefile-id')]:
- inv.add_path(*args)
+ inv = self. prepare_inv_with_nested_dirs()
self.assertEqual([
('', 'tree-root'),
('Makefile', 'makefile-id'),
@@ -283,3 +287,50 @@
('src/bye.c', 'bye-id'),
], [(path, ie.file_id) for path, ie in inv.iter_entries_by_dir(
specific_file_ids=('bye-id',), yield_parents=True)])
+
+
+class TestInventoryFiltering(TestInventory):
+
+ def test_inv_filter_empty(self):
+ inv = self.prepare_inv_with_nested_dirs()
+ new_inv = inv.filter([])
+ self.assertEqual([
+ ('', 'tree-root'),
+ ], [(path, ie.file_id) for path, ie in new_inv.iter_entries()])
+
+ def test_inv_filter_files(self):
+ inv = self.prepare_inv_with_nested_dirs()
+ new_inv = inv.filter(['zz-id', 'hello-id', 'a-id'])
+ self.assertEqual([
+ ('', 'tree-root'),
+ ('src', 'src-id'),
+ ('src/hello.c', 'hello-id'),
+ ('src/sub', 'sub-id'),
+ ('src/sub/a', 'a-id'),
+ ('zz', 'zz-id'),
+ ], [(path, ie.file_id) for path, ie in new_inv.iter_entries()])
+
+ def test_inv_filter_dirs(self):
+ inv = self.prepare_inv_with_nested_dirs()
+ new_inv = inv.filter(['doc-id', 'sub-id'])
+ self.assertEqual([
+ ('', 'tree-root'),
+ ('doc', 'doc-id'),
+ ('src', 'src-id'),
+ ('src/sub', 'sub-id'),
+ ('src/sub/a', 'a-id'),
+ ], [(path, ie.file_id) for path, ie in new_inv.iter_entries()])
+
+ def test_inv_filter_files_and_dirs(self):
+ inv = self.prepare_inv_with_nested_dirs()
+ new_inv = inv.filter(['makefile-id', 'src-id'])
+ self.assertEqual([
+ ('', 'tree-root'),
+ ('Makefile', 'makefile-id'),
+ ('src', 'src-id'),
+ ('src/bye.c', 'bye-id'),
+ ('src/hello.c', 'hello-id'),
+ ('src/sub', 'sub-id'),
+ ('src/sub/a', 'a-id'),
+ ('src/zz.c', 'zzc-id'),
+ ], [(path, ie.file_id) for path, ie in new_inv.iter_entries()])
=== modified file 'bzrlib/tests/per_repository/test_repository.py'
--- a/bzrlib/tests/per_repository/test_repository.py 2009-03-18 01:27:58 +0000
+++ b/bzrlib/tests/per_repository/test_repository.py 2009-03-22 01:30:44 +0000
@@ -386,6 +386,54 @@
self.assertIsInstance(delta, TreeDelta)
self.assertEqual([('vla', 'file2', 'file')], delta.added)
+ def test_get_revision_delta_filtered(self):
+ tree_a = self.make_branch_and_tree('a')
+ self.build_tree(['a/foo', 'a/bar/', 'a/bar/b1', 'a/bar/b2', 'a/baz'])
+ tree_a.add(['foo', 'bar', 'bar/b1', 'bar/b2', 'baz'],
+ ['foo-id', 'bar-id', 'b1-id', 'b2-id', 'baz-id'])
+ tree_a.commit('rev1', rev_id='rev1')
+ self.build_tree(['a/bar/b3'])
+ tree_a.add('bar/b3', 'b3-id')
+ tree_a.commit('rev2', rev_id='rev2')
+
+ # Test multiple files
+ delta = tree_a.branch.repository.get_revision_delta('rev1',
+ specific_fileids=['foo-id', 'baz-id'])
+ self.assertIsInstance(delta, TreeDelta)
+ self.assertEqual([
+ ('baz', 'baz-id', 'file'),
+ ('foo', 'foo-id', 'file'),
+ ], delta.added)
+ # Test a directory
+ delta = tree_a.branch.repository.get_revision_delta('rev1',
+ specific_fileids=['bar-id'])
+ self.assertIsInstance(delta, TreeDelta)
+ self.assertEqual([
+ ('bar', 'bar-id', 'directory'),
+ ('bar/b1', 'b1-id', 'file'),
+ ('bar/b2', 'b2-id', 'file'),
+ ], delta.added)
+ # Test a file in a directory
+ delta = tree_a.branch.repository.get_revision_delta('rev1',
+ specific_fileids=['b2-id'])
+ self.assertIsInstance(delta, TreeDelta)
+ self.assertEqual([
+ ('bar', 'bar-id', 'directory'),
+ ('bar/b2', 'b2-id', 'file'),
+ ], delta.added)
+ # Try another revision
+ delta = tree_a.branch.repository.get_revision_delta('rev2',
+ specific_fileids=['b3-id'])
+ self.assertIsInstance(delta, TreeDelta)
+ self.assertEqual([
+ ('bar', 'bar-id', 'directory'),
+ ('bar/b3', 'b3-id', 'file'),
+ ], delta.added)
+ delta = tree_a.branch.repository.get_revision_delta('rev2',
+ specific_fileids=['foo-id'])
+ self.assertIsInstance(delta, TreeDelta)
+ self.assertEqual([], delta.added)
+
def test_clone_bzrdir_repository_revision(self):
# make a repository with some revisions,
# and clone it, this should not have unreferenced revisions.
More information about the bazaar-commits
mailing list