Rev 4601: Implement CombinedGraphIndex.get_ancestry() in http://bazaar.launchpad.net/~jameinel/bzr/1.18-faster-iter-ancestry
John Arbash Meinel
john at arbash-meinel.com
Fri Aug 7 21:23:39 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.18-faster-iter-ancestry
------------------------------------------------------------
revno: 4601
revision-id: john at arbash-meinel.com-20090807202329-svclvb8rmpf8kw9s
parent: john at arbash-meinel.com-20090807193730-b4ib7013ei79rfdy
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.18-faster-iter-ancestry
timestamp: Fri 2009-08-07 15:23:29 -0500
message:
Implement CombinedGraphIndex.get_ancestry()
This is the appropriate wrapper around GraphIndex/BtreeIndex.get_ancestry.
It should allow quick access to getting the complete ancestry graph
of the given set of keys and do so in a fairly efficient fashion.
-------------- next part --------------
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2009-08-07 19:37:30 +0000
+++ b/bzrlib/index.py 2009-08-07 20:23:29 +0000
@@ -1314,6 +1314,44 @@
except errors.NoSuchFile:
self._reload_or_raise()
+ def get_ancestry(self, keys):
+ """Find the complete ancestry for the given set of keys.
+
+ :param keys: An iterable of keys to look for
+ :return: (parent_map, missing_keys)
+ """
+ missing_keys = set()
+ parent_map = {}
+ keys_to_lookup = set(keys)
+ while keys_to_lookup:
+ # keys that *all* indexes claim are missing, stop searching them
+ all_index_missing = None
+ for index in self._indices:
+ index_missing_keys = set()
+ # Find all of the ancestry we can from this index
+ # keep looking until the search_keys set is empty, which means
+ # things we didn't find should be in index_missing_keys
+ search_keys = keys_to_lookup
+ while search_keys:
+ search_keys = index.get_ancestry(search_keys, 0,
+ parent_map, index_missing_keys)
+ # Now set whatever was missing to be searched in the next index
+ keys_to_lookup = index_missing_keys
+ if all_index_missing is None:
+ all_index_missing = set(index_missing_keys)
+ else:
+ all_index_missing.intersection_update(index_missing_keys)
+ if not keys_to_lookup:
+ break
+ if all_index_missing is None:
+ # There were no indexes, so all search keys are 'missing'
+ missing_keys.update(keys_to_lookup)
+ keys_to_lookup = None
+ else:
+ missing_keys.update(all_index_missing)
+ keys_to_lookup.difference_update(all_index_missing)
+ return parent_map, missing_keys
+
def key_count(self):
"""Return an estimate of the number of keys in this index.
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2009-08-07 19:37:30 +0000
+++ b/bzrlib/tests/test_index.py 2009-08-07 20:23:29 +0000
@@ -1324,6 +1324,76 @@
['1', '2', '3'])
self.assertRaises(errors.NoSuchFile, index.validate)
+ def test_get_ancestry_across_indexes(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ key4 = ('key-4',)
+ index1 = self.make_index('12', ref_lists=1, nodes=[
+ (key1, 'value', ([],)),
+ (key2, 'value', ([key1],)),
+ ])
+ index2 = self.make_index('34', ref_lists=1, nodes=[
+ (key3, 'value', ([key2],)),
+ (key4, 'value', ([key3],)),
+ ])
+ c_index = CombinedGraphIndex([index1, index2])
+ parent_map, missing_keys = c_index.get_ancestry([key1])
+ self.assertEqual({key1: ()}, parent_map)
+ self.assertEqual(set(), missing_keys)
+ # Now look for a key from index2 which requires us to find the key in
+ # the second index, and then continue searching for parents in the
+ # first index
+ parent_map, missing_keys = c_index.get_ancestry([key3])
+ self.assertEqual({key1: (), key2: (key1,), key3: (key2,)}, parent_map)
+ self.assertEqual(set(), missing_keys)
+
+ def test_get_ancestry_missing_keys(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ key4 = ('key-4',)
+ index1 = self.make_index('12', ref_lists=1, nodes=[
+ (key1, 'value', ([],)),
+ (key2, 'value', ([key1],)),
+ ])
+ index2 = self.make_index('34', ref_lists=1, nodes=[
+ (key3, 'value', ([key2],)),
+ ])
+ c_index = CombinedGraphIndex([index1, index2])
+ # Searching for a key which is actually not present at all should
+ # eventually converge
+ parent_map, missing_keys = c_index.get_ancestry([key4])
+ self.assertEqual({}, parent_map)
+ self.assertEqual(set([key4]), missing_keys)
+
+ def test_get_ancestry_no_indexes(self):
+ c_index = CombinedGraphIndex([])
+ key1 = ('key-1',)
+ parent_map, missing_keys = c_index.get_ancestry([key1])
+ self.assertEqual({}, parent_map)
+ self.assertEqual(set([key1]), missing_keys)
+
+ def test_get_ancestry_ghost_parent(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ key4 = ('key-4',)
+ index1 = self.make_index('12', ref_lists=1, nodes=[
+ (key1, 'value', ([],)),
+ (key2, 'value', ([key1],)),
+ ])
+ index2 = self.make_index('34', ref_lists=1, nodes=[
+ (key4, 'value', ([key2, key3],)),
+ ])
+ c_index = CombinedGraphIndex([index1, index2])
+ # Searching for a key which is actually not present at all should
+ # eventually converge
+ parent_map, missing_keys = c_index.get_ancestry([key4])
+ self.assertEqual({key4: (key2, key3), key2: (key1,), key1: ()},
+ parent_map)
+ self.assertEqual(set([key3]), missing_keys)
+
class TestInMemoryGraphIndex(TestCaseWithMemoryTransport):
More information about the bazaar-commits
mailing list