Rev 4601: Implement CombinedGraphIndex.get_ancestry() in http://bazaar.launchpad.net/~jameinel/bzr/1.18-faster-iter-ancestry

John Arbash Meinel john at arbash-meinel.com
Fri Aug 7 21:23:39 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.18-faster-iter-ancestry

------------------------------------------------------------
revno: 4601
revision-id: john at arbash-meinel.com-20090807202329-svclvb8rmpf8kw9s
parent: john at arbash-meinel.com-20090807193730-b4ib7013ei79rfdy
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.18-faster-iter-ancestry
timestamp: Fri 2009-08-07 15:23:29 -0500
message:
  Implement CombinedGraphIndex.get_ancestry()
  
  This is the appropriate wrapper around GraphIndex/BtreeIndex.get_ancestry.
  It should allow quick access to getting the complete ancestry graph
  of the given set of keys and do so in a fairly efficient fashion.
-------------- next part --------------
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2009-08-07 19:37:30 +0000
+++ b/bzrlib/index.py	2009-08-07 20:23:29 +0000
@@ -1314,6 +1314,44 @@
             except errors.NoSuchFile:
                 self._reload_or_raise()
 
+    def get_ancestry(self, keys):
+        """Find the complete ancestry for the given set of keys.
+
+        :param keys: An iterable of keys to look for
+        :return: (parent_map, missing_keys)
+        """
+        missing_keys = set()
+        parent_map = {}
+        keys_to_lookup = set(keys)
+        while keys_to_lookup:
+            # keys that *all* indexes claim are missing, stop searching them
+            all_index_missing = None
+            for index in self._indices:
+                index_missing_keys = set()
+                # Find all of the ancestry we can from this index
+                # keep looking until the search_keys set is empty, which means
+                # things we didn't find should be in index_missing_keys
+                search_keys = keys_to_lookup
+                while search_keys:
+                    search_keys = index.get_ancestry(search_keys, 0,
+                        parent_map, index_missing_keys)
+                # Now set whatever was missing to be searched in the next index
+                keys_to_lookup = index_missing_keys
+                if all_index_missing is None:
+                    all_index_missing = set(index_missing_keys)
+                else:
+                    all_index_missing.intersection_update(index_missing_keys)
+                if not keys_to_lookup:
+                    break
+            if all_index_missing is None:
+                # There were no indexes, so all search keys are 'missing'
+                missing_keys.update(keys_to_lookup)
+                keys_to_lookup = None
+            else:
+                missing_keys.update(all_index_missing)
+                keys_to_lookup.difference_update(all_index_missing)
+        return parent_map, missing_keys
+
     def key_count(self):
         """Return an estimate of the number of keys in this index.
 

=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py	2009-08-07 19:37:30 +0000
+++ b/bzrlib/tests/test_index.py	2009-08-07 20:23:29 +0000
@@ -1324,6 +1324,76 @@
                                     ['1', '2', '3'])
         self.assertRaises(errors.NoSuchFile, index.validate)
 
+    def test_get_ancestry_across_indexes(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        key3 = ('key-3',)
+        key4 = ('key-4',)
+        index1 = self.make_index('12', ref_lists=1, nodes=[
+            (key1, 'value', ([],)),
+            (key2, 'value', ([key1],)),
+            ])
+        index2 = self.make_index('34', ref_lists=1, nodes=[
+            (key3, 'value', ([key2],)),
+            (key4, 'value', ([key3],)),
+            ])
+        c_index = CombinedGraphIndex([index1, index2])
+        parent_map, missing_keys = c_index.get_ancestry([key1])
+        self.assertEqual({key1: ()}, parent_map)
+        self.assertEqual(set(), missing_keys)
+        # Now look for a key from index2 which requires us to find the key in
+        # the second index, and then continue searching for parents in the
+        # first index
+        parent_map, missing_keys = c_index.get_ancestry([key3])
+        self.assertEqual({key1: (), key2: (key1,), key3: (key2,)}, parent_map)
+        self.assertEqual(set(), missing_keys)
+
+    def test_get_ancestry_missing_keys(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        key3 = ('key-3',)
+        key4 = ('key-4',)
+        index1 = self.make_index('12', ref_lists=1, nodes=[
+            (key1, 'value', ([],)),
+            (key2, 'value', ([key1],)),
+            ])
+        index2 = self.make_index('34', ref_lists=1, nodes=[
+            (key3, 'value', ([key2],)),
+            ])
+        c_index = CombinedGraphIndex([index1, index2])
+        # Searching for a key which is actually not present at all should
+        # eventually converge
+        parent_map, missing_keys = c_index.get_ancestry([key4])
+        self.assertEqual({}, parent_map)
+        self.assertEqual(set([key4]), missing_keys)
+
+    def test_get_ancestry_no_indexes(self):
+        c_index = CombinedGraphIndex([])
+        key1 = ('key-1',)
+        parent_map, missing_keys = c_index.get_ancestry([key1])
+        self.assertEqual({}, parent_map)
+        self.assertEqual(set([key1]), missing_keys)
+
+    def test_get_ancestry_ghost_parent(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        key3 = ('key-3',)
+        key4 = ('key-4',)
+        index1 = self.make_index('12', ref_lists=1, nodes=[
+            (key1, 'value', ([],)),
+            (key2, 'value', ([key1],)),
+            ])
+        index2 = self.make_index('34', ref_lists=1, nodes=[
+            (key4, 'value', ([key2, key3],)),
+            ])
+        c_index = CombinedGraphIndex([index1, index2])
+        # Searching for a key which is actually not present at all should
+        # eventually converge
+        parent_map, missing_keys = c_index.get_ancestry([key4])
+        self.assertEqual({key4: (key2, key3), key2: (key1,), key1: ()},
+                         parent_map)
+        self.assertEqual(set([key3]), missing_keys)
+
 
 class TestInMemoryGraphIndex(TestCaseWithMemoryTransport):
 



More information about the bazaar-commits mailing list