Rev 4604: Snapshot the work in progress. in http://bazaar.launchpad.net/~jameinel/bzr/1.18-faster-iter-ancestry

John Arbash Meinel john at arbash-meinel.com
Tue Aug 11 18:03:14 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.18-faster-iter-ancestry

------------------------------------------------------------
revno: 4604
revision-id: john at arbash-meinel.com-20090811170307-31ip7d2lswbbcr3w
parent: john at arbash-meinel.com-20090807212257-8j9pgs1pfxnzhcyv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.18-faster-iter-ancestry
timestamp: Tue 2009-08-11 12:03:07 -0500
message:
  Snapshot the work in progress.
-------------- next part --------------
=== modified file 'NOTES'
--- a/NOTES	2009-08-07 21:22:57 +0000
+++ b/NOTES	2009-08-11 17:03:07 +0000
@@ -1538,7 +1538,7 @@
 allows us to not try to search across all the indexes all the time, because we
 follow whatever parent threads a given index can give us.
 
-The overhead of 19 indexes was only 245 => 309 or 1.26:1, rather than 3:1 for
+The overhead of 20 indexes was only 245 => 309 or 1.26:1, rather than 3:1 for
 the get_parent_map implementation.
 
 Also of interest is that in the single-index case the improvement was only

=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2009-08-07 18:52:37 +0000
+++ b/bzrlib/btree_index.py	2009-08-11 17:03:07 +0000
@@ -1134,30 +1134,36 @@
                     else:
                         yield (self, next_sub_key, value)
 
-    def get_ancestry(self, keys, ref_list_num, parent_map, missing_keys):
-        """Iterate over the given keys and all parents that are found.
-
-        :param keys: A sorted list keys whose ancestry we want to return
+    def find_ancestry(self, keys, ref_list_num, parent_map, missing_keys):
+        """Find the parent_map information for the set of keys.
+
+        This populates the parent_map dict and missing_keys set based on the
+        queried keys. It also can fill out an arbitrary number of parents that
+        it finds while searching for the supplied keys.
+
+        It is unlikely that you want to call this directly. See
+        "CombinedGraphIndex.get_ancestry()" for a more appropriate API.
+
+        :param keys: A keys whose ancestry we want to return
+            Every key will either end up in 'parent_map' or 'missing_keys'.
         :param ref_list_num: This index in the ref_lists is the parents we
             care about.
-        :param parent_map: keys that we already know the parents to. when
-            finding new keys we will add nodes to this dict.
+        :param parent_map: {key: parent_keys} for keys that are present in this
+            index. This may contain more entries than were in 'keys', that are
+            reachable ancestors of the keys requested.
         :param missing_keys: keys which are known to be missing in this index.
-            New entries that are found to be missing will be added to this set.
-        :return: [not_present_keys], [search_keys]
-            A dict mapping {key: parent_keys} but including all
-                parent_keys that we encounter.
-            not_present_keys are keys where we found the LeafNode, but the key
-                just isn't there.
-            search_keys parents that we found, which might exist in this
-                index, but which we were unable to find immediately, callers
-                should re-query this index for those keys.
+            This may include parents that were not directly requested, but we
+            were able to determine that they are not present in this index.
+        :return: search_keys    parents that were found but not queried to know
+            if they are missing or present. Callers can re-query this index for
+            those keys, and they will be placed into parent_map or missing_keys
         """
         if not self.key_count():
             # We use key_count() to trigger reading the root node and
             # determining info about this BTreeGraphIndex
             # If we don't have any keys, then everything is missing
-            return keys, parent_map
+            missing_keys.update(keys)
+            return set()
         if ref_list_num >= self.node_ref_lists:
             raise ValueError('No ref list %d, index has %d ref lists'
                 % (ref_list_num, self.node_ref_lists))

=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2009-08-07 21:09:11 +0000
+++ b/bzrlib/index.py	2009-08-11 17:03:07 +0000
@@ -702,8 +702,9 @@
                 # the last thing looked up was a terminal element
                 yield (self, ) + key_dict
 
-    def get_ancestry(self, keys, ref_list_num, parent_map, missing_keys):
-        """See BTreeIndex.get_ancestry."""
+    def get_local_parent_map(self, keys, ref_list_num, parent_map,
+                             missing_keys):
+        """See BTreeIndex.get_local_parent_map."""
         # The api can be implemented as a trivial overlay on top of
         # iter_entries, it is not an efficient implementation, but it at least
         # gets the job done.

=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py	2009-08-07 19:37:30 +0000
+++ b/bzrlib/tests/test_btree_index.py	2009-08-11 17:03:07 +0000
@@ -1105,6 +1105,16 @@
         self.assertEqual(set(), missing_keys)
         self.assertEqual(set(), search_keys)
 
+    def test_get_ancestry_empty_index(self):
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index.get_ancestry([('one',), ('two',)], 0, parent_map,
+                                         missing_keys)
+        self.assertEqual(set(), search_keys)
+        self.assertEqual({}, parent_map)
+        self.assertEqual(set([('one',), ('two',)]), missing_keys)
+
 
 class TestBTreeNodes(BTreeTestCase):
 

=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py	2009-08-07 20:23:29 +0000
+++ b/bzrlib/tests/test_index.py	2009-08-11 17:03:07 +0000
@@ -1394,6 +1394,16 @@
                          parent_map)
         self.assertEqual(set([key3]), missing_keys)
 
+    def test_get_ancestry_empty_index(self):
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index.get_ancestry([('one',), ('two',)], 0, parent_map,
+                                         missing_keys)
+        self.assertEqual(set(), search_keys)
+        self.assertEqual({}, parent_map)
+        self.assertEqual(set([('one',), ('two',)]), missing_keys)
+
 
 class TestInMemoryGraphIndex(TestCaseWithMemoryTransport):
 



More information about the bazaar-commits mailing list