Rev 4604: Snapshot the work in progress. in http://bazaar.launchpad.net/~jameinel/bzr/1.18-faster-iter-ancestry
John Arbash Meinel
john at arbash-meinel.com
Tue Aug 11 18:03:14 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.18-faster-iter-ancestry
------------------------------------------------------------
revno: 4604
revision-id: john at arbash-meinel.com-20090811170307-31ip7d2lswbbcr3w
parent: john at arbash-meinel.com-20090807212257-8j9pgs1pfxnzhcyv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.18-faster-iter-ancestry
timestamp: Tue 2009-08-11 12:03:07 -0500
message:
Snapshot the work in progress.
-------------- next part --------------
=== modified file 'NOTES'
--- a/NOTES 2009-08-07 21:22:57 +0000
+++ b/NOTES 2009-08-11 17:03:07 +0000
@@ -1538,7 +1538,7 @@
allows us to not try to search across all the indexes all the time, because we
follow whatever parent threads a given index can give us.
-The overhead of 19 indexes was only 245 => 309 or 1.26:1, rather than 3:1 for
+The overhead of 20 indexes was only 245 => 309 or 1.26:1, rather than 3:1 for
the get_parent_map implementation.
Also of interest is that in the single-index case the improvement was only
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2009-08-07 18:52:37 +0000
+++ b/bzrlib/btree_index.py 2009-08-11 17:03:07 +0000
@@ -1134,30 +1134,36 @@
else:
yield (self, next_sub_key, value)
- def get_ancestry(self, keys, ref_list_num, parent_map, missing_keys):
- """Iterate over the given keys and all parents that are found.
-
- :param keys: A sorted list keys whose ancestry we want to return
+ def find_ancestry(self, keys, ref_list_num, parent_map, missing_keys):
+ """Find the parent_map information for the set of keys.
+
+ This populates the parent_map dict and missing_keys set based on the
+ queried keys. It also can fill out an arbitrary number of parents that
+ it finds while searching for the supplied keys.
+
+ It is unlikely that you want to call this directly. See
+ "CombinedGraphIndex.get_ancestry()" for a more appropriate API.
+
+ :param keys: A keys whose ancestry we want to return
+ Every key will either end up in 'parent_map' or 'missing_keys'.
:param ref_list_num: This index in the ref_lists is the parents we
care about.
- :param parent_map: keys that we already know the parents to. when
- finding new keys we will add nodes to this dict.
+ :param parent_map: {key: parent_keys} for keys that are present in this
+ index. This may contain more entries than were in 'keys', that are
+ reachable ancestors of the keys requested.
:param missing_keys: keys which are known to be missing in this index.
- New entries that are found to be missing will be added to this set.
- :return: [not_present_keys], [search_keys]
- A dict mapping {key: parent_keys} but including all
- parent_keys that we encounter.
- not_present_keys are keys where we found the LeafNode, but the key
- just isn't there.
- search_keys parents that we found, which might exist in this
- index, but which we were unable to find immediately, callers
- should re-query this index for those keys.
+ This may include parents that were not directly requested, but we
+ were able to determine that they are not present in this index.
+ :return: search_keys parents that were found but not queried to know
+ if they are missing or present. Callers can re-query this index for
+ those keys, and they will be placed into parent_map or missing_keys
"""
if not self.key_count():
# We use key_count() to trigger reading the root node and
# determining info about this BTreeGraphIndex
# If we don't have any keys, then everything is missing
- return keys, parent_map
+ missing_keys.update(keys)
+ return set()
if ref_list_num >= self.node_ref_lists:
raise ValueError('No ref list %d, index has %d ref lists'
% (ref_list_num, self.node_ref_lists))
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2009-08-07 21:09:11 +0000
+++ b/bzrlib/index.py 2009-08-11 17:03:07 +0000
@@ -702,8 +702,9 @@
# the last thing looked up was a terminal element
yield (self, ) + key_dict
- def get_ancestry(self, keys, ref_list_num, parent_map, missing_keys):
- """See BTreeIndex.get_ancestry."""
+ def get_local_parent_map(self, keys, ref_list_num, parent_map,
+ missing_keys):
+ """See BTreeIndex.get_local_parent_map."""
# The api can be implemented as a trivial overlay on top of
# iter_entries, it is not an efficient implementation, but it at least
# gets the job done.
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2009-08-07 19:37:30 +0000
+++ b/bzrlib/tests/test_btree_index.py 2009-08-11 17:03:07 +0000
@@ -1105,6 +1105,16 @@
self.assertEqual(set(), missing_keys)
self.assertEqual(set(), search_keys)
+ def test_get_ancestry_empty_index(self):
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
+ parent_map = {}
+ missing_keys = set()
+ search_keys = index.get_ancestry([('one',), ('two',)], 0, parent_map,
+ missing_keys)
+ self.assertEqual(set(), search_keys)
+ self.assertEqual({}, parent_map)
+ self.assertEqual(set([('one',), ('two',)]), missing_keys)
+
class TestBTreeNodes(BTreeTestCase):
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2009-08-07 20:23:29 +0000
+++ b/bzrlib/tests/test_index.py 2009-08-11 17:03:07 +0000
@@ -1394,6 +1394,16 @@
parent_map)
self.assertEqual(set([key3]), missing_keys)
+ def test_get_ancestry_empty_index(self):
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
+ parent_map = {}
+ missing_keys = set()
+ search_keys = index.get_ancestry([('one',), ('two',)], 0, parent_map,
+ missing_keys)
+ self.assertEqual(set(), search_keys)
+ self.assertEqual({}, parent_map)
+ self.assertEqual(set([('one',), ('two',)]), missing_keys)
+
class TestInMemoryGraphIndex(TestCaseWithMemoryTransport):
More information about the bazaar-commits
mailing list