Rev 4613: (jam) Introduce CombinedGraphIndex.find_ancestry and BTreeGraphIndex._find_ancestors() in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Fri Aug 14 20:14:30 BST 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4613 [merge]
revision-id: pqm at pqm.ubuntu.com-20090814191423-zs4ej0tde7yhamha
parent: pqm at pqm.ubuntu.com-20090814164557-05bpjwxjr318ptaf
parent: john at arbash-meinel.com-20090814173603-m2bfkj5haqgvzl7r
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Fri 2009-08-14 20:14:23 +0100
message:
(jam) Introduce CombinedGraphIndex.find_ancestry and
BTreeGraphIndex._find_ancestors()
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'NEWS'
--- a/NEWS 2009-08-14 16:45:57 +0000
+++ b/NEWS 2009-08-14 17:36:03 +0000
@@ -81,6 +81,13 @@
* RemoteBranch.open now honours ignore_fallbacks correctly on bzr-v2
protocols. (Robert Collins)
+* The index code now has some specialized routines to extract the full
+ ancestry of a key in a more efficient manner, most notably
+ ``CombinedGraphIndex.find_ancestry()``. This is not fully exposed to the
+ higher levels yet, but it has the potential to speed up grabbing the full
+ ancestry tremendously. (Time to get the ancestry of bzr.dev drops from
+ 1.5s to 300ms; for OOo, from 33s to 10.5s.) (John Arbash Meinel)
+
Testing
*******
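
The new entry point is easiest to see in isolation. A minimal usage
sketch, assuming two already-built GraphIndex objects whose ref list 0
holds parent references (only find_ancestry() itself comes from this
patch; the surrounding setup is hypothetical):

    from bzrlib.index import CombinedGraphIndex

    c_index = CombinedGraphIndex([index1, index2])
    parent_map, missing_keys = c_index.find_ancestry([('rev-key',)], 0)
    # parent_map maps every reachable key to its parent keys;
    # missing_keys holds referenced keys absent from all indices.
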
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2009-07-01 10:51:47 +0000
+++ b/bzrlib/btree_index.py 2009-08-13 19:56:26 +0000
@@ -586,13 +586,19 @@
class _LeafNode(object):
"""A leaf node for a serialised B+Tree index."""
- __slots__ = ('keys',)
+ __slots__ = ('keys', 'min_key', 'max_key')
def __init__(self, bytes, key_length, ref_list_length):
"""Parse bytes to create a leaf node object."""
# splitlines mangles the \r delimiters.. don't use it.
- self.keys = dict(_btree_serializer._parse_leaf_lines(bytes,
- key_length, ref_list_length))
+ key_list = _btree_serializer._parse_leaf_lines(bytes,
+ key_length, ref_list_length)
+ if key_list:
+ self.min_key = key_list[0][0]
+ self.max_key = key_list[-1][0]
+ else:
+ self.min_key = self.max_key = None
+ self.keys = dict(key_list)
class _InternalNode(object):
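
The cached min_key/max_key pair is what later lets _find_ancestors decide,
with no extra IO, whether an unresolved parent could have lived on the leaf
page already in hand. Restated as a stand-alone range test (classify() and
the set names are illustrative, not bzrlib API):

    def classify(key, node, parents_not_on_page, missing_keys):
        # A key outside [min_key, max_key] may simply live on another
        # leaf; a key inside the range would have been on this page if
        # it existed anywhere in the index.
        if key < node.min_key or key > node.max_key:
            parents_not_on_page.add(key)
        else:
            missing_keys.add(key)
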
@@ -1039,6 +1045,39 @@
output.append(cur_out)
return output
+ def _walk_through_internal_nodes(self, keys):
+ """Take the given set of keys, and find the corresponding LeafNodes.
+
+ :param keys: An unsorted iterable of keys to search for
+ :return: (nodes, keys_at_index)
+ nodes is a dict mapping {index: LeafNode}
+ keys_at_index is a list of tuples of [(index, [keys for Leaf])]
+ """
+ # 6 seconds spent in miss_torture using the sorted() line.
+ # Even with out of order disk IO it seems faster not to sort it when
+ # large queries are being made.
+ keys_at_index = [(0, sorted(keys))]
+
+ for row_pos, next_row_start in enumerate(self._row_offsets[1:-1]):
+ node_indexes = [idx for idx, s_keys in keys_at_index]
+ nodes = self._get_internal_nodes(node_indexes)
+
+ next_nodes_and_keys = []
+ for node_index, sub_keys in keys_at_index:
+ node = nodes[node_index]
+ positions = self._multi_bisect_right(sub_keys, node.keys)
+ node_offset = next_row_start + node.offset
+ next_nodes_and_keys.extend([(node_offset + pos, s_keys)
+ for pos, s_keys in positions])
+ keys_at_index = next_nodes_and_keys
+ # We should now be at the _LeafNodes
+ node_indexes = [idx for idx, s_keys in keys_at_index]
+
+ # TODO: We may *not* want to always read all the nodes in one
+ # big go. Consider setting a max size on this.
+ nodes = self._get_leaf_nodes(node_indexes)
+ return nodes, keys_at_index
+
def iter_entries(self, keys):
"""Iterate over keys within the index.
@@ -1082,32 +1121,7 @@
needed_keys = keys
if not needed_keys:
return
- # 6 seconds spent in miss_torture using the sorted() line.
- # Even with out of order disk IO it seems faster not to sort it when
- # large queries are being made.
- needed_keys = sorted(needed_keys)
-
- nodes_and_keys = [(0, needed_keys)]
-
- for row_pos, next_row_start in enumerate(self._row_offsets[1:-1]):
- node_indexes = [idx for idx, s_keys in nodes_and_keys]
- nodes = self._get_internal_nodes(node_indexes)
-
- next_nodes_and_keys = []
- for node_index, sub_keys in nodes_and_keys:
- node = nodes[node_index]
- positions = self._multi_bisect_right(sub_keys, node.keys)
- node_offset = next_row_start + node.offset
- next_nodes_and_keys.extend([(node_offset + pos, s_keys)
- for pos, s_keys in positions])
- nodes_and_keys = next_nodes_and_keys
- # We should now be at the _LeafNodes
- node_indexes = [idx for idx, s_keys in nodes_and_keys]
-
- # TODO: We may *not* want to always read all the nodes in one
- # big go. Consider setting a max size on this.
-
- nodes = self._get_leaf_nodes(node_indexes)
+ nodes, nodes_and_keys = self._walk_through_internal_nodes(needed_keys)
for node_index, sub_keys in nodes_and_keys:
if not sub_keys:
continue
@@ -1120,6 +1134,133 @@
else:
yield (self, next_sub_key, value)
+ def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
+ """Find the parent_map information for the set of keys.
+
+ This populates the parent_map dict and missing_keys set based on the
+ queried keys. It can also fill in an arbitrary number of parents that
+ it finds while searching for the supplied keys.
+
+ It is unlikely that you want to call this directly. See
+ "CombinedGraphIndex.find_ancestry()" for a more appropriate API.
+
+ :param keys: The keys whose ancestry we want to return
+ Every key will either end up in 'parent_map' or 'missing_keys'.
+ :param ref_list_num: This index in the ref_lists is the parents we
+ care about.
+ :param parent_map: {key: parent_keys} for keys that are present in this
+ index. This may contain more entries than were in 'keys', that are
+ reachable ancestors of the keys requested.
+ :param missing_keys: keys which are known to be missing in this index.
+ This may include parents that were not directly requested, but we
+ were able to determine that they are not present in this index.
+ :return: search_keys - parents that were found, but whose presence in
+ this index could not be determined from the pages read. Callers can
+ re-query this index for those keys, and they will be placed into
+ parent_map or missing_keys
+ """
+ if not self.key_count():
+ # We use key_count() to trigger reading the root node and
+ # determining info about this BTreeGraphIndex
+ # If we don't have any keys, then everything is missing
+ missing_keys.update(keys)
+ return set()
+ if ref_list_num >= self.node_ref_lists:
+ raise ValueError('No ref list %d, index has %d ref lists'
+ % (ref_list_num, self.node_ref_lists))
+
+ # The main trick we are trying to accomplish is that when we find a
+ # key listing its parents, we expect that the parent key is also likely
+ # to sit on the same page, allowing us to expand parents quickly
+ # without suffering the full cost of bisecting, etc.
+ nodes, nodes_and_keys = self._walk_through_internal_nodes(keys)
+
+ # These are parent keys which could not be immediately resolved on the
+ # page where the child was present. Note that we may already be
+ # searching for that key, and it may actually be present [or known
+ # missing] on one of the other pages we are reading.
+ # TODO:
+ # We could try searching for them in the immediate previous or next
+ # page. If they occur "later" we could put them in a pending lookup
+ # set, and then for each node we read thereafter we could check to
+ # see if they are present.
+ # However, we don't know the impact of keeping this list of things
+ # that I'm going to search for every node I come across from here on
+ # out.
+ # It doesn't handle the case when the parent key is missing on a
+ # page that we *don't* read. So we already have to handle being
+ # re-entrant for that.
+ # Since most keys contain a date string, they are more likely to be
+ # found earlier in the file than later, but we would know that right
+ # away (key < min_key), and wouldn't keep searching it on every other
+ # page that we read.
+ # Mostly, it is an idea, one which should be benchmarked.
+ parents_not_on_page = set()
+
+ for node_index, sub_keys in nodes_and_keys:
+ if not sub_keys:
+ continue
+ # sub_keys is all of the keys we are looking for that should exist
+ # on this page, if they aren't here, then they won't be found
+ node = nodes[node_index]
+ node_keys = node.keys
+ parents_to_check = set()
+ for next_sub_key in sub_keys:
+ if next_sub_key not in node_keys:
+ # This one is just not present in the index at all
+ missing_keys.add(next_sub_key)
+ else:
+ value, refs = node_keys[next_sub_key]
+ parent_keys = refs[ref_list_num]
+ parent_map[next_sub_key] = parent_keys
+ parents_to_check.update(parent_keys)
+ # Don't look for things we've already found
+ parents_to_check = parents_to_check.difference(parent_map)
+ # this can be used to test the benefit of having the check loop
+ # inlined.
+ # parents_not_on_page.update(parents_to_check)
+ # continue
+ while parents_to_check:
+ next_parents_to_check = set()
+ for key in parents_to_check:
+ if key in node_keys:
+ value, refs = node_keys[key]
+ parent_keys = refs[ref_list_num]
+ parent_map[key] = parent_keys
+ next_parents_to_check.update(parent_keys)
+ else:
+ # This parent either is genuinely missing, or should be
+ # found on another page. Perf test whether it is better
+ # to check if this key would land on this page or not.
+ # In the 'everything-in-one-pack' scenario, *not*
+ # doing the check is 237ms vs 243ms.
+ # So slightly better, but I assume the standard 'lots
+ # of packs' is going to show a reasonable improvement
+ # from the check, because it avoids 'going around
+ # again' for everything that is in another index
+ # parents_not_on_page.add(key)
+ # Missing for some reason
+ if key < node.min_key:
+ # in the case of bzr.dev, 3.4k/5.3k misses are
+ # 'earlier' misses (65%)
+ parents_not_on_page.add(key)
+ elif key > node.max_key:
+ # This parent key would be present on a different
+ # LeafNode
+ parents_not_on_page.add(key)
+ else:
+ # assert key != node.min_key and key != node.max_key
+ # If it was going to be present, it would be on
+ # *this* page, so mark it missing.
+ missing_keys.add(key)
+ parents_to_check = next_parents_to_check.difference(parent_map)
+ # Might want to do another .difference() from missing_keys
+ # parents_not_on_page could have been found on a different page, or be
+ # known to be missing. So cull out everything that has already been
+ # found.
+ search_keys = parents_not_on_page.difference(
+ parent_map).difference(missing_keys)
+ return search_keys
+
def iter_entries_prefix(self, keys):
"""Iterate over keys within the index using prefix matching.
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2009-07-01 10:53:08 +0000
+++ b/bzrlib/index.py 2009-08-13 19:56:26 +0000
@@ -702,6 +702,23 @@
# the last thing looked up was a terminal element
yield (self, ) + key_dict
+ def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
+ """See BTreeIndex._find_ancestors."""
+ # This API can be implemented as a trivial overlay on top of
+ # iter_entries; it is not an efficient implementation, but it at
+ # least gets the job done.
+ found_keys = set()
+ search_keys = set()
+ for index, key, value, refs in self.iter_entries(keys):
+ parent_keys = refs[ref_list_num]
+ found_keys.add(key)
+ parent_map[key] = parent_keys
+ search_keys.update(parent_keys)
+ # Figure out what, if anything, was missing
+ missing_keys.update(set(keys).difference(found_keys))
+ search_keys = search_keys.difference(parent_map)
+ return search_keys
+
def key_count(self):
"""Return an estimate of the number of keys in this index.
@@ -1297,6 +1314,69 @@
except errors.NoSuchFile:
self._reload_or_raise()
+ def find_ancestry(self, keys, ref_list_num):
+ """Find the complete ancestry for the given set of keys.
+
+ Note that this is a whole-ancestry request, so it should be used
+ sparingly.
+
+ :param keys: An iterable of keys to look for
+ :param ref_list_num: The reference list which references the parents
+ we care about.
+ :return: (parent_map, missing_keys)
+ """
+ missing_keys = set()
+ parent_map = {}
+ keys_to_lookup = set(keys)
+ generation = 0
+ while keys_to_lookup:
+ # keys that *all* indexes claim are missing, stop searching them
+ generation += 1
+ all_index_missing = None
+ # print 'gen\tidx\tsub\tn_keys\tn_pmap\tn_miss'
+ # print '%4d\t\t\t%4d\t%5d\t%5d' % (generation, len(keys_to_lookup),
+ # len(parent_map),
+ # len(missing_keys))
+ for index_idx, index in enumerate(self._indices):
+ # TODO: we should probably be doing something with
+ # 'missing_keys' since we've already determined that
+ # those revisions have not been found anywhere
+ index_missing_keys = set()
+ # Find all of the ancestry we can from this index
+ # keep looking until the search_keys set is empty, which means
+ # things we didn't find should be in index_missing_keys
+ search_keys = keys_to_lookup
+ sub_generation = 0
+ # print ' \t%2d\t\t%4d\t%5d\t%5d' % (
+ # index_idx, len(search_keys),
+ # len(parent_map), len(index_missing_keys))
+ while search_keys:
+ sub_generation += 1
+ # TODO: ref_list_num should really be a parameter, since
+ # CombinedGraphIndex does not know what the ref lists
+ # mean.
+ search_keys = index._find_ancestors(search_keys,
+ ref_list_num, parent_map, index_missing_keys)
+ # print ' \t \t%2d\t%4d\t%5d\t%5d' % (
+ # sub_generation, len(search_keys),
+ # len(parent_map), len(index_missing_keys))
+ # Now set whatever was missing to be searched in the next index
+ keys_to_lookup = index_missing_keys
+ if all_index_missing is None:
+ all_index_missing = set(index_missing_keys)
+ else:
+ all_index_missing.intersection_update(index_missing_keys)
+ if not keys_to_lookup:
+ break
+ if all_index_missing is None:
+ # There were no indexes, so all search keys are 'missing'
+ missing_keys.update(keys_to_lookup)
+ keys_to_lookup = None
+ else:
+ missing_keys.update(all_index_missing)
+ keys_to_lookup.difference_update(all_index_missing)
+ return parent_map, missing_keys
+
def key_count(self):
"""Return an estimate of the number of keys in this index.
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2009-06-22 12:52:39 +0000
+++ b/bzrlib/tests/test_btree_index.py 2009-08-13 19:56:26 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@
from bzrlib import (
btree_index,
errors,
+ osutils,
tests,
)
from bzrlib.tests import (
@@ -980,6 +981,140 @@
])
self.assertEqual(set([]), index.external_references(0))
+ def test__find_ancestors_one_page(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+ (key1, 'value', ([key2],)),
+ (key2, 'value', ([],)),
+ ])
+ parent_map = {}
+ missing_keys = set()
+ search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
+ self.assertEqual({key1: (key2,), key2: ()}, parent_map)
+ self.assertEqual(set(), missing_keys)
+ self.assertEqual(set(), search_keys)
+
+ def test__find_ancestors_one_page_w_missing(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+ (key1, 'value', ([key2],)),
+ (key2, 'value', ([],)),
+ ])
+ parent_map = {}
+ missing_keys = set()
+ search_keys = index._find_ancestors([key2, key3], 0, parent_map,
+ missing_keys)
+ self.assertEqual({key2: ()}, parent_map)
+ # we know that key3 is missing because we read the page that it would
+ # otherwise be on
+ self.assertEqual(set([key3]), missing_keys)
+ self.assertEqual(set(), search_keys)
+
+ def test__find_ancestors_one_parent_missing(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+ (key1, 'value', ([key2],)),
+ (key2, 'value', ([key3],)),
+ ])
+ parent_map = {}
+ missing_keys = set()
+ search_keys = index._find_ancestors([key1], 0, parent_map,
+ missing_keys)
+ self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
+ self.assertEqual(set(), missing_keys)
+ # All we know is that key3 wasn't present on the page we were reading.
+ # The last key on that page is key2, which sorts before key3, so we
+ # don't know whether key3 would land on this page or not.
+ self.assertEqual(set([key3]), search_keys)
+ search_keys = index._find_ancestors(search_keys, 0, parent_map,
+ missing_keys)
+ # passing it back in, we are sure it is 'missing'
+ self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
+ self.assertEqual(set([key3]), missing_keys)
+ self.assertEqual(set([]), search_keys)
+
+ def test__find_ancestors_dont_search_known(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+ (key1, 'value', ([key2],)),
+ (key2, 'value', ([key3],)),
+ (key3, 'value', ([],)),
+ ])
+ # We already know about key2, so we won't try to search for key3
+ parent_map = {key2: (key3,)}
+ missing_keys = set()
+ search_keys = index._find_ancestors([key1], 0, parent_map,
+ missing_keys)
+ self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
+ self.assertEqual(set(), missing_keys)
+ self.assertEqual(set(), search_keys)
+
+ def test__find_ancestors_multiple_pages(self):
+ # We need to use enough keys that we actually cause a split
+ start_time = 1249671539
+ email = "joebob at example.com"
+ nodes = []
+ ref_lists = ((),)
+ rev_keys = []
+ for i in xrange(400):
+ rev_id = '%s-%s-%s' % (email,
+ osutils.compact_date(start_time + i),
+ osutils.rand_chars(16))
+ rev_key = (rev_id,)
+ nodes.append((rev_key, 'value', ref_lists))
+ # We have a single ref list, containing one parent list whose one
+ # entry is the previous revision's key
+ ref_lists = ((rev_key,),)
+ rev_keys.append(rev_key)
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
+ self.assertEqual(400, index.key_count())
+ self.assertEqual(3, len(index._row_offsets))
+ nodes = dict(index._read_nodes([1, 2]))
+ l1 = nodes[1]
+ l2 = nodes[2]
+ min_l2_key = l2.min_key
+ max_l1_key = l1.max_key
+ self.assertTrue(max_l1_key < min_l2_key)
+ parents_min_l2_key = l2.keys[min_l2_key][1][0]
+ self.assertEqual((l1.max_key,), parents_min_l2_key)
+ # Now, whatever key we select that would fall on the second page
+ # should give us all the parents until the page break
+ key_idx = rev_keys.index(min_l2_key)
+ next_key = rev_keys[key_idx+1]
+ # So now when we get the parent map, we should get the key we are
+ # looking for, min_l2_key, and then a reference to go look for the
+ # parent of that key
+ parent_map = {}
+ missing_keys = set()
+ search_keys = index._find_ancestors([next_key], 0, parent_map,
+ missing_keys)
+ self.assertEqual([min_l2_key, next_key], sorted(parent_map))
+ self.assertEqual(set(), missing_keys)
+ self.assertEqual(set([max_l1_key]), search_keys)
+ parent_map = {}
+ search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
+ missing_keys)
+ self.assertEqual(sorted(l1.keys), sorted(parent_map))
+ self.assertEqual(set(), missing_keys)
+ self.assertEqual(set(), search_keys)
+
+ def test__find_ancestors_empty_index(self):
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
+ parent_map = {}
+ missing_keys = set()
+ search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
+ missing_keys)
+ self.assertEqual(set(), search_keys)
+ self.assertEqual({}, parent_map)
+ self.assertEqual(set([('one',), ('two',)]), missing_keys)
+
class TestBTreeNodes(BTreeTestCase):
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2009-03-23 14:59:43 +0000
+++ b/bzrlib/tests/test_index.py 2009-08-13 19:56:26 +0000
@@ -953,6 +953,59 @@
])
self.assertEqual(set([]), index.external_references(0))
+ def test__find_ancestors(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+ (key1, 'value', ([key2],)),
+ (key2, 'value', ([],)),
+ ])
+ parent_map = {}
+ missing_keys = set()
+ search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
+ self.assertEqual({key1: (key2,)}, parent_map)
+ self.assertEqual(set(), missing_keys)
+ self.assertEqual(set([key2]), search_keys)
+ search_keys = index._find_ancestors(search_keys, 0, parent_map,
+ missing_keys)
+ self.assertEqual({key1: (key2,), key2: ()}, parent_map)
+ self.assertEqual(set(), missing_keys)
+ self.assertEqual(set(), search_keys)
+
+ def test__find_ancestors_w_missing(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+ (key1, 'value', ([key2],)),
+ (key2, 'value', ([],)),
+ ])
+ parent_map = {}
+ missing_keys = set()
+ search_keys = index._find_ancestors([key2, key3], 0, parent_map,
+ missing_keys)
+ self.assertEqual({key2: ()}, parent_map)
+ self.assertEqual(set([key3]), missing_keys)
+ self.assertEqual(set(), search_keys)
+
+ def test__find_ancestors_dont_search_known(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+ (key1, 'value', ([key2],)),
+ (key2, 'value', ([key3],)),
+ (key3, 'value', ([],)),
+ ])
+ # We already know about key2, so we won't try to search for key3
+ parent_map = {key2: (key3,)}
+ missing_keys = set()
+ search_keys = index._find_ancestors([key1], 0, parent_map,
+ missing_keys)
+ self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
+ self.assertEqual(set(), missing_keys)
+ self.assertEqual(set(), search_keys)
+
class TestCombinedGraphIndex(TestCaseWithMemoryTransport):
@@ -1271,6 +1324,86 @@
['1', '2', '3'])
self.assertRaises(errors.NoSuchFile, index.validate)
+ def test_find_ancestors_across_indexes(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ key4 = ('key-4',)
+ index1 = self.make_index('12', ref_lists=1, nodes=[
+ (key1, 'value', ([],)),
+ (key2, 'value', ([key1],)),
+ ])
+ index2 = self.make_index('34', ref_lists=1, nodes=[
+ (key3, 'value', ([key2],)),
+ (key4, 'value', ([key3],)),
+ ])
+ c_index = CombinedGraphIndex([index1, index2])
+ parent_map, missing_keys = c_index.find_ancestry([key1], 0)
+ self.assertEqual({key1: ()}, parent_map)
+ self.assertEqual(set(), missing_keys)
+ # Now look for a key from index2 which requires us to find the key in
+ # the second index, and then continue searching for parents in the
+ # first index
+ parent_map, missing_keys = c_index.find_ancestry([key3], 0)
+ self.assertEqual({key1: (), key2: (key1,), key3: (key2,)}, parent_map)
+ self.assertEqual(set(), missing_keys)
+
+ def test_find_ancestors_missing_keys(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ key4 = ('key-4',)
+ index1 = self.make_index('12', ref_lists=1, nodes=[
+ (key1, 'value', ([],)),
+ (key2, 'value', ([key1],)),
+ ])
+ index2 = self.make_index('34', ref_lists=1, nodes=[
+ (key3, 'value', ([key2],)),
+ ])
+ c_index = CombinedGraphIndex([index1, index2])
+ # Searching for a key which is actually not present at all should
+ # eventually converge
+ parent_map, missing_keys = c_index.find_ancestry([key4], 0)
+ self.assertEqual({}, parent_map)
+ self.assertEqual(set([key4]), missing_keys)
+
+ def test_find_ancestors_no_indexes(self):
+ c_index = CombinedGraphIndex([])
+ key1 = ('key-1',)
+ parent_map, missing_keys = c_index.find_ancestry([key1], 0)
+ self.assertEqual({}, parent_map)
+ self.assertEqual(set([key1]), missing_keys)
+
+ def test_find_ancestors_ghost_parent(self):
+ key1 = ('key-1',)
+ key2 = ('key-2',)
+ key3 = ('key-3',)
+ key4 = ('key-4',)
+ index1 = self.make_index('12', ref_lists=1, nodes=[
+ (key1, 'value', ([],)),
+ (key2, 'value', ([key1],)),
+ ])
+ index2 = self.make_index('34', ref_lists=1, nodes=[
+ (key4, 'value', ([key2, key3],)),
+ ])
+ c_index = CombinedGraphIndex([index1, index2])
+ # Searching for a key whose parents include a ghost should still
+ # converge, with the ghost reported as missing
+ parent_map, missing_keys = c_index.find_ancestry([key4], 0)
+ self.assertEqual({key4: (key2, key3), key2: (key1,), key1: ()},
+ parent_map)
+ self.assertEqual(set([key3]), missing_keys)
+
+ def test__find_ancestors_empty_index(self):
+ index = self.make_index('test', ref_lists=1, key_elements=1, nodes=[])
+ parent_map = {}
+ missing_keys = set()
+ search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
+ missing_keys)
+ self.assertEqual(set(), search_keys)
+ self.assertEqual({}, parent_map)
+ self.assertEqual(set([('one',), ('two',)]), missing_keys)
+
class TestInMemoryGraphIndex(TestCaseWithMemoryTransport):