Rev 65: Possibly correct implementation of suggestions on btree indices. in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
Robert Collins
robertc at robertcollins.net
Tue Dec 2 22:11:11 GMT 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
------------------------------------------------------------
revno: 65
revision-id: robertc at robertcollins.net-20081202221109-n1tfmvm3aa9d3jpu
parent: robertc at robertcollins.net-20081202210630-zn6zbcm7lfkqi61x
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Wed 2008-12-03 09:11:09 +1100
message:
Possibly correct implementation of suggestions on btree indices.
modified:
index.py index.py-20080608055509-hnimeek7q8tctkqf-2
tests/test_index.py test_index.py-20080608055509-hnimeek7q8tctkqf-4
=== modified file 'index.py'
--- a/index.py 2008-12-02 21:06:30 +0000
+++ b/index.py 2008-12-02 22:11:09 +0000
@@ -1353,6 +1353,80 @@
value = self._bisect_nodes[candidate_key]
yield (self, candidate_key, value)
+
+class SuggestableBTreeGraphIndex(BTreeGraphIndex):
+ """A subclass of BTreeGraphIndex which adds starts_with searches.
+
+ These searches are used for providing suggestions.
+ """
+
+ def iter_entries_starts_with(self, key):
+ """Iterate over nodes which match key.
+
+ The first length()-1 terms in key must match exactly, and the last term
+ in key is used as a starts_with test.
+
+ :param key: The key to search with.
+ """
+ if not self.key_count():
+ return
+
+ # The lowest node to read in the next row.
+ low_index = 0
+ # the highest node to read in the next row.
+ high_index = 0
+ # Loop down the rows, setting low_index to the lowest node in the row
+ # that we need to read, and high_index to the highest.
+
+ key_prefix = key[:-1]
+ key_suffix = key[-1]
+
+ lower_key = key_prefix + ('',)
+ higher_key = key_prefix + (highest(),)
+
+ for row_pos, next_row_start in enumerate(self._row_offsets[1:-1]):
+ node_indices = set([low_index, high_index])
+ nodes = self._get_internal_nodes(node_indices)
+ low_node = nodes[low_index]
+ positions = self._multi_bisect_right([lower_key], node.keys)
+ node_offset = next_row_start + node.offset
+ low_index = node_offset + positions[0]
+ high_node = nodes[high_index]
+ positions = self._multi_bisect_right([higher_key], node.keys)
+ node_offset = next_row_start + node.offset
+ high_index = node_offset + positions[0]
+ # We should now be at the _LeafNodes
+ node_indices = range(low_index, high_index + 1)
+
+ # TODO: We may *not* want to always read all the nodes in one
+ # big go. Consider setting a max size on this.
+
+ group_size = 100
+ groups = len(node_indices) / group_size + 1
+ for offset in range(groups):
+ node_group = node_indices[offset * group_size:(offset + 1) * group_size]
+ nodes = self._get_leaf_nodes(node_group)
+ for node in nodes.values():
+ # TODO bisect the edge nodes? / find boundaries and so skip
+ # some work.
+ for node_key, (value, refs) in sorted(node.keys.iteritems()):
+ if node_key[:-1] != key_prefix:
+ continue
+ if not node_key[-1].startswith(key_suffix):
+ continue
+ if self.node_ref_lists:
+ yield (self, node_key, value, refs)
+ else:
+ yield (self, node_key, value)
+
+
+class highest(object):
+ """An object that is always after everything else"""
+
+ def __cmp__(self, other):
+ return 1
+
+
_original_make_search_filter = None
=== modified file 'tests/test_index.py'
--- a/tests/test_index.py 2008-12-02 21:06:30 +0000
+++ b/tests/test_index.py 2008-12-02 22:11:09 +0000
@@ -19,7 +19,7 @@
from bzrlib.errors import NotBranchError, UnknownFormatError
from bzrlib.btree_index import BTreeGraphIndex, BTreeBuilder
-from bzrlib.index import GraphIndex
+from bzrlib.index import InMemoryGraphIndex, GraphIndex
from bzrlib import log
from bzrlib.plugins import search
from bzrlib.plugins.search import errors, index
@@ -38,10 +38,16 @@
condition_isinstance((
TestComponentIndexBuilder,
TestComponentCombiner)))
+ graph_suggestion, other_tests = split_suite_by_condition(other_tests,
+ condition_isinstance(TestGraphIndexSuggestions))
adapter = TestScenarioApplier()
adapter.scenarios = [(format_string[:-1], {'format':format}) for
format_string, format in index._FORMATS.items()]
adapt_tests(component_tests, adapter, other_tests)
+ adapter.scenarios = [
+ ("GraphIndex", {'format': (InMemoryGraphIndex, index.SuggestableGraphIndex)}),
+ ("BTree", {'format': (BTreeBuilder, index.SuggestableBTreeGraphIndex)})]
+ adapt_tests(graph_suggestion, adapter, other_tests)
return other_tests
@@ -618,19 +624,19 @@
"""Tests for the SuggestableGraphIndex subclass."""
def test_key_length_1_no_hits(self):
- builder = index.InMemoryGraphIndex(0, 1)
+ builder = self.format[0](0, 1)
# We want nodes before and after the suggestions, to check boundaries.
builder.add_node(('pre',), '', ())
builder.add_node(('prep',), '', ())
transport = self.get_transport()
length = transport.put_file('index', builder.finish())
- query_index = index.SuggestableGraphIndex(transport, 'index', length)
+ query_index = self.format[1](transport, 'index', length)
# Now, searching for suggestions for 'pref' should find nothing.
self.assertEqual([],
list(query_index.iter_entries_starts_with(('pref',))))
def test_key_length_1_iteration(self):
- builder = index.InMemoryGraphIndex(0, 1)
+ builder = self.format[0](0, 1)
# We want nodes before and after the suggestions, to check boundaries.
builder.add_node(('pre',), '', ())
builder.add_node(('prep',), '', ())
@@ -639,7 +645,7 @@
builder.add_node(('preferential',), '', ())
transport = self.get_transport()
length = transport.put_file('index', builder.finish())
- query_index = index.SuggestableGraphIndex(transport, 'index', length)
+ query_index = self.format[1](transport, 'index', length)
# Now, searching for suggestions for 'pref' should find 'pref' and
# 'preferential'.
self.assertEqual([
@@ -649,7 +655,7 @@
list(query_index.iter_entries_starts_with(('pref',))))
def test_key_length_2_no_hits(self):
- builder = index.InMemoryGraphIndex(0, 2)
+ builder = self.format[0](0, 2)
# We want nodes before and after the suggestions, to check boundaries.
# As there are two elements in each key, we want to check this for each
# element, which implies 4 boundaries:
@@ -659,14 +665,14 @@
builder.add_node(('prep', 'pref'), '', ())
transport = self.get_transport()
length = transport.put_file('index', builder.finish())
- query_index = index.SuggestableGraphIndex(transport, 'index', length)
+ query_index = self.format[1](transport, 'index', length)
# Now, searching for suggestions for 'pref', 'pref' should find
# nothing.
self.assertEqual([],
list(query_index.iter_entries_starts_with(('pref', 'pref'))))
def test_key_length_2_iteration(self):
- builder = index.InMemoryGraphIndex(0, 2)
+ builder = self.format[0](0, 2)
# We want nodes before and after the suggestions, to check boundaries.
# - the first element of the key must be an exact match, the second is
# a startswith match, so provide non-match entries that match the second
@@ -680,7 +686,7 @@
builder.add_node(('pref', 'preferential'), '', ())
transport = self.get_transport()
length = transport.put_file('index', builder.finish())
- query_index = index.SuggestableGraphIndex(transport, 'index', length)
+ query_index = self.format[1](transport, 'index', length)
# Now, searching for suggestions for 'pref' should find 'pref' and
# 'preferential'.
self.assertEqual([
More information about the bazaar-commits
mailing list