Rev 65: Possibly correct implementation of suggestions on btree indices. in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

Robert Collins robertc at robertcollins.net
Tue Dec 2 22:11:11 GMT 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

------------------------------------------------------------
revno: 65
revision-id: robertc at robertcollins.net-20081202221109-n1tfmvm3aa9d3jpu
parent: robertc at robertcollins.net-20081202210630-zn6zbcm7lfkqi61x
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Wed 2008-12-03 09:11:09 +1100
message:
  Possibly correct implementation of suggestions on btree indices.
modified:
  index.py                       index.py-20080608055509-hnimeek7q8tctkqf-2
  tests/test_index.py            test_index.py-20080608055509-hnimeek7q8tctkqf-4
=== modified file 'index.py'
--- a/index.py	2008-12-02 21:06:30 +0000
+++ b/index.py	2008-12-02 22:11:09 +0000
@@ -1353,6 +1353,80 @@
                         value = self._bisect_nodes[candidate_key]
                         yield (self, candidate_key, value)
 
+
+class SuggestableBTreeGraphIndex(BTreeGraphIndex):
+    """A subclass of BTreeGraphIndex which adds starts_with searches.
+
+    These searches are used for providing suggestions.
+    """
+
+    def iter_entries_starts_with(self, key):
+        """Iterate over nodes which match key.
+
+        The first length()-1 terms in key must match exactly, and the last term
+        in key is used as a starts_with test.
+
+        :param key: The key to search with.
+        """
+        if not self.key_count():
+            return
+
+        # The lowest node to read in the next row.
+        low_index = 0
+        # the highest node to read in the next row.
+        high_index = 0
+        # Loop down the rows, setting low_index to the lowest node in the row
+        # that we need to read, and high_index to the highest.
+
+        key_prefix = key[:-1]
+        key_suffix = key[-1]
+
+        lower_key = key_prefix + ('',)
+        higher_key = key_prefix + (highest(),)
+
+        for row_pos, next_row_start in enumerate(self._row_offsets[1:-1]):
+            node_indices = set([low_index, high_index])
+            nodes = self._get_internal_nodes(node_indices)
+            low_node = nodes[low_index]
+            positions = self._multi_bisect_right([lower_key], node.keys)
+            node_offset = next_row_start + node.offset
+            low_index = node_offset + positions[0]
+            high_node = nodes[high_index]
+            positions = self._multi_bisect_right([higher_key], node.keys)
+            node_offset = next_row_start + node.offset
+            high_index = node_offset + positions[0]
+        # We should now be at the _LeafNodes
+        node_indices = range(low_index, high_index + 1)
+
+        # TODO: We may *not* want to always read all the nodes in one
+        #       big go. Consider setting a max size on this.
+
+        group_size = 100
+        groups = len(node_indices) / group_size + 1
+        for offset in range(groups):
+            node_group = node_indices[offset * group_size:(offset + 1) * group_size]
+            nodes = self._get_leaf_nodes(node_group)
+            for node in nodes.values():
+                # TODO bisect the edge nodes? / find boundaries and so skip
+                # some work.
+                for node_key, (value, refs) in sorted(node.keys.iteritems()):
+                    if node_key[:-1] != key_prefix:
+                        continue
+                    if not node_key[-1].startswith(key_suffix):
+                        continue
+                    if self.node_ref_lists:
+                        yield (self, node_key, value, refs)
+                    else:
+                        yield (self, node_key, value)
+
+
+class highest(object):
+    """An object that is always after everything else"""
+
+    def __cmp__(self, other):
+        return 1
+
+
 _original_make_search_filter = None
 
 

=== modified file 'tests/test_index.py'
--- a/tests/test_index.py	2008-12-02 21:06:30 +0000
+++ b/tests/test_index.py	2008-12-02 22:11:09 +0000
@@ -19,7 +19,7 @@
 
 from bzrlib.errors import NotBranchError, UnknownFormatError
 from bzrlib.btree_index import BTreeGraphIndex, BTreeBuilder
-from bzrlib.index import GraphIndex
+from bzrlib.index import InMemoryGraphIndex, GraphIndex
 from bzrlib import log
 from bzrlib.plugins import search
 from bzrlib.plugins.search import errors, index
@@ -38,10 +38,16 @@
         condition_isinstance((
             TestComponentIndexBuilder,
             TestComponentCombiner)))
+    graph_suggestion, other_tests = split_suite_by_condition(other_tests,
+        condition_isinstance(TestGraphIndexSuggestions))
     adapter = TestScenarioApplier()
     adapter.scenarios = [(format_string[:-1], {'format':format}) for
         format_string, format in index._FORMATS.items()]
     adapt_tests(component_tests, adapter, other_tests)
+    adapter.scenarios = [
+        ("GraphIndex", {'format': (InMemoryGraphIndex, index.SuggestableGraphIndex)}),
+        ("BTree", {'format': (BTreeBuilder, index.SuggestableBTreeGraphIndex)})]
+    adapt_tests(graph_suggestion, adapter, other_tests)
     return other_tests
 
 
@@ -618,19 +624,19 @@
     """Tests for the SuggestableGraphIndex subclass."""
 
     def test_key_length_1_no_hits(self):
-        builder = index.InMemoryGraphIndex(0, 1)
+        builder = self.format[0](0, 1)
         # We want nodes before and after the suggestions, to check boundaries.
         builder.add_node(('pre',), '', ())
         builder.add_node(('prep',), '', ())
         transport = self.get_transport()
         length = transport.put_file('index', builder.finish())
-        query_index = index.SuggestableGraphIndex(transport, 'index', length)
+        query_index = self.format[1](transport, 'index', length)
         # Now, searching for suggestions for 'pref' should find nothing.
         self.assertEqual([],
             list(query_index.iter_entries_starts_with(('pref',))))
 
     def test_key_length_1_iteration(self):
-        builder = index.InMemoryGraphIndex(0, 1)
+        builder = self.format[0](0, 1)
         # We want nodes before and after the suggestions, to check boundaries.
         builder.add_node(('pre',), '', ())
         builder.add_node(('prep',), '', ())
@@ -639,7 +645,7 @@
         builder.add_node(('preferential',), '', ())
         transport = self.get_transport()
         length = transport.put_file('index', builder.finish())
-        query_index = index.SuggestableGraphIndex(transport, 'index', length)
+        query_index = self.format[1](transport, 'index', length)
         # Now, searching for suggestions for 'pref' should find 'pref' and
         # 'preferential'.
         self.assertEqual([
@@ -649,7 +655,7 @@
             list(query_index.iter_entries_starts_with(('pref',))))
 
     def test_key_length_2_no_hits(self):
-        builder = index.InMemoryGraphIndex(0, 2)
+        builder = self.format[0](0, 2)
         # We want nodes before and after the suggestions, to check boundaries.
         # As there are two elements in each key, we want to check this for each
         # element, which implies 4 boundaries:
@@ -659,14 +665,14 @@
         builder.add_node(('prep', 'pref'), '', ())
         transport = self.get_transport()
         length = transport.put_file('index', builder.finish())
-        query_index = index.SuggestableGraphIndex(transport, 'index', length)
+        query_index = self.format[1](transport, 'index', length)
         # Now, searching for suggestions for 'pref', 'pref' should find
         # nothing.
         self.assertEqual([],
             list(query_index.iter_entries_starts_with(('pref', 'pref'))))
 
     def test_key_length_2_iteration(self):
-        builder = index.InMemoryGraphIndex(0, 2)
+        builder = self.format[0](0, 2)
         # We want nodes before and after the suggestions, to check boundaries.
         # - the first element of the key must be an exact match, the second is
         # a startswith match, so provide non-match entries that match the second
@@ -680,7 +686,7 @@
         builder.add_node(('pref', 'preferential'), '', ())
         transport = self.get_transport()
         length = transport.put_file('index', builder.finish())
-        query_index = index.SuggestableGraphIndex(transport, 'index', length)
+        query_index = self.format[1](transport, 'index', length)
         # Now, searching for suggestions for 'pref' should find 'pref' and
         # 'preferential'.
         self.assertEqual([




More information about the bazaar-commits mailing list