Rev 2634: Also add iter_key_prefix support to InMemoryGraphIndex. in http://people.ubuntu.com/~robertc/baz2.0/index

Robert Collins robertc at robertcollins.net
Fri Jul 27 15:14:58 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/index

------------------------------------------------------------
revno: 2634
revision-id: robertc at robertcollins.net-20070727141454-q70m5m38oyp0el4c
parent: robertc at robertcollins.net-20070727122746-99wyymjtkwcu51k6
committer: Robert Collins <robertc at robertcollins.net>
branch nick: index
timestamp: Sat 2007-07-28 00:14:54 +1000
message:
  Also add iter_key_prefix support to InMemoryGraphIndex.
modified:
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
  bzrlib/tests/test_index.py     test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2007-07-27 12:27:46 +0000
+++ b/bzrlib/index.py	2007-07-27 14:14:54 +0000
@@ -65,6 +65,7 @@
         """
         self.reference_lists = reference_lists
         self._nodes = {}
+        self._nodes_by_key = {}
         self._key_length = key_elements
 
     def _check_key(self, key):
@@ -103,6 +104,21 @@
         if key in self._nodes and self._nodes[key][0] == '':
             raise errors.BadIndexDuplicateKey(key, self)
         self._nodes[key] = ('', tuple(node_refs), value)
+        if self._key_length > 1:
+            key_dict = self._nodes_by_key
+            if self.reference_lists:
+                key_value = key, value, tuple(node_refs)
+            else:
+                key_value = key, value
+            # possibly should do this on-demand, but it seems likely it is 
+            # always wanted
+            subkey = list(reversed(key[:-1]))
+            while len(subkey):
+                if subkey[-1] not in key_dict:
+                    key_dict[subkey[-1]] = {}
+                key_dict = key_dict[subkey[-1]]
+                subkey.pop(-1)
+            key_dict[key[-1]] = key_value
 
     def finish(self):
         lines = [_SIGNATURE]
@@ -580,5 +596,75 @@
                 if not node[0]:
                     yield key, node[2]
 
+    def iter_entries_prefix(self, keys):
+        """Iterate over keys within the index using prefix matching.
+
+        Prefix matching is applied within the tuple of a key, not to within
+        the bytestring of each key element. e.g. if you have the keys ('foo',
+        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
+        only the former key is returned.
+
+        :param keys: An iterable providing the key prefixes to be retrieved.
+            Each key prefix takes the form of a tuple the length of a key, but
+            with the last N elements 'None' rather than a regular bytestring.
+            The first element cannot be 'None'.
+        :return: An iterable as per iter_all_entries, but restricted to the
+            keys with a matching prefix to those supplied. No additional keys
+            will be returned, and every match that is in the index will be
+            returned.
+        """
+        # XXX: To much duplication with the GraphIndex class; consider finding
+        # a good place to pull out the actual common logic.
+        keys = set(keys)
+        if not keys:
+            return
+        if self._key_length == 1:
+            for key in keys:
+                # sanity check
+                if key[0] is None:
+                    raise errors.BadIndexKey(key)
+                if len(key) != self._key_length:
+                    raise errors.BadIndexKey(key)
+                node = self._nodes[key]
+                if node[0]:
+                    continue 
+                if self.reference_lists:
+                    yield key, node[2], node[1]
+                else:
+                    yield key, node[2]
+            return
+        for key in keys:
+            # sanity check
+            if key[0] is None:
+                raise errors.BadIndexKey(key)
+            if len(key) != self._key_length:
+                raise errors.BadIndexKey(key)
+            # find what it refers to:
+            key_dict = self._nodes_by_key
+            elements = list(key)
+            # find the subdict to return
+            try:
+                while len(elements) and elements[0] is not None:
+                    key_dict = key_dict[elements[0]]
+                    elements.pop(0)
+            except KeyError:
+                # a non-existant lookup.
+                continue
+            if len(elements):
+                dicts = [key_dict]
+                while dicts:
+                    key_dict = dicts.pop(-1)
+                    # can't be empty or would not exist
+                    item, value = key_dict.iteritems().next()
+                    if type(value) == dict:
+                        # push keys 
+                        dicts.extend(key_dict.itervalues())
+                    else:
+                        # yield keys
+                        for value in key_dict.itervalues():
+                            yield value
+            else:
+                yield key_dict
+
     def validate(self):
         """In memory index's have no known corruption at the moment."""

=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py	2007-07-27 12:27:46 +0000
+++ b/bzrlib/tests/test_index.py	2007-07-27 14:14:54 +0000
@@ -635,8 +635,8 @@
 
 class TestInMemoryGraphIndex(TestCaseWithMemoryTransport):
 
-    def make_index(self, ref_lists=0, nodes=[]):
-        result = InMemoryGraphIndex(ref_lists)
+    def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
+        result = InMemoryGraphIndex(ref_lists, key_elements=key_elements)
         result.add_nodes(nodes)
         return result
 
@@ -694,6 +694,46 @@
             (('ref', ), 'refdata', ((), ))]),
             set(index.iter_entries([('name', ), ('ref', )])))
 
+    def test_iter_key_prefix_1_key_element_no_refs(self):
+        index = self.make_index( nodes=[
+            (('name', ), 'data'),
+            (('ref', ), 'refdata')])
+        self.assertEqual(set([(('name', ), 'data'),
+            (('ref', ), 'refdata')]),
+            set(index.iter_entries_prefix([('name', ), ('ref', )])))
+
+    def test_iter_key_prefix_1_key_element_refs(self):
+        index = self.make_index(1, nodes=[
+            (('name', ), 'data', ([('ref', )], )),
+            (('ref', ), 'refdata', ([], ))])
+        self.assertEqual(set([(('name', ), 'data', ((('ref',),),)),
+            (('ref', ), 'refdata', ((), ))]),
+            set(index.iter_entries_prefix([('name', ), ('ref', )])))
+
+    def test_iter_key_prefix_2_key_element_no_refs(self):
+        index = self.make_index(key_elements=2, nodes=[
+            (('name', 'fin1'), 'data'),
+            (('name', 'fin2'), 'beta'),
+            (('ref', 'erence'), 'refdata')])
+        self.assertEqual(set([(('name', 'fin1'), 'data'),
+            (('ref', 'erence'), 'refdata')]),
+            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
+        self.assertEqual(set([(('name', 'fin1'), 'data'),
+            (('name', 'fin2'), 'beta')]),
+            set(index.iter_entries_prefix([('name', None)])))
+
+    def test_iter_key_prefix_2_key_element_refs(self):
+        index = self.make_index(1, key_elements=2, nodes=[
+            (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
+            (('name', 'fin2'), 'beta', ([], )),
+            (('ref', 'erence'), 'refdata', ([], ))])
+        self.assertEqual(set([(('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
+            (('ref', 'erence'), 'refdata', ((), ))]),
+            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
+        self.assertEqual(set([(('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
+            (('name', 'fin2'), 'beta', ((), ))]),
+            set(index.iter_entries_prefix([('name', None)])))
+
     def test_iter_nothing_empty(self):
         index = self.make_index()
         self.assertEqual([], list(index.iter_entries([])))



More information about the bazaar-commits mailing list