Rev 2634: Also add iter_key_prefix support to InMemoryGraphIndex. in http://people.ubuntu.com/~robertc/baz2.0/index
Robert Collins
robertc at robertcollins.net
Fri Jul 27 15:14:58 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/index
------------------------------------------------------------
revno: 2634
revision-id: robertc at robertcollins.net-20070727141454-q70m5m38oyp0el4c
parent: robertc at robertcollins.net-20070727122746-99wyymjtkwcu51k6
committer: Robert Collins <robertc at robertcollins.net>
branch nick: index
timestamp: Sat 2007-07-28 00:14:54 +1000
message:
Also add iter_key_prefix support to InMemoryGraphIndex.
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2007-07-27 12:27:46 +0000
+++ b/bzrlib/index.py 2007-07-27 14:14:54 +0000
@@ -65,6 +65,7 @@
"""
self.reference_lists = reference_lists
self._nodes = {}
+ self._nodes_by_key = {}
self._key_length = key_elements
def _check_key(self, key):
@@ -103,6 +104,21 @@
if key in self._nodes and self._nodes[key][0] == '':
raise errors.BadIndexDuplicateKey(key, self)
self._nodes[key] = ('', tuple(node_refs), value)
+ if self._key_length > 1:
+ key_dict = self._nodes_by_key
+ if self.reference_lists:
+ key_value = key, value, tuple(node_refs)
+ else:
+ key_value = key, value
+ # possibly should do this on-demand, but it seems likely it is
+ # always wanted
+ subkey = list(reversed(key[:-1]))
+ while len(subkey):
+ if subkey[-1] not in key_dict:
+ key_dict[subkey[-1]] = {}
+ key_dict = key_dict[subkey[-1]]
+ subkey.pop(-1)
+ key_dict[key[-1]] = key_value
def finish(self):
lines = [_SIGNATURE]
@@ -580,5 +596,75 @@
if not node[0]:
yield key, node[2]
+ def iter_entries_prefix(self, keys):
+ """Iterate over keys within the index using prefix matching.
+
+ Prefix matching is applied within the tuple of a key, not to within
+ the bytestring of each key element. e.g. if you have the keys ('foo',
+ 'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
+ only the former key is returned.
+
+ :param keys: An iterable providing the key prefixes to be retrieved.
+ Each key prefix takes the form of a tuple the length of a key, but
+ with the last N elements 'None' rather than a regular bytestring.
+ The first element cannot be 'None'.
+ :return: An iterable as per iter_all_entries, but restricted to the
+ keys with a matching prefix to those supplied. No additional keys
+ will be returned, and every match that is in the index will be
+ returned.
+ """
+ # XXX: To much duplication with the GraphIndex class; consider finding
+ # a good place to pull out the actual common logic.
+ keys = set(keys)
+ if not keys:
+ return
+ if self._key_length == 1:
+ for key in keys:
+ # sanity check
+ if key[0] is None:
+ raise errors.BadIndexKey(key)
+ if len(key) != self._key_length:
+ raise errors.BadIndexKey(key)
+ node = self._nodes[key]
+ if node[0]:
+ continue
+ if self.reference_lists:
+ yield key, node[2], node[1]
+ else:
+ yield key, node[2]
+ return
+ for key in keys:
+ # sanity check
+ if key[0] is None:
+ raise errors.BadIndexKey(key)
+ if len(key) != self._key_length:
+ raise errors.BadIndexKey(key)
+ # find what it refers to:
+ key_dict = self._nodes_by_key
+ elements = list(key)
+ # find the subdict to return
+ try:
+ while len(elements) and elements[0] is not None:
+ key_dict = key_dict[elements[0]]
+ elements.pop(0)
+ except KeyError:
+ # a non-existant lookup.
+ continue
+ if len(elements):
+ dicts = [key_dict]
+ while dicts:
+ key_dict = dicts.pop(-1)
+ # can't be empty or would not exist
+ item, value = key_dict.iteritems().next()
+ if type(value) == dict:
+ # push keys
+ dicts.extend(key_dict.itervalues())
+ else:
+ # yield keys
+ for value in key_dict.itervalues():
+ yield value
+ else:
+ yield key_dict
+
def validate(self):
"""In memory index's have no known corruption at the moment."""
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2007-07-27 12:27:46 +0000
+++ b/bzrlib/tests/test_index.py 2007-07-27 14:14:54 +0000
@@ -635,8 +635,8 @@
class TestInMemoryGraphIndex(TestCaseWithMemoryTransport):
- def make_index(self, ref_lists=0, nodes=[]):
- result = InMemoryGraphIndex(ref_lists)
+ def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
+ result = InMemoryGraphIndex(ref_lists, key_elements=key_elements)
result.add_nodes(nodes)
return result
@@ -694,6 +694,46 @@
(('ref', ), 'refdata', ((), ))]),
set(index.iter_entries([('name', ), ('ref', )])))
+ def test_iter_key_prefix_1_key_element_no_refs(self):
+ index = self.make_index( nodes=[
+ (('name', ), 'data'),
+ (('ref', ), 'refdata')])
+ self.assertEqual(set([(('name', ), 'data'),
+ (('ref', ), 'refdata')]),
+ set(index.iter_entries_prefix([('name', ), ('ref', )])))
+
+ def test_iter_key_prefix_1_key_element_refs(self):
+ index = self.make_index(1, nodes=[
+ (('name', ), 'data', ([('ref', )], )),
+ (('ref', ), 'refdata', ([], ))])
+ self.assertEqual(set([(('name', ), 'data', ((('ref',),),)),
+ (('ref', ), 'refdata', ((), ))]),
+ set(index.iter_entries_prefix([('name', ), ('ref', )])))
+
+ def test_iter_key_prefix_2_key_element_no_refs(self):
+ index = self.make_index(key_elements=2, nodes=[
+ (('name', 'fin1'), 'data'),
+ (('name', 'fin2'), 'beta'),
+ (('ref', 'erence'), 'refdata')])
+ self.assertEqual(set([(('name', 'fin1'), 'data'),
+ (('ref', 'erence'), 'refdata')]),
+ set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
+ self.assertEqual(set([(('name', 'fin1'), 'data'),
+ (('name', 'fin2'), 'beta')]),
+ set(index.iter_entries_prefix([('name', None)])))
+
+ def test_iter_key_prefix_2_key_element_refs(self):
+ index = self.make_index(1, key_elements=2, nodes=[
+ (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
+ (('name', 'fin2'), 'beta', ([], )),
+ (('ref', 'erence'), 'refdata', ([], ))])
+ self.assertEqual(set([(('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
+ (('ref', 'erence'), 'refdata', ((), ))]),
+ set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
+ self.assertEqual(set([(('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
+ (('name', 'fin2'), 'beta', ((), ))]),
+ set(index.iter_entries_prefix([('name', None)])))
+
def test_iter_nothing_empty(self):
index = self.make_index()
self.assertEqual([], list(index.iter_entries([])))
More information about the bazaar-commits
mailing list