Rev 29: Implement iter_entries_prefix (abentley, robertc) in http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk
Robert Collins
robertc at robertcollins.net
Sun Jul 13 08:28:19 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk
------------------------------------------------------------
revno: 29
revision-id: robertc at robertcollins.net-20080713072815-sot304u35kahnexg
parent: robertc at robertcollins.net-20080712051426-pbkgal01tj82brpw
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Sun 2008-07-13 17:28:15 +1000
message:
Implement iter_entries_prefix (abentley, robertc)
modified:
btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-08 02:41:12 +0000
+++ b/btree_index.py 2008-07-13 07:28:15 +0000
@@ -827,7 +827,92 @@
will be returned, and every match that is in the index will be
returned.
"""
- raise NotImplementedError(self.iter_entries_prefix)
+ keys = sorted(set(keys))
+ if not keys:
+ return
+ # Load if needed to check key lengths
+ if self._key_count is None:
+ self._get_root_node()
+ # TODO: only access nodes that can satisfy the prefixes we are looking
+ # for. For now, to meet API usage (as this function is not used by
+ # current bzrlib) just suck the entire index and iterate in memory.
+ nodes = {}
+ if self.node_ref_lists:
+ if self._key_length == 1:
+ for _1, key, value, refs in self.iter_all_entries():
+ nodes[key] = value, refs
+ else:
+ nodes_by_key = {}
+ for _1, key, value, refs in self.iter_all_entries():
+ key_value = key, value, refs
+ # For a key of (foo, bar, baz) create
+ # _nodes_by_key[foo][bar][baz] = key_value
+ key_dict = nodes_by_key
+ for subkey in key[:-1]:
+ key_dict = key_dict.setdefault(subkey, {})
+ key_dict[key[-1]] = key_value
+ else:
+ if self._key_length == 1:
+ for _1, key, value in self.iter_all_entries():
+ nodes[key] = value
+ else:
+ nodes_by_key = {}
+ for _1, key, value in self.iter_all_entries():
+ key_value = key, value
+ # For a key of (foo, bar, baz) create
+ # _nodes_by_key[foo][bar][baz] = key_value
+ key_dict = nodes_by_key
+ for subkey in key[:-1]:
+ key_dict = key_dict.setdefault(subkey, {})
+ key_dict[key[-1]] = key_value
+ if self._key_length == 1:
+ for key in keys:
+ # sanity check
+ if key[0] is None:
+ raise bzrerrors.BadIndexKey(key)
+ if len(key) != self._key_length:
+ raise bzrerrors.BadIndexKey(key)
+ if self.node_ref_lists:
+ value, node_refs = nodes[key]
+ yield self, key, value, node_refs
+ else:
+ yield self, key, nodes[key]
+ return
+ for key in keys:
+ # sanity check
+ if key[0] is None:
+ raise bzrerrors.BadIndexKey(key)
+ if len(key) != self._key_length:
+ raise bzrerrors.BadIndexKey(key)
+ # find what it refers to:
+ key_dict = nodes_by_key
+ elements = list(key)
+ # find the subdict whose contents should be returned.
+ try:
+ while len(elements) and elements[0] is not None:
+ key_dict = key_dict[elements[0]]
+ elements.pop(0)
+ except KeyError:
+ # a non-existant lookup.
+ continue
+ if len(elements):
+ dicts = [key_dict]
+ while dicts:
+ key_dict = dicts.pop(-1)
+ # can't be empty or would not exist
+ item, value = key_dict.iteritems().next()
+ if type(value) == dict:
+ # push keys
+ dicts.extend(key_dict.itervalues())
+ else:
+ # yield keys
+ for value in key_dict.itervalues():
+ # each value is the key:value:node refs tuple
+ # ready to yield.
+ yield (self, ) + value
+ else:
+ # the last thing looked up was a terminal element
+ yield (self, ) + key_dict
def key_count(self):
"""Return an estimate of the number of keys in this index.
=== modified file 'tests/test_btree_index.py'
--- a/tests/test_btree_index.py 2008-07-07 22:42:56 +0000
+++ b/tests/test_btree_index.py 2008-07-13 07:28:15 +0000
@@ -21,7 +21,7 @@
import random
import zlib
-from bzrlib import tests
+from bzrlib import errors as bzrerrors, tests
from bzrlib.plugins import index2
from bzrlib.plugins.index2 import errors, btree_index
from bzrlib.plugins.pybloom.pybloom import BloomSHA1
@@ -311,6 +311,16 @@
class TestBTreeIndex(BTreeTestCase):
+ def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
+ builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
+ key_elements=key_elements)
+ for key, value, references in nodes:
+ builder.add_node(key, value, references)
+ stream = builder.finish()
+ trans = get_transport('trace+' + self.get_url())
+ size = trans.put_file('index', stream)
+ return btree_index.BTreeGraphIndex(trans, 'index', size)
+
def test_trivial_constructor(self):
transport = get_transport('trace+' + self.get_url(''))
index = btree_index.BTreeGraphIndex(transport, 'index', None)
@@ -660,6 +670,61 @@
self.assertEqualDiff(pprint.pformat(expected_result),
pprint.pformat(sorted_result))
+ def test_iter_key_prefix_1_element_key_None(self):
+ index = self.make_index()
+ self.assertRaises(bzrerrors.BadIndexKey, list,
+ index.iter_entries_prefix([(None, )]))
+
+ def test_iter_key_prefix_wrong_length(self):
+ index = self.make_index()
+ self.assertRaises(bzrerrors.BadIndexKey, list,
+ index.iter_entries_prefix([('foo', None)]))
+ index = self.make_index(key_elements=2)
+ self.assertRaises(bzrerrors.BadIndexKey, list,
+ index.iter_entries_prefix([('foo', )]))
+ self.assertRaises(bzrerrors.BadIndexKey, list,
+ index.iter_entries_prefix([('foo', None, None)]))
+
+ def test_iter_key_prefix_1_key_element_no_refs(self):
+ index = self.make_index( nodes=[
+ (('name', ), 'data', ()),
+ (('ref', ), 'refdata', ())])
+ self.assertEqual(set([(index, ('name', ), 'data'),
+ (index, ('ref', ), 'refdata')]),
+ set(index.iter_entries_prefix([('name', ), ('ref', )])))
+
+ def test_iter_key_prefix_1_key_element_refs(self):
+ index = self.make_index(1, nodes=[
+ (('name', ), 'data', ([('ref', )], )),
+ (('ref', ), 'refdata', ([], ))])
+ self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
+ (index, ('ref', ), 'refdata', ((), ))]),
+ set(index.iter_entries_prefix([('name', ), ('ref', )])))
+
+ def test_iter_key_prefix_2_key_element_no_refs(self):
+ index = self.make_index(key_elements=2, nodes=[
+ (('name', 'fin1'), 'data', ()),
+ (('name', 'fin2'), 'beta', ()),
+ (('ref', 'erence'), 'refdata', ())])
+ self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
+ (index, ('ref', 'erence'), 'refdata')]),
+ set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
+ self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
+ (index, ('name', 'fin2'), 'beta')]),
+ set(index.iter_entries_prefix([('name', None)])))
+
+ def test_iter_key_prefix_2_key_element_refs(self):
+ index = self.make_index(1, key_elements=2, nodes=[
+ (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
+ (('name', 'fin2'), 'beta', ([], )),
+ (('ref', 'erence'), 'refdata', ([], ))])
+ self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
+ (index, ('ref', 'erence'), 'refdata', ((), ))]),
+ set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
+ self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
+ (index, ('name', 'fin2'), 'beta', ((), ))]),
+ set(index.iter_entries_prefix([('name', None)])))
+
class TestBTreeNodes(BTreeTestCase):
More information about the bazaar-commits
mailing list