Rev 29: Implement iter_entries_prefix (abentley, robertc) in http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk

Robert Collins robertc at robertcollins.net
Sun Jul 13 08:28:19 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk

------------------------------------------------------------
revno: 29
revision-id: robertc at robertcollins.net-20080713072815-sot304u35kahnexg
parent: robertc at robertcollins.net-20080712051426-pbkgal01tj82brpw
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Sun 2008-07-13 17:28:15 +1000
message:
  Implement iter_entries_prefix (abentley, robertc)
modified:
  btree_index.py                 index.py-20080624222253-p0x5f92uyh5hw734-7
  tests/test_btree_index.py      test_index.py-20080624222253-p0x5f92uyh5hw734-13
=== modified file 'btree_index.py'
--- a/btree_index.py	2008-07-08 02:41:12 +0000
+++ b/btree_index.py	2008-07-13 07:28:15 +0000
@@ -827,7 +827,92 @@
             will be returned, and every match that is in the index will be
             returned.
         """
-        raise NotImplementedError(self.iter_entries_prefix)
+        keys = sorted(set(keys))
+        if not keys:
+            return
+        # Load if needed to check key lengths
+        if self._key_count is None:
+            self._get_root_node()
+        # TODO: only access nodes that can satisfy the prefixes we are looking
+        # for. For now, to meet API usage (as this function is not used by 
+        # current bzrlib) just suck the entire index and iterate in memory.
+        nodes = {}
+        if self.node_ref_lists:
+            if self._key_length == 1:
+                for _1, key, value, refs in self.iter_all_entries():
+                    nodes[key] = value, refs
+            else:
+                nodes_by_key = {}
+                for _1, key, value, refs in self.iter_all_entries():
+                    key_value = key, value, refs
+                    # For a key of (foo, bar, baz) create
+                    # _nodes_by_key[foo][bar][baz] = key_value
+                    key_dict = nodes_by_key
+                    for subkey in key[:-1]:
+                        key_dict = key_dict.setdefault(subkey, {})
+                    key_dict[key[-1]] = key_value
+        else:
+            if self._key_length == 1:
+                for _1, key, value in self.iter_all_entries():
+                    nodes[key] = value
+            else:
+                nodes_by_key = {}
+                for _1, key, value in self.iter_all_entries():
+                    key_value = key, value
+                    # For a key of (foo, bar, baz) create
+                    # _nodes_by_key[foo][bar][baz] = key_value
+                    key_dict = nodes_by_key
+                    for subkey in key[:-1]:
+                        key_dict = key_dict.setdefault(subkey, {})
+                    key_dict[key[-1]] = key_value
+        if self._key_length == 1:
+            for key in keys:
+                # sanity check
+                if key[0] is None:
+                    raise bzrerrors.BadIndexKey(key)
+                if len(key) != self._key_length:
+                    raise bzrerrors.BadIndexKey(key)
+                if self.node_ref_lists:
+                    value, node_refs = nodes[key]
+                    yield self, key, value, node_refs
+                else:
+                    yield self, key, nodes[key]
+            return
+        for key in keys:
+            # sanity check
+            if key[0] is None:
+                raise bzrerrors.BadIndexKey(key)
+            if len(key) != self._key_length:
+                raise bzrerrors.BadIndexKey(key)
+            # find what it refers to:
+            key_dict = nodes_by_key
+            elements = list(key)
+            # find the subdict whose contents should be returned.
+            try:
+                while len(elements) and elements[0] is not None:
+                    key_dict = key_dict[elements[0]]
+                    elements.pop(0)
+            except KeyError:
+                # a non-existant lookup.
+                continue
+            if len(elements):
+                dicts = [key_dict]
+                while dicts:
+                    key_dict = dicts.pop(-1)
+                    # can't be empty or would not exist
+                    item, value = key_dict.iteritems().next()
+                    if type(value) == dict:
+                        # push keys 
+                        dicts.extend(key_dict.itervalues())
+                    else:
+                        # yield keys
+                        for value in key_dict.itervalues():
+                            # each value is the key:value:node refs tuple
+                            # ready to yield.
+                            yield (self, ) + value
+            else:
+                # the last thing looked up was a terminal element
+                yield (self, ) + key_dict
 
     def key_count(self):
         """Return an estimate of the number of keys in this index.

=== modified file 'tests/test_btree_index.py'
--- a/tests/test_btree_index.py	2008-07-07 22:42:56 +0000
+++ b/tests/test_btree_index.py	2008-07-13 07:28:15 +0000
@@ -21,7 +21,7 @@
 import random
 import zlib
 
-from bzrlib import tests
+from bzrlib import errors as bzrerrors, tests
 from bzrlib.plugins import index2
 from bzrlib.plugins.index2 import errors, btree_index
 from bzrlib.plugins.pybloom.pybloom import BloomSHA1
@@ -311,6 +311,16 @@
 
 class TestBTreeIndex(BTreeTestCase):
 
+    def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
+        builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
+            key_elements=key_elements)
+        for key, value, references in nodes:
+            builder.add_node(key, value, references)
+        stream = builder.finish()
+        trans = get_transport('trace+' + self.get_url())
+        size = trans.put_file('index', stream)
+        return btree_index.BTreeGraphIndex(trans, 'index', size)
+
     def test_trivial_constructor(self):
         transport = get_transport('trace+' + self.get_url(''))
         index = btree_index.BTreeGraphIndex(transport, 'index', None)
@@ -660,6 +670,61 @@
             self.assertEqualDiff(pprint.pformat(expected_result),
                                  pprint.pformat(sorted_result))
 
+    def test_iter_key_prefix_1_element_key_None(self):
+        index = self.make_index()
+        self.assertRaises(bzrerrors.BadIndexKey, list,
+            index.iter_entries_prefix([(None, )]))
+
+    def test_iter_key_prefix_wrong_length(self):
+        index = self.make_index()
+        self.assertRaises(bzrerrors.BadIndexKey, list,
+            index.iter_entries_prefix([('foo', None)]))
+        index = self.make_index(key_elements=2)
+        self.assertRaises(bzrerrors.BadIndexKey, list,
+            index.iter_entries_prefix([('foo', )]))
+        self.assertRaises(bzrerrors.BadIndexKey, list,
+            index.iter_entries_prefix([('foo', None, None)]))
+
+    def test_iter_key_prefix_1_key_element_no_refs(self):
+        index = self.make_index( nodes=[
+            (('name', ), 'data', ()),
+            (('ref', ), 'refdata', ())])
+        self.assertEqual(set([(index, ('name', ), 'data'),
+            (index, ('ref', ), 'refdata')]),
+            set(index.iter_entries_prefix([('name', ), ('ref', )])))
+
+    def test_iter_key_prefix_1_key_element_refs(self):
+        index = self.make_index(1, nodes=[
+            (('name', ), 'data', ([('ref', )], )),
+            (('ref', ), 'refdata', ([], ))])
+        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
+            (index, ('ref', ), 'refdata', ((), ))]),
+            set(index.iter_entries_prefix([('name', ), ('ref', )])))
+
+    def test_iter_key_prefix_2_key_element_no_refs(self):
+        index = self.make_index(key_elements=2, nodes=[
+            (('name', 'fin1'), 'data', ()),
+            (('name', 'fin2'), 'beta', ()),
+            (('ref', 'erence'), 'refdata', ())])
+        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
+            (index, ('ref', 'erence'), 'refdata')]),
+            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
+        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
+            (index, ('name', 'fin2'), 'beta')]),
+            set(index.iter_entries_prefix([('name', None)])))
+
+    def test_iter_key_prefix_2_key_element_refs(self):
+        index = self.make_index(1, key_elements=2, nodes=[
+            (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
+            (('name', 'fin2'), 'beta', ([], )),
+            (('ref', 'erence'), 'refdata', ([], ))])
+        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
+            (index, ('ref', 'erence'), 'refdata', ((), ))]),
+            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
+        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
+            (index, ('name', 'fin2'), 'beta', ((), ))]),
+            set(index.iter_entries_prefix([('name', None)])))
+
 
 class TestBTreeNodes(BTreeTestCase):
 




More information about the bazaar-commits mailing list