Rev 2623: Build a combined graph index to use multiple indices at once. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Fri Jul 13 14:10:33 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2623
revision-id: robertc at robertcollins.net-20070713131030-xbo7bbks9zovdya4
parent: robertc at robertcollins.net-20070713112454-dsj464911g0l8wpa
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Fri 2007-07-13 23:10:30 +1000
message:
  Build a combined graph index to use multiple indices at once.
modified:
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
  bzrlib/tests/test_index.py     test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2007-07-13 11:24:54 +0000
+++ b/bzrlib/index.py	2007-07-13 13:10:30 +0000
@@ -264,3 +264,55 @@
         # iter_all validates completely at the moment, so just do that.
         for node in self.iter_all_entries():
             pass
+
+
+class CombinedGraphIndex(object):
+    """A GraphIndex made up from smaller GraphIndices.
+    
+    The backing indices must implement GraphIndex, and are presumed to be
+    static data.
+    """
+
+    def __init__(self, indices):
+        """Create a CombinedGraphIndex backed by indices.
+
+        :param indices: The indices to query for data.
+        """
+        self._indices = indices
+        
+    def iter_all_entries(self):
+        """Iterate over all keys within the index
+
+        :return: An iterable of (key, reference_lists, value). There is no
+            defined order for the result iteration - it will be in the most
+            efficient order for the index.
+        """
+        seen_keys = set()
+        for index in self._indices:
+            for node in index.iter_all_entries():
+                if node[0] not in seen_keys:
+                    yield node
+                    seen_keys.add(node[0])
+
+    def iter_entries(self, keys):
+        """Iterate over keys within the index.
+
+        :param keys: An iterable providing the keys to be retrieved.
+        :return: An iterable of (key, reference_lists, value). There is no
+            defined order for the result iteration - it will be in the most
+            efficient order for the index.
+        """
+        found = set()
+        keys = set(keys)
+        for node in self.iter_all_entries():
+            if node[0] in keys:
+                yield node
+                found.add(node[0])
+        missing = keys.difference(found)
+        if missing:
+            raise errors.MissingKey(self, missing.pop())
+
+    def validate(self):
+        """Validate that everything in the index can be accessed."""
+        for index in self._indices:
+            index.validate()

=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py	2007-07-13 11:24:54 +0000
+++ b/bzrlib/tests/test_index.py	2007-07-13 13:10:30 +0000
@@ -17,7 +17,7 @@
 """Tests for indices."""
 
 from bzrlib import errors
-from bzrlib.index import GraphIndexBuilder, GraphIndex
+from bzrlib.index import CombinedGraphIndex, GraphIndexBuilder, GraphIndex
 from bzrlib.tests import TestCaseWithMemoryTransport
 
 
@@ -258,7 +258,7 @@
             set(index.iter_entries(['name'])))
         self.assertRaises(errors.MissingKey, list, index.iter_entries(['ref']))
 
-    def test_iter_nothing_empty(self):
+    def test_iter_all_keys(self):
         index = self.make_index(1, nodes=[
             ('name', (['ref'], ), 'data'),
             ('ref', ([], ), 'refdata')])
@@ -266,7 +266,7 @@
             ('ref', ((), ), 'refdata')]),
             set(index.iter_entries(['name', 'ref'])))
 
-    def test_iter_all_keys(self):
+    def test_iter_nothing_empty(self):
         index = self.make_index()
         self.assertEqual([], list(index.iter_entries([])))
 
@@ -312,3 +312,117 @@
     def test_validate_no_refs_content(self):
         index = self.make_index(nodes=[('key', (), 'value')])
         index.validate()
+
+
+class TestCombinedGraphIndex(TestCaseWithMemoryTransport):
+
+    def make_index(self, name, ref_lists=0, nodes=[]):
+        builder = GraphIndexBuilder(ref_lists)
+        for node, references, value in nodes:
+            builder.add_node(node, references, value)
+        stream = builder.finish()
+        trans = self.get_transport()
+        trans.put_file(name, stream)
+        return GraphIndex(trans, name)
+
+    def test_open_missing_index_no_error(self):
+        trans = self.get_transport()
+        index1 = GraphIndex(trans, 'missing')
+        index = CombinedGraphIndex([index1])
+
+    def test_iter_all_entries_empty(self):
+        index = CombinedGraphIndex([])
+        self.assertEqual([], list(index.iter_all_entries()))
+
+    def test_iter_all_entries_children_empty(self):
+        index1 = self.make_index('name')
+        index = CombinedGraphIndex([index1])
+        self.assertEqual([], list(index.iter_all_entries()))
+
+    def test_iter_all_entries_simple(self):
+        index1 = self.make_index('name', nodes=[('name', (), 'data')])
+        index = CombinedGraphIndex([index1])
+        self.assertEqual([('name', (), 'data')],
+            list(index.iter_all_entries()))
+
+    def test_iter_all_entries_two_indices(self):
+        index1 = self.make_index('name1', nodes=[('name', (), 'data')])
+        index2 = self.make_index('name2', nodes=[('2', (), '')])
+        index = CombinedGraphIndex([index1, index2])
+        self.assertEqual([('name', (), 'data'),
+            ('2', (), '')],
+            list(index.iter_all_entries()))
+
+    def test_iter_all_entries_two_indices_dup_key(self):
+        index1 = self.make_index('name1', nodes=[('name', (), 'data')])
+        index2 = self.make_index('name2', nodes=[('name', (), 'data')])
+        index = CombinedGraphIndex([index1, index2])
+        self.assertEqual([('name', (), 'data')],
+            list(index.iter_all_entries()))
+
+    def test_iter_nothing_empty(self):
+        index = CombinedGraphIndex([])
+        self.assertEqual([], list(index.iter_entries([])))
+
+    def test_iter_nothing_children_empty(self):
+        index1 = self.make_index('name')
+        index = CombinedGraphIndex([index1])
+        self.assertEqual([], list(index.iter_entries([])))
+
+    def test_iter_all_keys(self):
+        index1 = self.make_index('1', 1, nodes=[
+            ('name', (['ref'], ), 'data')])
+        index2 = self.make_index('2', 1, nodes=[
+            ('ref', ([], ), 'refdata')])
+        index = CombinedGraphIndex([index1, index2])
+        self.assertEqual(set([('name', (('ref',),), 'data'),
+            ('ref', ((), ), 'refdata')]),
+            set(index.iter_entries(['name', 'ref'])))
+ 
+    def test_iter_all_keys_dup_entry(self):
+        index1 = self.make_index('1', 1, nodes=[
+            ('name', (['ref'], ), 'data'),
+            ('ref', ([], ), 'refdata')])
+        index2 = self.make_index('2', 1, nodes=[
+            ('ref', ([], ), 'refdata')])
+        index = CombinedGraphIndex([index1, index2])
+        self.assertEqual(set([('name', (('ref',),), 'data'),
+            ('ref', ((), ), 'refdata')]),
+            set(index.iter_entries(['name', 'ref'])))
+ 
+    def test_iter_missing_entry_empty(self):
+        index = CombinedGraphIndex([])
+        self.assertRaises(errors.MissingKey, list, index.iter_entries(['a']))
+
+    def test_iter_missing_entry_one_index(self):
+        index1 = self.make_index('1')
+        index = CombinedGraphIndex([index1])
+        self.assertRaises(errors.MissingKey, list, index.iter_entries(['a']))
+
+    def test_iter_missing_entry_two_index(self):
+        index1 = self.make_index('1')
+        index2 = self.make_index('2')
+        index = CombinedGraphIndex([index1, index2])
+        self.assertRaises(errors.MissingKey, list, index.iter_entries(['a']))
+ 
+    def test_iter_entry_present_one_index_only(self):
+        index1 = self.make_index('1', nodes=[('key', (), '')])
+        index2 = self.make_index('2', nodes=[])
+        index = CombinedGraphIndex([index1, index2])
+        self.assertEqual([('key', (), '')],
+            list(index.iter_entries(['key'])))
+        # and in the other direction
+        index = CombinedGraphIndex([index2, index1])
+        self.assertEqual([('key', (), '')],
+            list(index.iter_entries(['key'])))
+
+    def test_validate_bad_child_index_errors(self):
+        trans = self.get_transport()
+        trans.put_bytes('name', "not an index\n")
+        index1 = GraphIndex(trans, 'name')
+        index = CombinedGraphIndex([index1])
+        self.assertRaises(errors.BadIndexFormatSignature, index.validate)
+
+    def test_validate_empty(self):
+        index = CombinedGraphIndex([])
+        index.validate()




More information about the bazaar-commits mailing list