Rev 5109: Introduce the aggregate-index concept. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

John Arbash Meinel john at arbash-meinel.com
Tue Mar 9 18:17:52 GMT 2010


At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

------------------------------------------------------------
revno: 5109
revision-id: john at arbash-meinel.com-20100309181742-l81zsib6fbtx5yju
parent: john at arbash-meinel.com-20100309180751-y6i1eub2mbkabjw6
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Tue 2010-03-09 12:17:42 -0600
message:
  Introduce the aggregate-index concept.
  
  The idea is that most searches return the index where something is found,
  and then we indirect through AggregateIndex.index_to_pack and
  its .data_access member in order to get back to the original pack file.
  We probably want to keep that in the long run.
-------------- next part --------------
=== modified file 'bzrlib/pack_collection.py'
--- a/bzrlib/pack_collection.py	2010-03-09 18:07:51 +0000
+++ b/bzrlib/pack_collection.py	2010-03-09 18:17:42 +0000
@@ -26,6 +26,7 @@
 from bzrlib import (
     bencode,
     btree_index,
+    index as _mod_index,
     errors,
     )
 
@@ -486,6 +487,89 @@
         raise NotImplementedError(self.create_pack_from_memo)
 
 
+# Note: copied from pack_repo.py, and trimmed
+class AggregateIndex(object):
+    """An aggregated index for PackCollection.
+
+    AggregateIndex is responsible for managing the PackAccess object,
+    Index-To-Pack mapping, and all indices list for a specific type of index
+    such as 'revision index'.
+
+    A CombinedIndex provides an index on a single key space built up
+    from several on-disk indices.  The AggregateIndex builds on this
+    to provide a data access layer, and allows having up to one writable
+    index within the collection.
+    """
+
+    def __init__(self, reload_func=None, flush_func=None):
+        """Create an AggregateIndex.
+
+        :param reload_func: A function to call if we find we are missing an
+            index. Should have the form reload_func() => True if the list of
+            active pack files has changed.
+        """
+        self._reload_func = reload_func
+        self.index_to_pack = {}
+        self.combined_index = _mod_index.CombinedGraphIndex([],
+                                reload_func=reload_func)
+        # self.data_access = _DirectPackAccess(self.index_to_pack,
+        #                                      reload_func=reload_func,
+        #                                      flush_func=flush_func)
+        self.add_callback = None
+
+    def add_index(self, index, pack):
+        """Add index to the aggregate, which is an index for Pack pack.
+
+        Future searches on the aggregate index will search this new index
+        before all previously inserted indices.
+
+        :param index: An Index for the pack.
+        :param pack: A Pack instance.
+        """
+        # expose it to the index map
+        ## self.index_to_pack[index] = pack.access_tuple()
+        # put it at the front of the linear index list
+        self.combined_index.insert_index(0, index)
+
+    # def add_writable_index(self, index, pack):
+    #     """Add an index which is able to have data added to it.
+
+    #     There can be at most one writable index at any time.  Any
+    #     modifications made to the knit are put into this index.
+
+    #     :param index: An index from the pack parameter.
+    #     :param pack: A Pack instance.
+    #     """
+    #     if self.add_callback is not None:
+    #         raise AssertionError(
+    #             "%s already has a writable index through %s" % \
+    #             (self, self.add_callback))
+    #     # allow writing: queue writes to a new index
+    #     self.add_index(index, pack)
+    #     # Updates the index to packs mapping as a side effect.
+    #     self.data_access.set_writer(pack._writer, index, pack.access_tuple())
+    #     self.add_callback = index.add_nodes
+
+    # def clear(self):
+    #     """Reset all the aggregate data to nothing."""
+    #     self.data_access.set_writer(None, None, (None, None))
+    #     self.index_to_pack.clear()
+    #     del self.combined_index._indices[:]
+    #     self.add_callback = None
+
+    # def remove_index(self, index):
+    #     """Remove index from the indices used to answer queries.
+
+    #     :param index: An index from the pack parameter.
+    #     """
+    #     del self.index_to_pack[index]
+    #     self.combined_index._indices.remove(index)
+    #     if (self.add_callback is not None and
+    #         getattr(index, 'add_nodes', None) == self.add_callback):
+    #         self.add_callback = None
+    #         self.data_access.set_writer(None, None, (None, None))
+
+
 class PackCollection(object):
     """This manages a collection of pack files.
 
@@ -495,8 +579,6 @@
     repacked, and triggers that at an appropriate time.
     """
 
-    # TODO: _new_pack_class
-
     def __init__(self, memo_tracker, pack_policy):
         self.pack_policy = pack_policy
         self.memo_tracker = memo_tracker
@@ -511,9 +593,16 @@
         assert name not in self._packs
         pack = self.pack_policy.open_pack_from_memo(name, value)
         self._packs[name] = pack
-        # TODO: Do something with the aggregate indexes
-        for name, index in pack.get_indexes().iteritems():
-            pass
+        for index_name, index in pack.get_indexes().iteritems():
+            # TODO: should we be stricter that all pack files have identical
+            #       indexes?
+            if index_name not in self._aggregate_indexes:
+                # TODO: need to handle reload_func and flush_func
+                agg_index = AggregateIndex()
+                self._aggregate_indexes[index_name] = agg_index
+            else:
+                agg_index = self._aggregate_indexes[index_name]
+            agg_index.add_index(index, pack)
         return pack
 
     def _remove_pack_from_memo(self, name, value):

=== modified file 'bzrlib/tests/test_pack_collection.py'
--- a/bzrlib/tests/test_pack_collection.py	2010-03-09 18:07:51 +0000
+++ b/bzrlib/tests/test_pack_collection.py	2010-03-09 18:17:42 +0000
@@ -476,3 +476,5 @@
         collection = pack_collection.PackCollection(tracker,
             SingleTransportPackPolicy(self.get_transport()))
         self.assertEqual(['t-pack'], sorted(collection._packs.keys()))
+        # We should also have the aggregate index created.
+        self.assertEqual(['t_index'], sorted(collection._aggregate_indexes))



More information about the bazaar-commits mailing list