Rev 5109: Introduce the aggregate-index concept. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
John Arbash Meinel
john at arbash-meinel.com
Tue Mar 9 18:17:52 GMT 2010
At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
------------------------------------------------------------
revno: 5109
revision-id: john at arbash-meinel.com-20100309181742-l81zsib6fbtx5yju
parent: john at arbash-meinel.com-20100309180751-y6i1eub2mbkabjw6
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Tue 2010-03-09 12:17:42 -0600
message:
Introduce the aggregate-index concept.
The idea is that most searches return the index where something is found,
and then we indirect through AggregateIndex.index_to_pack and
its .data_access member in order to get back to the original pack file.
We probably want to keep that in the long run.
-------------- next part --------------
=== modified file 'bzrlib/pack_collection.py'
--- a/bzrlib/pack_collection.py 2010-03-09 18:07:51 +0000
+++ b/bzrlib/pack_collection.py 2010-03-09 18:17:42 +0000
@@ -26,6 +26,7 @@
from bzrlib import (
bencode,
btree_index,
+ index as _mod_index,
errors,
)
@@ -486,6 +487,89 @@
raise NotImplementedError(self.create_pack_from_memo)
+# Note: copied from pack_repo.py, and trimmed
+class AggregateIndex(object):
+ """An aggregated index for PackCollection.
+
+ AggregateIndex is responsible for managing the PackAccess object, the
+ Index-To-Pack mapping, and the list of all indices for a specific type of
+ index, such as the 'revision index'.
+
+ A CombinedIndex provides an index on a single key space built up
+ from several on-disk indices. The AggregateIndex builds on this
+ to provide a data access layer, and allows having up to one writable
+ index within the collection.
+ """
+
+ def __init__(self, reload_func=None, flush_func=None):
+ """Create an AggregateIndex.
+
+ :param reload_func: A function to call if we find we are missing an
+ index. Should have the form reload_func() => True if the list of
+ active pack files has changed.
+ """
+ self._reload_func = reload_func
+ self.index_to_pack = {}
+ self.combined_index = _mod_index.CombinedGraphIndex([],
+ reload_func=reload_func)
+ # self.data_access = _DirectPackAccess(self.index_to_pack,
+ # reload_func=reload_func,
+ # flush_func=flush_func)
+ self.add_callback = None
+
+ def add_index(self, index, pack):
+ """Add index to the aggregate, which is an index for Pack pack.
+
+ Future searches on the aggregate index will search this new index
+ before all previously inserted indices.
+
+ :param index: An Index for the pack.
+ :param pack: A Pack instance.
+ """
+ # expose it to the index map
+ ## self.index_to_pack[index] = pack.access_tuple()
+ # put it at the front of the linear index list
+ self.combined_index.insert_index(0, index)
+
+ # def add_writable_index(self, index, pack):
+ # """Add an index which is able to have data added to it.
+
+ # There can be at most one writable index at any time. Any
+ # modifications made to the knit are put into this index.
+
+ # :param index: An index from the pack parameter.
+ # :param pack: A Pack instance.
+ # """
+ # if self.add_callback is not None:
+ # raise AssertionError(
+ # "%s already has a writable index through %s" % \
+ # (self, self.add_callback))
+ # # allow writing: queue writes to a new index
+ # self.add_index(index, pack)
+ # # Updates the index to packs mapping as a side effect,
+ # self.data_access.set_writer(pack._writer, index, pack.access_tuple())
+ # self.add_callback = index.add_nodes
+
+ # def clear(self):
+ # """Reset all the aggregate data to nothing."""
+ # self.data_access.set_writer(None, None, (None, None))
+ # self.index_to_pack.clear()
+ # del self.combined_index._indices[:]
+ # self.add_callback = None
+
+ # def remove_index(self, index):
+ # """Remove index from the indices used to answer queries.
+
+ # :param index: An index from the pack parameter.
+ # """
+ # del self.index_to_pack[index]
+ # self.combined_index._indices.remove(index)
+ # if (self.add_callback is not None and
+ # getattr(index, 'add_nodes', None) == self.add_callback):
+ # self.add_callback = None
+ # self.data_access.set_writer(None, None, (None, None))
+
+
class PackCollection(object):
"""This manages a collection of pack files.
@@ -495,8 +579,6 @@
repacked, and triggers that at an appropriate time.
"""
- # TODO: _new_pack_class
-
def __init__(self, memo_tracker, pack_policy):
self.pack_policy = pack_policy
self.memo_tracker = memo_tracker
@@ -511,9 +593,16 @@
assert name not in self._packs
pack = self.pack_policy.open_pack_from_memo(name, value)
self._packs[name] = pack
- # TODO: Do something with the aggregate indexes
- for name, index in pack.get_indexes().iteritems():
- pass
+ for index_name, index in pack.get_indexes().iteritems():
+ # TODO: should we be stricter and require that all pack files have
+ # identical sets of indexes?
+ if index_name not in self._aggregate_indexes:
+ # TODO: need to handle reload_func and flush_func
+ agg_index = AggregateIndex()
+ self._aggregate_indexes[index_name] = agg_index
+ else:
+ agg_index = self._aggregate_indexes[index_name]
+ agg_index.add_index(index, pack)
return pack
def _remove_pack_from_memo(self, name, value):
=== modified file 'bzrlib/tests/test_pack_collection.py'
--- a/bzrlib/tests/test_pack_collection.py 2010-03-09 18:07:51 +0000
+++ b/bzrlib/tests/test_pack_collection.py 2010-03-09 18:17:42 +0000
@@ -476,3 +476,5 @@
collection = pack_collection.PackCollection(tracker,
SingleTransportPackPolicy(self.get_transport()))
self.assertEqual(['t-pack'], sorted(collection._packs.keys()))
+ # We should also have the aggregate index created.
+ self.assertEqual(['t_index'], sorted(collection._aggregate_indexes))
More information about the bazaar-commits
mailing list