Rev 5107: Start fleshing out a PackCollection. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

John Arbash Meinel john at arbash-meinel.com
Tue Mar 9 17:42:36 GMT 2010


At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

------------------------------------------------------------
revno: 5107
revision-id: john at arbash-meinel.com-20100309174226-210ezid8ioe8575n
parent: john at arbash-meinel.com-20100308224127-lbefzxzk450rm612
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Tue 2010-03-09 11:42:26 -0600
message:
  Start fleshing out a PackCollection.
  
  It is a bit complex, because some of the abstractions start showing up.
  However, we are making a bit of progress. We'll get there.
-------------- next part --------------
=== modified file 'bzrlib/pack_collection.py'
--- a/bzrlib/pack_collection.py	2010-03-08 22:41:27 +0000
+++ b/bzrlib/pack_collection.py	2010-03-09 17:42:26 +0000
@@ -225,18 +225,23 @@
     records that end up removed. It also tracks records that we recently added
     (but may not have been written down yet.)
 
-    :ivar policy: A MemoTrackerPolicy instance defining what happens to
-        secondary actors when things change.
+    :ivar locking_policy: A LockingPolicy instance, which defines actions to
+        perform when we want to cache or mutate the content.
+    :ivar index_policy: Information about how to read and write the actual disk
+        content.
+    :ivar update_policy: An instance that is used to trigger callbacks when
+        entries are added and removed.
     :ivar _memos: A dict mapping a name to an index memo.
-    :ivar _info_at_load: A set of (name,memo) pairs that we read when we last
-        read the disk index.  (As opposed to new memos that have been added but
-        not yet written to disk.)
+    :ivar _last_read_memos: A set of (name,memo) pairs that we read when we
+        last read the disk index.  (As opposed to new memos that have been
+        added but not yet written to disk.)
     """
 
-    def __init__(self, locking_policy, index_policy, update_policy):
+    def __init__(self, locking_policy, index_policy, update_policy=None):
         self.locking_policy = locking_policy
         self.index_policy = index_policy
-        self.update_policy = update_policy
+        if update_policy is None:
+            self.update_policy = NoopUpdatePolicy()
         self._memos = None
         # The set of memos() that were on disk when we read it last
         self._last_read_memos = None
@@ -252,6 +257,9 @@
         else:
             # TODO: Should we call 'add_memo' or 'init_memo' for all memos that
             #       are already present?
+            #       For now the caller just calls .memos() itself. A nice
+            #       side effect is that we could delay reading 'pack-names'
+            #       until it is actually used, rather than at lock time.
             memos = set(self.index_policy.read_index())
             self._memos = dict(memos)
             self._last_read_memos = memos
@@ -269,6 +277,8 @@
             return True
         (on_disk, active, d_removed, d_added, m_removed,
          m_added) = self._diff_from_disk()
+        # TODO: What if m_added overlaps with d_added (e.g. a concurrent push
+        #       of the same data)?
         for memo in d_removed:
             self.remove_memo(*memo)
         for memo in d_added:
@@ -430,21 +440,37 @@
 class Pack(object):
     """Tracks a bunch of content and some associated indices."""
 
-    def __init__(self):
+    def __init__(self, name):
+        # The individual indexes for this pack (e.g. revision index, text
+        # index, etc.). Maps the index name to the index object. Note that
+        # the index class will depend on the specifics of this format.
         self._indexes = {}
-
+        self.name = name
+        self.index_info = None
+
+    def get_indexes(self):
+        """Get all indexes associated with this Pack file.
+
+        This is used by PackCollection to combine the individual indexes into
+        an AggregateIndex.
+        """
+        return self._indexes
+
+    # TODO: This should only be needed for NewPack...
     def get_index_memo(self):
         """Return a byte string that can be used to initialize indexes."""
         raise NotImplementedError(self.get_index_memo)
 
     @classmethod
-    def from_index_memo(cls, memo):
+    def from_index_memo(cls, collection, name, memo):
         """Return a new Pack instance, initialized from the given index memo.
 
+        :param collection: A PackCollection that this pack is associated with
         :param memo: A memo that was returned by 'get_index_memo' at some point
             previously.
         :return: A Pack instance
         """
+        raise NotImplementedError(cls.from_index_memo)
 
 
 class PackCollection(object):
@@ -456,14 +482,42 @@
     repacked, and triggers that at an appropriate time.
     """
 
-    def __init__(self, memo_tracker):
+    # TODO: _new_pack_class
+
+    def __init__(self, memo_tracker, pack_class):
+        self._pack_class = pack_class
         self.memo_tracker = memo_tracker
+        self.memo_tracker.update_policy = PackCollectionUpdatePolicy(self)
         self._packs = {}
-
-
-    def get_vf_interface(self, index):
-        """Get a VersionedFiles abstraction for the given named index."""
+        # Aggregated indexes across all packs
+        self._aggregate_indexes = {}
+        for name, value in self.memo_tracker.memos():
+            self._add_pack_from_memo(name, value)
+
+    def _add_pack_from_memo(self, name, value):
+        assert name not in self._packs
+        pack = self._pack_class.from_index_memo(self, name, value)
+        self._packs[name] = pack
+        # TODO: Do something with the aggregate indexes
+        for name, index in pack.get_indexes().iteritems():
+            pass
+        return pack
+
+    def _remove_pack_from_memo(self, name, value):
+        pack = self._packs.pop(name)
+        assert pack.name == name
+        assert pack.get_index_memo() == value
+        # TODO: Do something with the aggregate indexes
 
 
 class PackCollectionUpdatePolicy(UpdatePolicy):
     """Update the pack collection as items are added and removed."""
+
+    def __init__(self, pack_collection):
+        self.pack_collection = pack_collection
+
+    def memo_added(self, name, value):
+        self.pack_collection._add_pack_from_memo(name, value)
+
+    def memo_removed(self, name, value):
+        self.pack_collection._remove_pack_from_memo(name, value)

=== modified file 'bzrlib/tests/test_pack_collection.py'
--- a/bzrlib/tests/test_pack_collection.py	2010-03-08 22:14:17 +0000
+++ b/bzrlib/tests/test_pack_collection.py	2010-03-09 17:42:26 +0000
@@ -170,8 +170,7 @@
         tracker = pack_collection.MemoTracker(
             TrivialLockingPolicy(),
             pack_collection.GraphIndexPolicy(self.transport, 'meta',
-                btree_index.BTreeBuilder, btree_index.BTreeGraphIndex),
-            pack_collection.NoopUpdatePolicy())
+                btree_index.BTreeBuilder, btree_index.BTreeGraphIndex))
         return tracker
 
     def assertDiffFromDisk(self, on_disk, active, d_removed, d_added,
@@ -430,3 +429,47 @@
         self.assertEqual([('removed', 'name1', 'content1'),
                           ('added', 'name1', 'alternate-content'),
                          ], policy.log)
+
+
+class TestPack(pack_collection.Pack):
+    """A class which defines a couple indexes, and exposes them."""
+
+    _index_class = btree_index.BTreeGraphIndex
+
+    def get_index_memo(self):
+        return bencode.bencode(self._index_info)
+
+    @classmethod
+    def from_index_memo(cls, collection, name, memo):
+        info = bencode.bdecode(memo)
+        assert isinstance(info, dict)
+        p = cls(name)
+        for index_name, info in info.iteritems():
+            size, = info
+            trans = collection.memo_tracker.index_policy.transport
+            index = cls._index_class(trans, name, size=size)
+            p._indexes[index_name] = index
+        p._index_info = info
+        return p
+
+
+class TestPackCollection(tests.TestCaseWithMemoryTransport):
+
+    def make_tracker(self):
+        return pack_collection.MemoTracker(
+            TrivialLockingPolicy(),
+            pack_collection.GraphIndexPolicy(self.get_transport(), 'meta',
+                btree_index.BTreeBuilder, btree_index.BTreeGraphIndex))
+
+    def test_init_from_empty_tracker(self):
+        tracker = self.make_tracker()
+        collection = pack_collection.PackCollection(tracker, TestPack)
+        self.assertEqual({}, collection._packs)
+        self.assertEqual({}, collection._aggregate_indexes)
+
+    def test_init_with_single_indexed_pack(self):
+        memo = bencode.bencode({'t_index': (1024,)})
+        tracker = self.make_tracker()
+        tracker.add_memo('t-pack', memo)
+        collection = pack_collection.PackCollection(tracker, TestPack)
+        self.assertEqual(['t-pack'], sorted(collection._packs.keys()))



More information about the bazaar-commits mailing list