Rev 5098: Refactor bits into a Policy class. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

John Arbash Meinel john at arbash-meinel.com
Mon Mar 8 19:55:59 GMT 2010


At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

------------------------------------------------------------
revno: 5098
revision-id: john at arbash-meinel.com-20100308195555-wbu2h575236ltlm5
parent: john at arbash-meinel.com-20100308170924-ha2q41s161kfu1lc
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Mon 2010-03-08 13:55:55 -0600
message:
  Refactor bits into a Policy class.
-------------- next part --------------
=== modified file 'bzrlib/pack_collection.py'
--- a/bzrlib/pack_collection.py	2010-03-08 17:09:24 +0000
+++ b/bzrlib/pack_collection.py	2010-03-08 19:55:55 +0000
@@ -153,6 +153,87 @@
         # return SectionInfoSerializer().to_bytes(self._sections)
 
 
+class MemoTrackerPolicy(object):
+    """Policy class determining what to do for specific MemoTracker actions.
+
+    Rather than having implementations subclass MemoTracker to get appropriate
+    functionality, we implement it with a Policy object. This is mostly just
+    done to make it clearer what functionality is custom.
+    """
+
+    def read_index(self):
+        """Read the existing index.
+        
+        :return: an iterable of (name, value) pairs.
+        """
+        raise NotImplementedError(self.read_index)
+
+    def write_index(self, memos):
+        """Write a new index to disk.
+
+        :param memos: an iterable of (name, value) pairs
+        """
+        raise NotImplementedError(self.write_index)
+
+    def safe_to_cache(self):
+        """Raise an exception if it is unsafe to cache data in memory."""
+        raise NotImplementedError(self.safe_to_cache)
+
+    def lock_write(self):
+        """We are about to update the file, make sure that is safe to do."""
+        raise NotImplementedError(self.lock_write)
+
+    def unlock(self):
+        """We have finished writing, unlock."""
+        raise NotImplementedError(self.unlock)
+
+
+class TransportBTreeMemoTrackerPolicy(object):
+    """A policy that uses a BTreeIndex at a particular location."""
+
+    _index_builder_class = btree_index.BTreeBuilder
+    _index_class = btree_index.BTreeGraphIndex
+
+    def __init__(self, transport, filename):
+        self.transport = transport
+        self.filename = filename
+
+    def _do_handle_missing_file(self):
+        """What should we do if the file is not present when we want to read?
+
+        By default, we do nothing, but a child can chose to raise an exception
+        (meaning the file must always exist.)
+        """
+
+    def _do_get_file_mode(self):
+        """Figure out what file mode we should use.
+
+        Default is to just use 'None' which uses OS defaults.
+        """
+        return None
+
+    def read_index(self):
+        """See MemoTrackerPolicy.read_index"""
+        try:
+            index = self._index_class(self.transport, self.filename, size=None)
+            index.key_count() # Trigger reading the header
+        except errors.NoSuchFile, e:
+            self._do_handle_missing_file()
+            # if _do_handle_missing_file doesn't raise, then we just return
+            # nothing, since there is nothing on disk to iterate.
+            return []
+        return [(k[0], v) for _, k, v in index.iter_all_entries()]
+
+    def write_index(self, memos):
+        """See MemoTrackerPolicy.write_index"""
+        builder = self._index_builder_class(reference_lists=0,
+                                            key_elements=1)
+        for name, value in memos:
+            builder.add_node((name,), value)
+        self.transport.put_file(self.filename, builder.finish(),
+                                mode=self._do_get_file_mode())
+
+
 class MemoTracker(object):
     """Manage the collection of section memos.
 
@@ -163,68 +244,32 @@
     records that end up removed. It also tracks records that we recently added
     (but may not have been written down yet.)
 
-    :ivar transport: A Transport describing where our data is.
-    :ivar filename: The name of the index file where we will store the
-        collected memos.
+    :ivar policy: A MemoTrackerPolicy instance defining what happens to
+        secondary actors when things change.
     :ivar _memos: A dict mapping a name to an index memo.
     :ivar _info_at_load: A set of (name,memo) pairs that we read when we last
         read the disk index.  (As opposed to new memos that have been added but
         not yet written to disk.)
     """
 
-    _index_builder_class = None
-    _index_class = None
-
-    def __init__(self, transport, filename):
-        self.transport = transport
-        self.filename = filename
+    def __init__(self, policy):
+        self.policy = policy
         self._memos = None
         # The set of memos() that were on disk when we read it last
         self._last_read_memos = None
 
-    def _do_check_safe_to_cache(self):
-        """Make sure that it is valid to cache state in memory.
-
-        Subclasses should override this to set policy. If it is not safe, an
-        exception should be raised (such as errors.ObjectNotLocked)
-        """
-        # Often this will be:
-        # if not self.repo.is_locked():
-        #     raise errors.ObjectNotLocked(self.repo)
-        raise NotImplementedError(self._do_check_safe_to_cache)
-
-    def _do_handle_missing_file(self):
-        """What should we do when the meta file is missing?
-
-        This will be called when the meta file cannot be found on disk.  Some
-        children may just want to create it on demand, others would consider it
-        a fatal error to be missing.
-
-        By default we silently succeed.
-        """
-        pass
-
-    def _do_get_file_mode(self):
-        """Determine the mode (permission bits) for the file we create."""
-        return None
-
     def _ensure_loaded(self):
         """Ensure that we have information read from the state file.
 
         :return: True if we had to read data from disk
         """
-        self._do_check_safe_to_cache()
+        self.policy.safe_to_cache()
         if self._memos is not None:
             result = False
         else:
-            memos = {}
-            #info_at_load = set()
-            for _, key, memo in self._iter_disk_index():
-                name = key[0]
-                memos[name] = memo
-                #info_at_load.add((name, memo))
-            self._memos = memos
-            self._last_read_memos = set(self._memos.iteritems())
+            memos = set(self.policy.read_index())
+            self._memos = dict(memos)
+            self._last_read_memos = memos
             result = True
         return result
 
@@ -233,26 +278,21 @@
 
         :return: Information about what content has changed.
         """
+        assert False # We haven't tested this yet
         if self._ensure_loaded():
             # We haven't read anything before, so obviously the memory
             # structures changed..
-            return {'added': set(self._memos)}
-        assert False
-
-    def _iter_disk_index(self):
-        """Iterate the contents of the aggregated memos.
-
-        This always reads the file (no caching).
-        """
-        try:
-            index = self._index_class(self.transport, self.filename, size=None)
-            index.key_count() # Trigger reading the header
-        except errors.NoSuchFile, e:
-            self._do_handle_missing_file()
-            # if _do_handle_missing_file doesn't raise, then we just return
-            # nothing, since there is nothing on disk to iterate.
-            return []
-        return index.iter_all_entries()
+            return True
+        (on_disk, active, d_removed, d_added, m_removed,
+         m_added) = self._diff_from_disk()
+        for memo in d_removed:
+            self.remove_memo(*memo)
+        for memo in d_added:
+            self.add_memo(*memo)
+        # We aren't writing out our in-memory changes, so only update
+        # _last_read_memos to include what was already on disk
+        self._last_read_memos = on_disk
+        return bool(d_removed or d_added)
 
     def _diff_from_disk(self):
         """Compare the current memos in memory versus the ones on disk.
@@ -275,10 +315,7 @@
                         tracking
             mem_added   Ones not in _last_read_memos that we have locally added
         """
-        disk_memos = set()
-        for _, key, value in self._iter_disk_index():
-            name = key[0]
-            disk_memos.add((name, value))
+        disk_memos = set(self.policy.read_index())
         disk_added = disk_memos - self._last_read_memos
         disk_removed = self._last_read_memos - disk_memos
         mem_memos = set(self._memos.items())
@@ -309,21 +346,26 @@
 
     def save(self):
         """Save the current in-memory content to disk."""
-        # TODO: Ensure disk locking
-        (on_disk, active, d_removed, d_added, m_removed,
-         m_added) = self._diff_from_disk()
-        builder = self._index_builder_class(reference_lists=0, key_elements=1)
-        for name, value in active:
-            builder.add_node((name,), value)
-        self.transport.put_file(self.filename, builder.finish(),
-                                mode=self._do_get_file_mode())
-        self._last_read_memos = set(self._memos.iteritems())
-        # TODO: We probably need to either return this info, or trigger
-        #       callbacks, etc so that higher level code can do something
-        for memo in d_removed:
-            self.remove_memo(*memo)
-        for memo in d_added:
-            self.add_memo(*memo)
+        self.policy.lock_write()
+        try:
+            (on_disk, active, d_removed, d_added, m_removed,
+             m_added) = self._diff_from_disk()
+            self.policy.write_index(active)
+            # XXX:  There is a bug here, test it. self._memos does not (yet)
+            #       match active
+            self._last_read_memos = set(self._memos.iteritems())
+            # TODO: We probably need to either return this info, or trigger
+            #       callbacks, etc so that higher level code can do something
+            #       The pack_collection code did this via a helper
+            #       '_synchronize_pack_names_from_disk_nodes', but it also re-did
+            #       all the work to determine what was removed and added again.
+            #       Seems a bit redundant.
+            for memo in d_removed:
+                self.remove_memo(*memo)
+            for memo in d_added:
+                self.add_memo(*memo)
+        finally:
+            self.policy.unlock()
 
 
 

=== modified file 'bzrlib/tests/test_pack_collection.py'
--- a/bzrlib/tests/test_pack_collection.py	2010-03-08 17:09:24 +0000
+++ b/bzrlib/tests/test_pack_collection.py	2010-03-08 19:55:55 +0000
@@ -128,14 +128,17 @@
 
 
 
-class TrivialMemoTracker(pack_collection.MemoTracker):
+class TrivialMemoTrackerPolicy(pack_collection.TransportBTreeMemoTrackerPolicy):
     """Stub out the necessary functionality with trivial implementations."""
 
-    _index_builder_class = btree_index.BTreeBuilder
-    _index_class = btree_index.BTreeGraphIndex
-
-    def _do_check_safe_to_cache(self):
-        return # Always safe
+    def safe_to_cache(self):
+        return #Always safe
+
+    def lock_write(self):
+        return #No lock checking
+
+    def unlock(self):
+        return # No lock checking
 
 
 class TestMemoTracker(tests.TestCaseWithMemoryTransport):
@@ -149,7 +152,12 @@
     def setUp(self):
         super(TestMemoTracker, self).setUp()
         self.transport = self.get_transport()
-        self.tracker = TrivialMemoTracker(self.transport, 'meta')
+        self.tracker = self.make_tracker()
+
+    def make_tracker(self):
+        policy = TrivialMemoTrackerPolicy(self.transport, 'meta')
+        tracker = pack_collection.MemoTracker(policy)
+        return tracker
 
     def assertDiffFromDisk(self, on_disk, active, d_removed, d_added,
                            m_removed, m_added):
@@ -167,7 +175,8 @@
         # We should create the file at this time, and it should be a valid
         # index file
         self.tracker.save()
-        btree = self.tracker._index_class(self.transport, 'meta', size=None)
+        btree = self.tracker.policy._index_class(self.transport, 'meta',
+                                                 size=None)
         self.assertEqual(0, btree.key_count())
 
     def test_add_memo(self):
@@ -179,7 +188,8 @@
         self.assertEqual({'name1': 'content1'}, self.tracker._memos)
         self.assertEqual([('name1', 'content1')],
                          sorted(self.tracker._last_read_memos))
-        btree = self.tracker._index_class(self.transport, 'meta', size=None)
+        btree = self.tracker.policy._index_class(self.transport, 'meta',
+                                                 size=None)
         self.assertEqual(1, btree.key_count())
         self.assertEqual([(('name1',), 'content1')],
                  sorted([(n[1], n[2]) for n in btree.iter_all_entries()]))
@@ -200,18 +210,20 @@
                          ], sorted(self.tracker._last_read_memos))
         self.assertEqual({'name1': 'content1'}, self.tracker._memos)
         # Before saving, we should have both keys
-        btree = self.tracker._index_class(self.transport, 'meta', size=None)
+        btree = self.tracker.policy._index_class(self.transport, 'meta',
+                                                 size=None)
         self.assertEqual(2, btree.key_count())
         self.tracker.save()
         self.assertEqual([('name1', 'content1')],
                          sorted(self.tracker._last_read_memos))
-        btree = self.tracker._index_class(self.transport, 'meta', size=None)
+        btree = self.tracker.policy._index_class(self.transport, 'meta',
+                                                 size=None)
         self.assertEqual(1, btree.key_count())
 
     def test_add_with_concurrent_add(self):
         self.tracker.add_memo(*self._m1)
         self.tracker.save()
-        tracker2 = TrivialMemoTracker(self.transport, 'meta')
+        tracker2 = self.make_tracker()
         tracker2.add_memo(*self._m2)
         self.tracker.add_memo(*self._m3)
         tracker2.save()
@@ -223,13 +235,14 @@
                           'name3': 'content3',
                          }, self.tracker._memos)
         # And the on-disk record should also have all 3
-        btree = self.tracker._index_class(self.transport, 'meta', size=None)
+        btree = self.tracker.policy._index_class(self.transport, 'meta',
+                                                 size=None)
         self.assertEqual(3, btree.key_count())
 
     def test_concurrent_remove(self):
         self.tracker.add_memo(*self._m1)
         self.tracker.save()
-        tracker2 = TrivialMemoTracker(self.transport, 'meta')
+        tracker2 = self.make_tracker()
         tracker2.remove_memo(*self._m1)
         self.tracker.add_memo(*self._m2)
         tracker2.save()
@@ -239,7 +252,8 @@
         self.assertEqual({'name2': 'content2',
                          }, self.tracker._memos)
         # And the on-disk record should also have all 3
-        btree = self.tracker._index_class(self.transport, 'meta', size=None)
+        btree = self.tracker.policy._index_class(self.transport, 'meta',
+                                                 size=None)
         self.assertEqual(1, btree.key_count())
 
     def test__diff_from_disk_no_changes(self):
@@ -288,7 +302,7 @@
         self.tracker.add_memo(*self._m1)
         self.tracker.add_memo(*self._m2)
         self.tracker.save()
-        tracker2 = TrivialMemoTracker(self.transport, 'meta')
+        tracker2 = self.make_tracker()
         tracker2.remove_memo(*self._m2)
         tracker2.save()
         self.assertDiffFromDisk(on_disk=[self._m1],
@@ -301,7 +315,7 @@
     def test__diff_disk_added(self):
         self.tracker.add_memo(*self._m1)
         self.tracker.save()
-        tracker2 = TrivialMemoTracker(self.transport, 'meta')
+        tracker2 = self.make_tracker()
         tracker2.add_memo(*self._m2)
         tracker2.save()
         self.assertDiffFromDisk(on_disk=[self._m1, self._m2],
@@ -316,7 +330,7 @@
         self.tracker.add_memo(*self._m2)
         self.tracker.add_memo(*self._m3)
         self.tracker.save()
-        tracker2 = TrivialMemoTracker(self.transport, 'meta')
+        tracker2 = self.make_tracker()
         tracker2.add_memo(*self._m4)
         tracker2.remove_memo(*self._m3)
         tracker2.save()



More information about the bazaar-commits mailing list