Rev 5091: Add ability to save the nodes. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

Fri Mar 5 22:36:31 GMT 2010

At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

------------------------------------------------------------
revno: 5091
revision-id: john at arbash-meinel.com-20100305223556-e8r1sbrcrx97za52
parent: john at arbash-meinel.com-20100305205705-fy99019bdtggwq70
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Fri 2010-03-05 16:35:56 -0600
message:
  Add ability to save the nodes.
-------------- next part --------------
=== modified file 'bzrlib/pack_collection.py'

--- a/bzrlib/pack_collection.py	2010-03-05 20:57:05 +0000
+++ b/bzrlib/pack_collection.py	2010-03-05 22:35:56 +0000
@@ -124,10 +124,10 @@
             [(start_offset, file_st.st_size - start_offset)]).next()
         assert tail.startswith(expected_header)
         index_info_bytes = tail[len(expected_header):-12]
-        return cls.from_indicies_memo(transport, filename, index_info_bytes)
+        return cls.from_sections_memo(transport, filename, index_info_bytes)
 
     @classmethod
-    def from_indicies_memo(cls, transport, filename, memo_bytes):
+    def from_sections_memo(cls, transport, filename, memo_bytes):
         ti = cls(transport, filename)
         ti._sections = SectionInfoSerializer().from_bytes(memo_bytes)
         return ti
@@ -144,10 +144,136 @@
         return index_class(self._transport, self._filename, size=length,
                            **kwargs)
 
-    def get_indicies_memo(self):
+    def get_sections_memo(self):
         """Get a string giving the hints about where indices are located.
 
         This is used to aggregate indices across separate pack files into a
         single meta-index. (eg 'pack-names').
         """
         # return SectionInfoSerializer().to_bytes(self._sections)
+
+
+class MemoTracker(object):
+    """Manage the collection of section memos.
+
+    This is generally another file which contains all of the memos generated
+    for each pack file (eg '.bzr/repository/pack-names' for Repositories).
+
+    This keeps track of memos that we've read from disk, which may include
+    records that end up removed. It also tracks records that we recently added
+    (but may not have been written down yet.)
+
+    :ivar transport: A Transport describing where our data is.
+    :ivar filename: The name of the index file where we will store the
+        collected memos.
+    :ivar _memos: A dict mapping a name to an index memo.
+    :ivar _info_at_load: A set of (name,memo) pairs that we read when we last
+        read the disk index.  (As opposed to new memos that have been added but
+        not yet written to disk.)
+    """
+
+    _index_builder_class = None
+    _index_class = None
+
+    def __init__(self, transport, filename):
+        self.transport = transport
+        self.filename = filename
+        self._memos = None
+        # Names that have been added to this list, that have not yet been
+        # written to disk.
+        self._added_names = set()
+
+    def _do_check_safe_to_cache(self):
+        """Make sure that it is valid to cache state in memory.
+
+        Subclasses should override this to set policy. If it is not safe, an
+        exception should be raised (such as errors.ObjectNotLocked)
+        """
+        # Often this will be:
+        # if not self.repo.is_locked():
+        #     raise errors.ObjectNotLocked(self.repo)
+        raise NotImplementedError(self._do_check_safe_to_cache)
+
+    def _do_handle_missing_file(self):
+        """What should we do when the meta file is missing?
+
+        This will be called when the meta file cannot be found on disk.  Some
+        children may just want to create it on demand, others would consider it
+        a fatal error to be missing.
+
+        By default we silently succeed.
+        """
+        pass
+
+    def _do_get_file_mode(self):
+        """Determine the mode (permission bits) for the file we create."""
+        return None
+
+    def _ensure_loaded(self):
+        """Ensure self._memos is populated, reading the disk index if needed.
+
+        :return: True if we had to read data from disk, False if already cached
+        """
+        self._do_check_safe_to_cache()  # policy hook; raises if caching is unsafe
+        if self._memos is not None:
+            result = False
+        else:
+            memos = {}
+            #info_at_load = set()
+            for _, key, memo in self._iter_disk_index():
+                name = key[0]  # save() writes 1-element keys: (name,)
+                memos[name] = memo
+                #info_at_load.add((name, memo))
+            self._memos = memos
+            result = True
+        return result
+
+    def reload(self):
+        """Reload the meta-information based on what has been written to disk.
+
+        :return: On the first load, a dict {'added': set of all loaded names}.
+        """
+        if self._ensure_loaded():
+            # We haven't read anything before, so every name now in memory
+            # counts as newly added.
+            return {'added': set(self._memos)}
+        assert False  # NOTE(review): the already-loaded path is unhandled here
+
+    def _iter_disk_index(self):
+        """Return an iterable over the entries of the on-disk memo index.
+
+        Always reads the file (no caching); empty if the file is missing.
+        """
+        try:
+            index = self._index_class(self.transport, self.filename, size=None)
+            index.key_count() # Trigger reading the header
+        except errors.NoSuchFile:
+            self._do_handle_missing_file()
+            # if _do_handle_missing_file doesn't raise, then we just return
+            # nothing, since there is nothing on disk to iterate.
+            return []
+        return index.iter_all_entries()
+
+    def get_memo(self, name):
+        """Get the section (index) memo for the given name."""
+
+    def save(self):
+        """Write every (name, memo) pair in self._memos to the index file."""
+        # TODO: Ensure disk locking
+        builder = self._index_builder_class(reference_lists=0, key_elements=1)
+        for name, value in self._memos.items():
+            builder.add_node((name,), value)
+        self.transport.put_file(self.filename, builder.finish(),
+                                mode=self._do_get_file_mode())
+
+
+
+class PackCollection(object):
+    """This manages a collection of pack files.
+
+    Similar in concept to RepositoryPackCollection, except this does not try to
+    know *what* indexes are available. It manages a group of pack files, and
+    possibly their associated indices, and works out when things need to be
+    repacked, and triggers that at an appropriate time.
+    """
+

=== modified file 'bzrlib/tests/test_pack_collection.py'
--- a/bzrlib/tests/test_pack_collection.py	2010-03-05 20:57:05 +0000
+++ b/bzrlib/tests/test_pack_collection.py	2010-03-05 22:35:56 +0000
@@ -125,3 +125,30 @@
         assert_index_content(self, {('key1',): ('value1',),
                                     ('key2',): ('value2',),
                                    }, text_index)
+
+
+
+class TrivialMemoTracker(pack_collection.MemoTracker):
+    """Stub out the necessary functionality with trivial implementations."""
+
+    _index_builder_class = btree_index.BTreeBuilder
+    _index_class = btree_index.BTreeGraphIndex
+
+    def _do_check_safe_to_cache(self):
+        return # Always safe
+
+
+class TestMemoTracker(tests.TestCaseWithMemoryTransport):
+
+    def test__ensure_no_file(self):
+        t = self.get_transport()
+        tracker = TrivialMemoTracker(t, 'meta')
+        # Shouldn't raise an exception, but just set up that there isn't any
+        # content
+        self.assertTrue(tracker._ensure_loaded())
+        self.assertEqual({}, tracker._memos)
+        # We should create the file at this time, and it should be a valid
+        # index file
+        tracker.save()
+        btree = tracker._index_class(t, 'meta', size=None)
+        self.assertEqual(0, btree.key_count())