Rev 5091: Add ability to save the nodes. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
John Arbash Meinel
john at arbash-meinel.com
Fri Mar 5 22:36:31 GMT 2010
At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
------------------------------------------------------------
revno: 5091
revision-id: john at arbash-meinel.com-20100305223556-e8r1sbrcrx97za52
parent: john at arbash-meinel.com-20100305205705-fy99019bdtggwq70
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Fri 2010-03-05 16:35:56 -0600
message:
Add ability to save the nodes.
-------------- next part --------------
=== modified file 'bzrlib/pack_collection.py'
--- a/bzrlib/pack_collection.py 2010-03-05 20:57:05 +0000
+++ b/bzrlib/pack_collection.py 2010-03-05 22:35:56 +0000
@@ -124,10 +124,10 @@
[(start_offset, file_st.st_size - start_offset)]).next()
assert tail.startswith(expected_header)
index_info_bytes = tail[len(expected_header):-12]
- return cls.from_indicies_memo(transport, filename, index_info_bytes)
+ return cls.from_sections_memo(transport, filename, index_info_bytes)
@classmethod
- def from_indicies_memo(cls, transport, filename, memo_bytes):
+ def from_sections_memo(cls, transport, filename, memo_bytes):
ti = cls(transport, filename)
ti._sections = SectionInfoSerializer().from_bytes(memo_bytes)
return ti
@@ -144,10 +144,136 @@
return index_class(self._transport, self._filename, size=length,
**kwargs)
- def get_indicies_memo(self):
+ def get_sections_memo(self):
"""Get a string giving the hints about where indices are located.
This is used to aggregate indices across separate pack files into a
single meta-index. (eg 'pack-names').
"""
# return SectionInfoSerializer().to_bytes(self._sections)
+
+
+class MemoTracker(object):
+ """Manage the collection of section memos.
+
+ This is generally another file which contains all of the memos generated
+ for each pack file (eg '.bzr/repository/pack-names' for Repositories).
+
+ This keeps track of memos that we've read from disk, which may include
+ records that end up removed. It also tracks records that we recently added
+ (but may not yet have been written to disk.)
+
+ :ivar transport: A Transport describing where our data is.
+ :ivar filename: The name of the index file where we will store the
+ collected memos.
+ :ivar _memos: A dict mapping a name to an index memo.
+ :ivar _info_at_load: A set of (name, memo) pairs that we read when we last
+ read the disk index. (As opposed to new memos that have been added but
+ not yet written to disk.)
+ """
+
+ _index_builder_class = None
+ _index_class = None
+
+ def __init__(self, transport, filename):
+ self.transport = transport
+ self.filename = filename
+ self._memos = None
+ # Names that have been added to this list, that have not yet been
+ # written to disk.
+ self._added_names = set()
+
+ def _do_check_safe_to_cache(self):
+ """Make sure that it is valid to cache state in memory.
+
+ Subclasses should override this to set policy. If it is not safe, an
+ exception should be raised (such as errors.ObjectNotLocked)
+ """
+ # Often this will be:
+ # if not self.repo.is_locked():
+ # raise errors.ObjectNotLocked(self.repo)
+ raise NotImplementedError(self._do_check_safe_to_cache)
+
+ def _do_handle_missing_file(self):
+ """What should we do when the meta file is missing?
+
+ This will be called when the meta file cannot be found on disk. Some
+ children may just want to create it on demand, others would consider it
+ a fatal error to be missing.
+
+ By default we silently succeed.
+ """
+ pass
+
+ def _do_get_file_mode(self):
+ """Determine the mode (permission bits) for the file we create."""
+ return None
+
+ def _ensure_loaded(self):
+ """Ensure that we have information read from the state file.
+
+ :return: True if we had to read data from disk
+ """
+ self._do_check_safe_to_cache()
+ if self._memos is not None:
+ result = False
+ else:
+ memos = {}
+ #info_at_load = set()
+ for _, key, memo in self._iter_disk_index():
+ name = key[0]
+ memos[name] = memo
+ #info_at_load.add((name, memo))
+ self._memos = memos
+ result = True
+ return result
+
+ def reload(self):
+ """Reload the meta-information based on what has been written to disk.
+
+ :return: Information about what content has changed.
+ """
+ if self._ensure_loaded():
+ # We haven't read anything before, so obviously the memory
+ # structures changed.
+ return {'added': set(self._memos)}
+ assert False
+
+ def _iter_disk_index(self):
+ """Iterate the contents of the aggregated memos.
+
+ This always reads the file (no caching).
+ """
+ try:
+ index = self._index_class(self.transport, self.filename, size=None)
+ index.key_count() # Trigger reading the header
+ except errors.NoSuchFile, e:
+ self._do_handle_missing_file()
+ # if _do_handle_missing_file doesn't raise, then we just return
+ # nothing, since there is nothing on disk to iterate.
+ return []
+ return index.iter_all_entries()
+
+ def get_memo(self, name):
+ """Get the section (index) memo for the given name."""
+
+ def save(self):
+ """Save the current in-memory content to disk."""
+ # TODO: Ensure disk locking
+ builder = self._index_builder_class(reference_lists=0, key_elements=1)
+ for name, value in self._memos:
+ builder.add_node((name,), value)
+ self.transport.put_file(self.filename, builder.finish(),
+ mode=self._do_get_file_mode())
+
+
+
+class PackCollection(object):
+ """This manages a collection of pack files.
+
+ Similar in concept to RepositoryPackCollection, except this does not try to
+ know *what* indexes are available. It manages a group of pack files, and
+ possibly their associated indices, and works out when things need to be
+ repacked, and triggers that at an appropriate time.
+ """
+
=== modified file 'bzrlib/tests/test_pack_collection.py'
--- a/bzrlib/tests/test_pack_collection.py 2010-03-05 20:57:05 +0000
+++ b/bzrlib/tests/test_pack_collection.py 2010-03-05 22:35:56 +0000
@@ -125,3 +125,30 @@
assert_index_content(self, {('key1',): ('value1',),
('key2',): ('value2',),
}, text_index)
+
+
+
+class TrivialMemoTracker(pack_collection.MemoTracker):
+ """Stub out the necessary functionality with trivial implementations."""
+
+ _index_builder_class = btree_index.BTreeBuilder
+ _index_class = btree_index.BTreeGraphIndex
+
+ def _do_check_safe_to_cache(self):
+ return # Always safe
+
+
+class TestMemoTracker(tests.TestCaseWithMemoryTransport):
+
+ def test__ensure_no_file(self):
+ t = self.get_transport()
+ tracker = TrivialMemoTracker(t, 'meta')
+ # Shouldn't raise an exception, but just set up that there isn't any
+ # content
+ self.assertTrue(tracker._ensure_loaded())
+ self.assertEqual({}, tracker._memos)
+ # We should create the file at this time, and it should be a valid
+ # index file
+ tracker.save()
+ btree = tracker._index_class(t, 'meta', size=None)
+ self.assertEqual(0, btree.key_count())
More information about the bazaar-commits
mailing list