Rev 5095: _diff_from_disk now does a better permutation of how nodes are added and removed. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
John Arbash Meinel
john at arbash-meinel.com
Mon Mar 8 16:36:12 GMT 2010
At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
------------------------------------------------------------
revno: 5095
revision-id: john at arbash-meinel.com-20100308163606-n1z028fu2iwjt1zw
parent: john at arbash-meinel.com-20100305225419-073zq23gzeq3e648
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Mon 2010-03-08 10:36:06 -0600
message:
_diff_from_disk now does a better permutation of how nodes are added and removed.
-------------- next part --------------
=== modified file 'bzrlib/pack_collection.py'
--- a/bzrlib/pack_collection.py 2010-03-05 22:54:19 +0000
+++ b/bzrlib/pack_collection.py 2010-03-08 16:36:06 +0000
@@ -179,10 +179,8 @@
self.transport = transport
self.filename = filename
self._memos = None
- # Names that have been added to this list, that have not yet been
- # written to disk.
- self._added_info = set()
- self._removed_info = set()
+ # The set of memos() that were on disk when we read it last
+ self._last_read_memos = None
def _do_check_safe_to_cache(self):
"""Make sure that it is valid to cache state in memory.
@@ -226,6 +224,7 @@
memos[name] = memo
#info_at_load.add((name, memo))
self._memos = memos
+ self._last_read_memos = set(self._memos.iteritems())
result = True
return result
@@ -255,22 +254,44 @@
return []
return index.iter_all_entries()
- def _diff_memos_from_disk(self):
+ def _diff_from_disk(self):
"""Compare the current memos in memory versus the ones on disk.
Prepare for updating the file on disk.
+
+ :return: (on_disk, all_active, removed, added) All of these are sets()
+ of (name, memo) pairs. This means that *modifying* a value shows up
+ as an remove + add.
+
+ on_disk memos that are in the written file, this may have ones
+ we are removing, or ones added by someone else
+ all_active Our in-memory set. This is on_disk - ones we are
+ removing + ones we are adding
+ disk_removed Ones that were in _last_read_memos but are no longer
+ on disk
+ disk_added Ones that are in 'on_disk' but were not in
+ _last_read_memos
+ mem_removed Ones that were in _last_read_memos but we have stopped
+ tracking
+ mem_added Ones not in _last_read_memos that we have locally added
"""
- disk_nodes = set()
+ disk_memos = set()
for _, key, value in self._iter_disk_index():
name = key[0]
- disk_nodes.add((name, value))
+ disk_memos.add((name, value))
+ disk_added = disk_memos - self._last_read_memos
+ disk_removed = self._last_read_memos - disk_memos
+ mem_memos = set(self._memos.items())
+ mem_added = mem_memos - self._last_read_memos
+ mem_removed = self._last_read_memos - mem_memos
+ return (disk_memos, (disk_memos - mem_removed) | mem_added,
+ disk_removed, disk_added, mem_removed, mem_added)
def add_memo(self, name, value):
"""Add a memo to this cache."""
self._ensure_loaded()
assert name not in self._memos
self._memos[name] = value
- self._added_info.add((name, value))
def get_memo(self, name):
"""Get the section (index) memo for the given name."""
@@ -282,9 +303,9 @@
We require 'value' so that we can ensure it matches.
"""
+ self._ensure_loaded()
old = self._memos.pop(name)
assert old == value
- self._removed_info.add((name, value))
def save(self):
"""Save the current in-memory content to disk."""
@@ -295,8 +316,7 @@
builder.add_node((name,), value)
self.transport.put_file(self.filename, builder.finish(),
mode=self._do_get_file_mode())
- self._added_info = set()
- self._removed_info = set()
+ self._last_read_memos = set(self._memos.iteritems())
=== modified file 'bzrlib/tests/test_pack_collection.py'
--- a/bzrlib/tests/test_pack_collection.py 2010-03-05 22:54:19 +0000
+++ b/bzrlib/tests/test_pack_collection.py 2010-03-08 16:36:06 +0000
@@ -140,88 +140,158 @@
class TestMemoTracker(tests.TestCaseWithMemoryTransport):
+ _m1 = ('name1', 'content1')
+ _m2 = ('name2', 'content2')
+ _m3 = ('name3', 'content3')
+ _m4 = ('name4', 'content4')
+ _m5 = ('name5', 'content5')
+ _m5 = ('name6', 'content6')
+
+ def setUp(self):
+ super(TestMemoTracker, self).setUp()
+ self.transport = self.get_transport()
+ self.tracker = TrivialMemoTracker(self.transport, 'meta')
+
+ def assertDiffFromDisk(self, on_disk, active, d_removed, d_added,
+ m_removed, m_added):
+ actual = self.tracker._diff_from_disk()
+ self.assertEqual([sorted(on_disk), sorted(active),
+ sorted(d_removed), sorted(d_added),
+ sorted(m_removed), sorted(m_added)],
+ map(sorted, actual))
+
def test__ensure_no_file(self):
- t = self.get_transport()
- tracker = TrivialMemoTracker(t, 'meta')
# Shouldn't raise an exception, but just set up that there isn't any
# content
- self.assertTrue(tracker._ensure_loaded())
- self.assertEqual({}, tracker._memos)
+ self.assertTrue(self.tracker._ensure_loaded())
+ self.assertEqual({}, self.tracker._memos)
# We should create the file at this time, and it should be a valid
# index file
- tracker.save()
- btree = tracker._index_class(t, 'meta', size=None)
+ self.tracker.save()
+ btree = self.tracker._index_class(self.transport, 'meta', size=None)
self.assertEqual(0, btree.key_count())
def test_add_memo(self):
- t = self.get_transport()
- tracker = TrivialMemoTracker(t, 'meta')
- tracker.add_memo('test-name', 'content')
- self.assertEqual('content', tracker.get_memo('test-name'))
- self.assertEqual({'test-name': 'content'}, tracker._memos)
+ self.tracker.add_memo('test-name', 'content')
+ self.assertEqual('content', self.tracker.get_memo('test-name'))
+ self.assertEqual({'test-name': 'content'}, self.tracker._memos)
+ self.assertEqual([], sorted(self.tracker._last_read_memos))
+ self.tracker.save()
+ self.assertEqual({'test-name': 'content'}, self.tracker._memos)
self.assertEqual([('test-name', 'content')],
- sorted(tracker._added_info))
- self.assertEqual([], sorted(tracker._removed_info))
- tracker.save()
- self.assertEqual({'test-name': 'content'}, tracker._memos)
- self.assertEqual([], sorted(tracker._added_info))
- self.assertEqual([], sorted(tracker._removed_info))
- btree = tracker._index_class(t, 'meta', size=None)
+ sorted(self.tracker._last_read_memos))
+ btree = self.tracker._index_class(self.transport, 'meta', size=None)
self.assertEqual(1, btree.key_count())
self.assertEqual([(('test-name',), 'content')],
sorted([(n[1], n[2]) for n in btree.iter_all_entries()]))
- def test__diff_memos_from_disk(self):
- tracker = TrivialMemoTracker(self.get_transport(), 'meta')
-
def test_get_missing_memo(self):
- tracker = TrivialMemoTracker(self.get_transport(), 'meta')
- self.assertRaises(KeyError, tracker.get_memo, 'not-there')
+ self.assertRaises(KeyError, self.tracker.get_memo, 'not-there')
def test_remove_memo(self):
- t = self.get_transport()
- tracker = TrivialMemoTracker(t, 'meta')
- tracker.add_memo('test-name', 'content')
- tracker.add_memo('test-name2', 'content2')
+ self.tracker.add_memo('test-name', 'content')
+ self.tracker.add_memo('test-name2', 'content2')
self.assertEqual({'test-name': 'content',
- 'test-name2': 'content2'}, tracker._memos)
+ 'test-name2': 'content2'}, self.tracker._memos)
+ self.assertEqual([], sorted(self.tracker._last_read_memos))
+ self.tracker.save()
+ self.tracker.remove_memo('test-name2', 'content2')
self.assertEqual([('test-name', 'content'),
- ('test-name2', 'content2'),
- ], sorted(tracker._added_info))
- self.assertEqual([], sorted(tracker._removed_info))
- tracker.save()
- self.assertEqual([], sorted(tracker._added_info))
- self.assertEqual([], sorted(tracker._removed_info))
- tracker.remove_memo('test-name2', 'content2')
- self.assertEqual([], sorted(tracker._added_info))
- self.assertEqual([('test-name2', 'content2')],
- sorted(tracker._removed_info))
- self.assertEqual({'test-name': 'content'}, tracker._memos)
+ ('test-name2', 'content'),
+ ], sorted(self.tracker._last_read_memos))
+ self.assertEqual({'test-name': 'content'}, self.tracker._memos)
# Before saving, we should have both keys
- btree = tracker._index_class(t, 'meta', size=None)
+ btree = self.tracker._index_class(self.transport, 'meta', size=None)
self.assertEqual(2, btree.key_count())
- tracker.save()
- btree = tracker._index_class(t, 'meta', size=None)
+ self.tracker.save()
+ self.assertEqual([('test-name2', 'content')],
+ sorted(self.tracker._last_read_memos))
+ btree = self.tracker._index_class(self.transport, 'meta', size=None)
self.assertEqual(1, btree.key_count())
def test_add_with_concurrent_add(self):
- t = self.get_transport()
- tracker = TrivialMemoTracker(t, 'meta')
- tracker.add_memo('name', 'content')
- tracker.save()
- tracker2 = TrivialMemoTracker(t, 'meta')
+ self.tracker.add_memo('name', 'content')
+ self.tracker.save()
+ tracker2 = TrivialMemoTracker(self.transport, 'meta')
tracker2.add_memo('name2', 'content2')
- tracker.add_memo('name3', 'content3')
+ self.tracker.add_memo('name3', 'content3')
tracker2.save()
- tracker.save()
+ self.tracker.save()
# When we save, tracker should notice that there was a new entry, and
# add it to the in-memory data.
self.assertEqual({'name': 'content',
'name2': 'content2',
'name3': 'content3',
- }, tracker._memos)
- self.assertEqual([], sorted(tracker._added_info))
- self.assertEqual([], sorted(tracker._removed_info))
+ }, self.tracker._memos)
# And the on-disk record should also have all 3
- btree = tracker._index_class(t, 'meta', size=None)
+ btree = self.tracker._index_class(self.transport, 'meta', size=None)
self.assertEqual(3, btree.key_count())
+
+ def test__diff_from_disk_no_changes(self):
+ self.tracker.add_memo(*self._m1)
+ self.tracker.save()
+ self.assertDiffFromDisk(on_disk=[self._m1],
+ active=[self._m1],
+ d_removed=[],
+ d_added=[],
+ m_removed=[],
+ m_added=[])
+
+ def test__diff_from_empty_disk(self):
+ self.tracker.add_memo(*self._m1)
+ self.assertDiffFromDisk(on_disk=[],
+ active=[self._m1],
+ d_removed=[],
+ d_added=[],
+ m_removed=[],
+ m_added=[self._m1])
+
+ def test__diff_mem_removed(self):
+ self.tracker.add_memo(*self._m1)
+ self.tracker.add_memo(*self._m2)
+ self.tracker.save()
+ self.tracker.remove_memo(*self._m2)
+ self.assertDiffFromDisk(on_disk=[self._m1, self._m2],
+ active=[self._m1],
+ d_removed=[],
+ d_added=[],
+ m_removed=[self._m2],
+ m_added=[])
+
+ def test__diff_mem_added(self):
+ self.tracker.add_memo(*self._m1)
+ self.tracker.save()
+ self.tracker.add_memo(*self._m2)
+ self.assertDiffFromDisk(on_disk=[self._m1],
+ active=[self._m1, self._m2],
+ d_removed=[],
+ d_added=[],
+ m_removed=[],
+ m_added=[self._m2])
+
+ def test__diff_disk_removed(self):
+ self.tracker.add_memo(*self._m1)
+ self.tracker.add_memo(*self._m2)
+ self.tracker.save()
+ tracker2 = TrivialMemoTracker(self.transport, 'meta')
+ tracker2.remove_memo(*self._m2)
+ tracker2.save()
+ self.assertDiffFromDisk(on_disk=[self._m1],
+ active=[self._m1],
+ d_removed=[self._m2],
+ d_added=[],
+ m_removed=[],
+ m_added=[])
+
+ def test__diff_disk_added(self):
+ self.tracker.add_memo(*self._m1)
+ self.tracker.save()
+ tracker2 = TrivialMemoTracker(self.transport, 'meta')
+ tracker2.add_memo(*self._m2)
+ tracker2.save()
+ self.assertDiffFromDisk(on_disk=[self._m1, self._m2],
+ active=[self._m1, self._m2],
+ d_removed=[],
+ d_added=[self._m2],
+ m_removed=[],
+ m_added=[])
More information about the bazaar-commits
mailing list