Rev 5095: _diff_from_disk now does a better permutation of how nodes are added and removed. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

John Arbash Meinel john at arbash-meinel.com
Mon Mar 8 16:36:12 GMT 2010


At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

------------------------------------------------------------
revno: 5095
revision-id: john at arbash-meinel.com-20100308163606-n1z028fu2iwjt1zw
parent: john at arbash-meinel.com-20100305225419-073zq23gzeq3e648
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Mon 2010-03-08 10:36:06 -0600
message:
  _diff_from_disk now does a better permutation of how nodes are added and removed.
-------------- next part --------------
=== modified file 'bzrlib/pack_collection.py'
--- a/bzrlib/pack_collection.py	2010-03-05 22:54:19 +0000
+++ b/bzrlib/pack_collection.py	2010-03-08 16:36:06 +0000
@@ -179,10 +179,8 @@
         self.transport = transport
         self.filename = filename
         self._memos = None
-        # Names that have been added to this list, that have not yet been
-        # written to disk.
-        self._added_info = set()
-        self._removed_info = set()
+        # The set of memos() that were on disk when we read it last
+        self._last_read_memos = None
 
     def _do_check_safe_to_cache(self):
         """Make sure that it is valid to cache state in memory.
@@ -226,6 +224,7 @@
                 memos[name] = memo
                 #info_at_load.add((name, memo))
             self._memos = memos
+            self._last_read_memos = set(self._memos.iteritems())
             result = True
         return result
 
@@ -255,22 +254,44 @@
             return []
         return index.iter_all_entries()
 
-    def _diff_memos_from_disk(self):
+    def _diff_from_disk(self):
         """Compare the current memos in memory versus the ones on disk.
 
         Prepare for updating the file on disk.
+
+        :return: (on_disk, all_active, removed, added) All of these are sets()
+            of (name, memo) pairs. This means that *modifying* a value shows up
+            as an remove + add.
+
+            on_disk     memos that are in the written file, this may have ones
+                        we are removing, or ones added by someone else
+            all_active  Our in-memory set. This is on_disk - ones we are
+                        removing + ones we are adding
+            disk_removed Ones that were in _last_read_memos but are no longer
+                        on disk
+            disk_added  Ones that are in 'on_disk' but were not in
+                        _last_read_memos
+            mem_removed Ones that were in _last_read_memos but we have stopped
+                        tracking
+            mem_added   Ones not in _last_read_memos that we have locally added
         """
-        disk_nodes = set()
+        disk_memos = set()
         for _, key, value in self._iter_disk_index():
             name = key[0]
-            disk_nodes.add((name, value))
+            disk_memos.add((name, value))
+        disk_added = disk_memos - self._last_read_memos
+        disk_removed = self._last_read_memos - disk_memos
+        mem_memos = set(self._memos.items())
+        mem_added = mem_memos - self._last_read_memos
+        mem_removed = self._last_read_memos - mem_memos
+        return (disk_memos, (disk_memos - mem_removed) | mem_added,
+                disk_removed, disk_added, mem_removed, mem_added)
 
     def add_memo(self, name, value):
         """Add a memo to this cache."""
         self._ensure_loaded()
         assert name not in self._memos
         self._memos[name] = value
-        self._added_info.add((name, value))
 
     def get_memo(self, name):
         """Get the section (index) memo for the given name."""
@@ -282,9 +303,9 @@
 
         We require 'value' so that we can ensure it matches.
         """
+        self._ensure_loaded()
         old = self._memos.pop(name)
         assert old == value
-        self._removed_info.add((name, value))
 
     def save(self):
         """Save the current in-memory content to disk."""
@@ -295,8 +316,7 @@
             builder.add_node((name,), value)
         self.transport.put_file(self.filename, builder.finish(),
                                 mode=self._do_get_file_mode())
-        self._added_info = set()
-        self._removed_info = set()
+        self._last_read_memos = set(self._memos.iteritems())
 
 
 

=== modified file 'bzrlib/tests/test_pack_collection.py'
--- a/bzrlib/tests/test_pack_collection.py	2010-03-05 22:54:19 +0000
+++ b/bzrlib/tests/test_pack_collection.py	2010-03-08 16:36:06 +0000
@@ -140,88 +140,158 @@
 
 class TestMemoTracker(tests.TestCaseWithMemoryTransport):
 
+    _m1 = ('name1', 'content1')
+    _m2 = ('name2', 'content2')
+    _m3 = ('name3', 'content3')
+    _m4 = ('name4', 'content4')
+    _m5 = ('name5', 'content5')
+    _m5 = ('name6', 'content6')
+
+    def setUp(self):
+        super(TestMemoTracker, self).setUp()
+        self.transport = self.get_transport()
+        self.tracker = TrivialMemoTracker(self.transport, 'meta')
+
+    def assertDiffFromDisk(self, on_disk, active, d_removed, d_added,
+                           m_removed, m_added):
+        actual = self.tracker._diff_from_disk()
+        self.assertEqual([sorted(on_disk), sorted(active),
+                          sorted(d_removed), sorted(d_added),
+                          sorted(m_removed), sorted(m_added)],
+                         map(sorted, actual))
+        
     def test__ensure_no_file(self):
-        t = self.get_transport()
-        tracker = TrivialMemoTracker(t, 'meta')
         # Shouldn't raise an exception, but just set up that there isn't any
         # content
-        self.assertTrue(tracker._ensure_loaded())
-        self.assertEqual({}, tracker._memos)
+        self.assertTrue(self.tracker._ensure_loaded())
+        self.assertEqual({}, self.tracker._memos)
         # We should create the file at this time, and it should be a valid
         # index file
-        tracker.save()
-        btree = tracker._index_class(t, 'meta', size=None)
+        self.tracker.save()
+        btree = self.tracker._index_class(self.transport, 'meta', size=None)
         self.assertEqual(0, btree.key_count())
 
     def test_add_memo(self):
-        t = self.get_transport()
-        tracker = TrivialMemoTracker(t, 'meta')
-        tracker.add_memo('test-name', 'content')
-        self.assertEqual('content', tracker.get_memo('test-name'))
-        self.assertEqual({'test-name': 'content'}, tracker._memos)
+        self.tracker.add_memo('test-name', 'content')
+        self.assertEqual('content', self.tracker.get_memo('test-name'))
+        self.assertEqual({'test-name': 'content'}, self.tracker._memos)
+        self.assertEqual([], sorted(self.tracker._last_read_memos))
+        self.tracker.save()
+        self.assertEqual({'test-name': 'content'}, self.tracker._memos)
         self.assertEqual([('test-name', 'content')],
-                         sorted(tracker._added_info))
-        self.assertEqual([], sorted(tracker._removed_info))
-        tracker.save()
-        self.assertEqual({'test-name': 'content'}, tracker._memos)
-        self.assertEqual([], sorted(tracker._added_info))
-        self.assertEqual([], sorted(tracker._removed_info))
-        btree = tracker._index_class(t, 'meta', size=None)
+                         sorted(self.tracker._last_read_memos))
+        btree = self.tracker._index_class(self.transport, 'meta', size=None)
         self.assertEqual(1, btree.key_count())
         self.assertEqual([(('test-name',), 'content')],
                  sorted([(n[1], n[2]) for n in btree.iter_all_entries()]))
 
-    def test__diff_memos_from_disk(self):
-        tracker = TrivialMemoTracker(self.get_transport(), 'meta')
-
     def test_get_missing_memo(self):
-        tracker = TrivialMemoTracker(self.get_transport(), 'meta')
-        self.assertRaises(KeyError, tracker.get_memo, 'not-there')
+        self.assertRaises(KeyError, self.tracker.get_memo, 'not-there')
 
     def test_remove_memo(self):
-        t = self.get_transport()
-        tracker = TrivialMemoTracker(t, 'meta')
-        tracker.add_memo('test-name', 'content')
-        tracker.add_memo('test-name2', 'content2')
+        self.tracker.add_memo('test-name', 'content')
+        self.tracker.add_memo('test-name2', 'content2')
         self.assertEqual({'test-name': 'content',
-                          'test-name2': 'content2'}, tracker._memos)
+                          'test-name2': 'content2'}, self.tracker._memos)
+        self.assertEqual([], sorted(self.tracker._last_read_memos))
+        self.tracker.save()
+        self.tracker.remove_memo('test-name2', 'content2')
         self.assertEqual([('test-name', 'content'),
-                          ('test-name2', 'content2'),
-                         ], sorted(tracker._added_info))
-        self.assertEqual([], sorted(tracker._removed_info))
-        tracker.save()
-        self.assertEqual([], sorted(tracker._added_info))
-        self.assertEqual([], sorted(tracker._removed_info))
-        tracker.remove_memo('test-name2', 'content2')
-        self.assertEqual([], sorted(tracker._added_info))
-        self.assertEqual([('test-name2', 'content2')],
-                         sorted(tracker._removed_info))
-        self.assertEqual({'test-name': 'content'}, tracker._memos)
+                          ('test-name2', 'content'),
+                         ], sorted(self.tracker._last_read_memos))
+        self.assertEqual({'test-name': 'content'}, self.tracker._memos)
         # Before saving, we should have both keys
-        btree = tracker._index_class(t, 'meta', size=None)
+        btree = self.tracker._index_class(self.transport, 'meta', size=None)
         self.assertEqual(2, btree.key_count())
-        tracker.save()
-        btree = tracker._index_class(t, 'meta', size=None)
+        self.tracker.save()
+        self.assertEqual([('test-name2', 'content')],
+                         sorted(self.tracker._last_read_memos))
+        btree = self.tracker._index_class(self.transport, 'meta', size=None)
         self.assertEqual(1, btree.key_count())
 
     def test_add_with_concurrent_add(self):
-        t = self.get_transport()
-        tracker = TrivialMemoTracker(t, 'meta')
-        tracker.add_memo('name', 'content')
-        tracker.save()
-        tracker2 = TrivialMemoTracker(t, 'meta')
+        self.tracker.add_memo('name', 'content')
+        self.tracker.save()
+        tracker2 = TrivialMemoTracker(self.transport, 'meta')
         tracker2.add_memo('name2', 'content2')
-        tracker.add_memo('name3', 'content3')
+        self.tracker.add_memo('name3', 'content3')
         tracker2.save()
-        tracker.save()
+        self.tracker.save()
         # When we save, tracker should notice that there was a new entry, and
         # add it to the in-memory data.
         self.assertEqual({'name': 'content',
                           'name2': 'content2',
                           'name3': 'content3',
-                         }, tracker._memos)
-        self.assertEqual([], sorted(tracker._added_info))
-        self.assertEqual([], sorted(tracker._removed_info))
+                         }, self.tracker._memos)
         # And the on-disk record should also have all 3
-        btree = tracker._index_class(t, 'meta', size=None)
+        btree = self.tracker._index_class(self.transport, 'meta', size=None)
         self.assertEqual(3, btree.key_count())
+
+    def test__diff_from_disk_no_changes(self):
+        self.tracker.add_memo(*self._m1)
+        self.tracker.save()
+        self.assertDiffFromDisk(on_disk=[self._m1],
+                                active=[self._m1],
+                                d_removed=[],
+                                d_added=[],
+                                m_removed=[],
+                                m_added=[])
+
+    def test__diff_from_empty_disk(self):
+        self.tracker.add_memo(*self._m1)
+        self.assertDiffFromDisk(on_disk=[],
+                                active=[self._m1],
+                                d_removed=[],
+                                d_added=[],
+                                m_removed=[],
+                                m_added=[self._m1])
+
+    def test__diff_mem_removed(self):
+        self.tracker.add_memo(*self._m1)
+        self.tracker.add_memo(*self._m2)
+        self.tracker.save()
+        self.tracker.remove_memo(*self._m2)
+        self.assertDiffFromDisk(on_disk=[self._m1, self._m2],
+                                active=[self._m1],
+                                d_removed=[],
+                                d_added=[],
+                                m_removed=[self._m2],
+                                m_added=[])
+
+    def test__diff_mem_added(self):
+        self.tracker.add_memo(*self._m1)
+        self.tracker.save()
+        self.tracker.add_memo(*self._m2)
+        self.assertDiffFromDisk(on_disk=[self._m1],
+                                active=[self._m1, self._m2],
+                                d_removed=[],
+                                d_added=[],
+                                m_removed=[],
+                                m_added=[self._m2])
+
+    def test__diff_disk_removed(self):
+        self.tracker.add_memo(*self._m1)
+        self.tracker.add_memo(*self._m2)
+        self.tracker.save()
+        tracker2 = TrivialMemoTracker(self.transport, 'meta')
+        tracker2.remove_memo(*self._m2)
+        tracker2.save()
+        self.assertDiffFromDisk(on_disk=[self._m1],
+                                active=[self._m1],
+                                d_removed=[self._m2],
+                                d_added=[],
+                                m_removed=[],
+                                m_added=[])
+
+    def test__diff_disk_added(self):
+        self.tracker.add_memo(*self._m1)
+        self.tracker.save()
+        tracker2 = TrivialMemoTracker(self.transport, 'meta')
+        tracker2.add_memo(*self._m2)
+        tracker2.save()
+        self.assertDiffFromDisk(on_disk=[self._m1, self._m2],
+                                active=[self._m1, self._m2],
+                                d_removed=[],
+                                d_added=[self._m2],
+                                m_removed=[],
+                                m_added=[])



More information about the bazaar-commits mailing list