Rev 3802: Add logic to map() so that it can also collapse when necessary. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/chk_map

John Arbash Meinel john at arbash-meinel.com
Tue Dec 2 23:44:46 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/chk_map

------------------------------------------------------------
revno: 3802
revision-id: john at arbash-meinel.com-20081202234434-b2ahyaq5vqg6jylz
parent: john at arbash-meinel.com-20081202220545-sflppo9zeho3z3j3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: chk_map
timestamp: Tue 2008-12-02 17:44:34 -0600
message:
  Add logic to map() so that it can also collapse when necessary.
  
  It would be nice if check_remap could use size information to determine this
  without actually having to build up a LeafNode, but that will depend on the
  serializer in the end. Profiling will show if it ever really matters.
  
  One other possibility would be to limit the number of new nodes we pull in at
  a time. If we are going to have to load 50 LeafNodes then we are rather positive
  that it won't fit into a new LeafNode.
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2008-12-02 22:05:45 +0000
+++ b/bzrlib/chk_map.py	2008-12-02 23:44:34 +0000
@@ -735,6 +735,10 @@
             # new child needed:
             child = self._new_child(serialised_key, LeafNode)
         old_len = len(child)
+        if isinstance(child, LeafNode):
+            old_size = child._current_size()
+        else:
+            old_size = None
         prefix, node_details = child.map(store, key, value)
         if len(node_details) == 1:
             # child may have shrunk, or might be a new node
@@ -742,7 +746,15 @@
             self._len = self._len - old_len + len(child)
             self._items[serialised_key] = child
             self._key = None
-            return self.unique_serialised_prefix(), [("", self)]
+            new_node = self
+            if (isinstance(child, LeafNode)
+                and (old_size is None or child._current_size() < old_size)):
+                # The old node was an InternalNode which means it has now
+                # collapsed, so we need to check if it will chain to a collapse
+                # at this level. Or the LeafNode has shrunk in size, so we need
+                # to check that as well.
+                new_node = self._check_remap(store)
+            return new_node.unique_serialised_prefix(), [("", new_node)]
         # child has overflown - create a new intermediate node.
         # XXX: This is where we might want to try and expand our depth
         # to refer to more bytes of every child (which would give us
@@ -881,7 +893,11 @@
         return self._check_remap(store)
 
     def _check_remap(self, store):
-        """Check if all keys contained by children fit in a single LeafNode."""
+        """Check if all keys contained by children fit in a single LeafNode.
+
+        :param store: A store to use for reading more nodes
+        :return: Either self, or a new LeafNode which should replace self.
+        """
         # Logic for how we determine when we need to rebuild
         # 1) Implicitly unmap() is removing a key which means that the child
         #    nodes are going to be shrinking by some extent.

=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py	2008-12-02 22:05:45 +0000
+++ b/bzrlib/tests/test_chk_map.py	2008-12-02 23:44:34 +0000
@@ -337,6 +337,53 @@
                              "      ('aaabadaa',) 'v'\n",
                              chkmap._dump_tree())
 
+    def test_map_collapses_if_size_changes(self):
+        store = self.get_chk_bytes()
+        chkmap = CHKMap(store, None)
+        # Should fit 2 keys per LeafNode
+        chkmap._root_node.set_maximum_size(30)
+        chkmap.map(('aaa',), 'v')
+        chkmap.map(('aab',), 'very long value that splits')
+        self.assertEqualDiff("'' InternalNode None\n"
+                             "  'aaa' LeafNode None\n"
+                             "      ('aaa',) 'v'\n"
+                             "  'aab' LeafNode None\n"
+                             "      ('aab',) 'very long value that splits'\n",
+                             chkmap._dump_tree())
+        self.assertCanonicalForm(chkmap)
+        # Now changing the value to something small should cause a rebuild
+        chkmap.map(('aab',), 'v')
+        self.assertEqualDiff("'' LeafNode None\n"
+                             "      ('aaa',) 'v'\n"
+                             "      ('aab',) 'v'\n",
+                             chkmap._dump_tree())
+        self.assertCanonicalForm(chkmap)
+
+    def test_map_double_deep_collapses(self):
+        store = self.get_chk_bytes()
+        chkmap = CHKMap(store, None)
+        # Should fit 3 small keys per LeafNode
+        chkmap._root_node.set_maximum_size(40)
+        chkmap.map(('aaa',), 'v')
+        chkmap.map(('aab',), 'very long value that splits')
+        chkmap.map(('abc',), 'v')
+        self.assertEqualDiff("'' InternalNode None\n"
+                             "  'aa' InternalNode None\n"
+                             "    'aaa' LeafNode None\n"
+                             "      ('aaa',) 'v'\n"
+                             "    'aab' LeafNode None\n"
+                             "      ('aab',) 'very long value that splits'\n"
+                             "  'ab' LeafNode None\n"
+                             "      ('abc',) 'v'\n",
+                             chkmap._dump_tree())
+        chkmap.map(('aab',), 'v')
+        self.assertCanonicalForm(chkmap)
+        self.assertEqualDiff("'' LeafNode None\n"
+                             "      ('aaa',) 'v'\n"
+                             "      ('aab',) 'v'\n"
+                             "      ('abc',) 'v'\n",
+                             chkmap._dump_tree())
+
     def test_stable_unmap(self):
         store = self.get_chk_bytes()
         chkmap = CHKMap(store, None)



More information about the bazaar-commits mailing list