Rev 4417: Change CHKMap.from_dict to create a LeafNode and split it. in http://bazaar.launchpad.net/~jameinel/bzr/1.16-chk-direct

John Arbash Meinel john at arbash-meinel.com
Mon Jun 8 18:29:52 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.16-chk-direct

------------------------------------------------------------
revno: 4417
revision-id: john at arbash-meinel.com-20090608172947-r9hucea21l7mse3m
parent: john at arbash-meinel.com-20090606014721-xqbksp0fl6ossesk
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-chk-direct
timestamp: Mon 2009-06-08 12:29:47 -0500
message:
  Change CHKMap.from_dict to create a LeafNode and split it,
  as opposed to using multiple calls to .map().
  A few edge cases needed to be fixed along the way, such as:
  1) setting self._raw_size
  2) LeafNode._split() needed to properly handle the case where one of its
  child nodes itself splits during map()
  3) updating CHK1.add_inventory_by_delta to use this new function when
  appropriate
  4) for now, from_dict() continues to build the map both ways, to make sure
  the new path gives correct results
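
The shape of the new from_dict path, roughly: fill one oversized leaf with
every pair up front, then split it once (recursively, if a child group is
still over budget). A standalone sketch of that shape, with toy tuples
standing in for bzrlib's actual LeafNode/InternalNode classes and plain key
slicing standing in for the search-key prefix logic:

    # Toy model of "build one big leaf, then split once"; not bzrlib code.
    def build_map(initial_value, maximum_size, depth=1):
        leaf = dict(initial_value)
        raw_size = sum(len(k) + len(v) for k, v in leaf.items())
        if len(leaf) <= 1 or not maximum_size or raw_size <= maximum_size:
            return ('leaf', leaf)
        # Over budget: group keys by a one-byte-longer prefix, the analogue
        # of LeafNode._split() feeding InternalNode.add_node().
        children = {}
        for key, value in leaf.items():
            children.setdefault(key[:depth], {})[key] = value
        # A child group can still be over budget, so recurse; this is the
        # "child node also splits" case the commit message mentions.
        return ('internal',
                dict((p, build_map(sub, maximum_size, depth + 1))
                     for p, sub in children.items()))
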
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2009-06-06 01:47:21 +0000
+++ b/bzrlib/chk_map.py	2009-06-08 17:29:47 +0000
@@ -98,39 +98,6 @@
         else:
             self._root_node = self._node_key(root_key)
 
-    def apply_insert_delta(self, delta):
-        """Apply a delta that only inserts items.
-
-        :param delta: An iterable of old_key, new_key, new_value tuples.
-            all old_key entries must be None, and all new_key entries must not
-            be None.
-        """
-        self._ensure_root()
-        node = LeafNode(search_key_func=self._search_key_func)
-        node.set_maximum_size(self._root_node._maximum_size)
-        node._key_width = self._root_node._key_width
-        for old, new, value in delta:
-            assert old is None
-            assert new is not None
-            node._items[new] = value
-        node._len = len(node._items)
-        node._compute_search_prefix()
-        node._compute_serialised_prefix()
-        if (node._len > 1
-            and node._current_size() > node._maximum_size):
-            prefix, node_details = node._split()
-            if len(node_details) == 1:
-                self._root_node = node_details[0][1]
-            else:
-                first_node = node_details[0][1]
-                self._root_node = InternalNode(prefix,
-                                    search_key_func=self._search_key_func)
-                self._root_node.set_maximum_size(first_node.maximum_size)
-                self._root_node._key_width = first_node._key_width
-                for split, node in node_details:
-                    self._root_node.add_node(split, node)
-        return self._save()
-
     def apply_delta(self, delta):
         """Apply a delta to the map.
 
@@ -242,7 +209,32 @@
         delta = []
         for key, value in initial_value.items():
             delta.append((None, key, value))
-        return result.apply_delta(delta)
+        root_key = result.apply_delta(delta)
+        node = LeafNode(search_key_func=search_key_func)
+        node.set_maximum_size(maximum_size)
+        node._key_width = key_width
+        node._items = dict(initial_value)
+        node._raw_size = sum([node._key_value_len(key, value)
+                              for key, value in initial_value.iteritems()])
+        node._len = len(node._items)
+        node._compute_search_prefix()
+        node._compute_serialised_prefix()
+        if (node._len > 1
+            and maximum_size
+            and node._current_size() > maximum_size):
+            prefix, node_details = node._split(store)
+            assert len(node_details) != 1
+            node = InternalNode(prefix, search_key_func=search_key_func)
+            node.set_maximum_size(maximum_size)
+            node._key_width = key_width
+            for split, subnode in node_details:
+                node.add_node(split, subnode)
+        keys = list(node.serialise(store))
+        if root_key != keys[-1]:
+            result2 = CHKMap(store, keys[-1], search_key_func=search_key_func)
+            import pdb; pdb.set_trace()
+            raise ValueError('Failed to serialize via leaf splitting.')
+        return root_key
 
     def iter_changes(self, basis):
         """Iterate over the changes between basis and self.
@@ -797,7 +789,19 @@
                 result[prefix] = node
             else:
                 node = result[prefix]
-            node.map(store, key, value)
+            sub_prefix, node_details = node.map(store, key, value)
+            if len(node_details) > 1:
+                if prefix != sub_prefix:
+                    # This node has been split and is now found via a different
+                    # path
+                    result.pop(prefix)
+                new_node = InternalNode(sub_prefix,
+                    search_key_func=self._search_key_func)
+                new_node.set_maximum_size(self._maximum_size)
+                new_node._key_width = self._key_width
+                for split, node in node_details:
+                    new_node.add_node(split, node)
+                result[prefix] = new_node
         return common_prefix, result.items()
 
     def map(self, store, key, value):
@@ -846,6 +850,7 @@
         self._key = ("sha1:" + sha1,)
         bytes = ''.join(lines)
         if len(bytes) != self._current_size():
+            import pdb; pdb.set_trace()
             raise AssertionError('Invalid _current_size')
         _page_cache.add(self._key, bytes)
         return [self._key]
@@ -1263,7 +1268,6 @@
         :return: An iterable of (prefix, node) tuples. prefix is a byte
             prefix for reaching node.
         """
-        import pdb; pdb.set_trace()
         if offset >= self._node_width:
             for node in self._items.values():
                 for result in node._split(offset):

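The from_dict hunk above also shows point 4 of the commit message: the map
is built both via apply_delta() and via the new leaf-splitting path, and the
two root keys are compared before returning. The generic shape of that
safety net (hypothetical helper names, not bzrlib API):

    # Run the trusted reference path and the new fast path over the same
    # input, and fail loudly if their results ever diverge.
    def cross_checked(build_fast, build_reference, initial_value):
        ref_key = build_reference(initial_value)
        fast_key = build_fast(initial_value)
        if fast_key != ref_key:
            raise ValueError('Failed to serialize via leaf splitting.')
        return ref_key
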
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py	2009-06-05 21:10:29 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py	2009-06-08 17:29:47 +0000
@@ -691,17 +691,19 @@
         null_inv.root_id = None
         return null_inv
 
-    def _apply_delta_to_null_inv(self, new_inv, delta, new_revision_id):
+    def _create_inv_from_null(self, delta, new_revision_id):
         """This will mutate new_inv directly.
 
         This is a simplified form of create_by_apply_delta which knows that all
         the old values must be None, so everything is a create.
         """
+        serializer = self._format._serializer
+        new_inv = inventory.CHKInventory(serializer.search_key_name)
         new_inv.revision_id = new_revision_id
+
         entry_to_bytes = new_inv._entry_to_bytes
-        assert new_inv.parent_id_basename_to_file_id is not None
-        id_to_entry_delta = []
-        parent_id_basename_delta = []
+        id_to_entry_dict = {}
+        parent_id_basename_dict = {}
         for old_path, new_path, file_id, entry in delta:
             assert old_path is None
             assert new_path is not None
@@ -713,14 +716,26 @@
                 utf8_entry_name = entry.name.encode('utf-8')
                 parent_id_basename_key = (entry.parent_id, utf8_entry_name)
             new_value = entry_to_bytes(entry)
-            # Update caches. It's worth doing this whether
-            # we're propagating the old caches or not.
-            ## result._path_to_fileid_cache[new_path] = file_id
-            id_to_entry_delta.append((None, (file_id,), new_value))
-            parent_id_basename_delta.append(
-                (None, parent_id_basename_key, file_id))
-        new_inv.id_to_entry.apply_insert_delta(id_to_entry_delta)
-        new_inv.parent_id_basename_to_file_id.apply_insert_delta(parent_id_basename_delta)
+            # TODO: is it worth populating _path_to_fileid_cache here?
+            ## new_inv._path_to_fileid_cache[new_path] = file_id
+            id_to_entry_dict[(file_id,)] = new_value
+            parent_id_basename_dict[parent_id_basename_key] = file_id
+
+        search_key_func = chk_map.search_key_registry.get(
+                            serializer.search_key_name)
+        maximum_size = serializer.maximum_size
+        root_key = chk_map.CHKMap.from_dict(self.chk_bytes, id_to_entry_dict,
+                                    maximum_size=maximum_size, key_width=1,
+                                    search_key_func=search_key_func)
+        new_inv.id_to_entry = chk_map.CHKMap(self.chk_bytes, root_key,
+                                             search_key_func)
+        root_key = chk_map.CHKMap.from_dict(self.chk_bytes,
+                                    parent_id_basename_dict,
+                                    maximum_size=maximum_size, key_width=2,
+                                    search_key_func=search_key_func)
+        new_inv.parent_id_basename_to_file_id = chk_map.CHKMap(
+            self.chk_bytes, root_key, search_key_func)
+        return new_inv
 
     def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                                parents, basis_inv=None, propagate_caches=False):
@@ -753,8 +768,7 @@
         basis_tree = None
         if basis_inv is None:
             if basis_revision_id == _mod_revision.NULL_REVISION:
-                new_inv = self._get_null_inventory()
-                self._apply_delta_to_null_inv(new_inv, delta, new_revision_id)
+                new_inv = self._create_inv_from_null(delta, new_revision_id)
                 inv_lines = new_inv.to_lines()
                 return self._inventory_add_lines(new_revision_id, parents,
                     inv_lines, check_content=False), new_inv


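The groupcompress_repo.py change gathers both maps' contents as plain dicts
in a single pass over the inserts-only delta, then materializes each with
chk_map.CHKMap.from_dict. A minimal sketch of the gather step; the entry
attributes follow the diff, while the ('', '') key for the root entry is an
assumption based on CHKInventory's convention, and entry_to_bytes is
whatever serializes an entry:

    # One pass over an inserts-only inventory delta, accumulating the dicts
    # that back id_to_entry and parent_id_basename_to_file_id.
    def gather_chk_dicts(delta, entry_to_bytes):
        id_to_entry_dict = {}
        parent_id_basename_dict = {}
        for old_path, new_path, file_id, entry in delta:
            assert old_path is None and new_path is not None
            if entry.parent_id is None:
                # Root entry: assumed ('', '') key, per CHKInventory usage.
                parent_id_basename_key = ('', '')
            else:
                parent_id_basename_key = (entry.parent_id,
                                          entry.name.encode('utf-8'))
            id_to_entry_dict[(file_id,)] = entry_to_bytes(entry)
            parent_id_basename_dict[parent_id_basename_key] = file_id
        return id_to_entry_dict, parent_id_basename_dict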
