Rev 4417: Change CHKMap.from_dict to create a LeafNode and split it. in http://bazaar.launchpad.net/~jameinel/bzr/1.16-chk-direct
John Arbash Meinel
john at arbash-meinel.com
Mon Jun 8 18:29:52 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.16-chk-direct
------------------------------------------------------------
revno: 4417
revision-id: john at arbash-meinel.com-20090608172947-r9hucea21l7mse3m
parent: john at arbash-meinel.com-20090606014721-xqbksp0fl6ossesk
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-chk-direct
timestamp: Mon 2009-06-08 12:29:47 -0500
message:
Change CHKMap.from_dict to create a LeafNode and split it.
As opposed to using multiple calls to .map().
There were a few edge cases that needed to be fixed, such as:
1) Setting self._raw_size on the new LeafNode.
2) LeafNode.map() needed to properly handle when one of its child nodes also split.
3) Update CHK1.add_inventory_by_delta to use this new function when appropriate.
4) For now, from_dict() continues to do it both ways, just to make sure it gives correct results.
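
For context, the new code path bulk-loads the whole dict into one LeafNode, splits that node once if it grows past maximum_size, and only then serialises the result, rather than paying for a .map() call per key. A rough usage sketch follows (the helper name and the sample entries are invented for illustration; CHKMap.from_dict, the search key registry, and the chk_bytes-style store argument are the pieces the patch actually exercises):

    from bzrlib import chk_map

    def build_chk_map(store, entries, maximum_size=4096):
        # 'store' is a chk_bytes VersionedFiles (e.g. repo.chk_bytes);
        # 'entries' is a plain dict of 1-tuple keys to byte-string values,
        # e.g. {('file-id',): 'serialised entry bytes'}.
        search_key_func = chk_map.search_key_registry.get('plain')
        root_key = chk_map.CHKMap.from_dict(
            store, entries,
            maximum_size=maximum_size, key_width=1,
            search_key_func=search_key_func)
        # from_dict() only returns the root key; wrap it in a CHKMap if the
        # caller wants to read the entries back out of the store.
        return chk_map.CHKMap(store, root_key, search_key_func=search_key_func)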
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2009-06-06 01:47:21 +0000
+++ b/bzrlib/chk_map.py 2009-06-08 17:29:47 +0000
@@ -98,39 +98,6 @@
else:
self._root_node = self._node_key(root_key)
- def apply_insert_delta(self, delta):
- """Apply a delta that only inserts items.
-
- :param delta: An iterable of old_key, new_key, new_value tuples.
- all old_key entries must be None, and all new_key entries must not
- be None.
- """
- self._ensure_root()
- node = LeafNode(search_key_func=self._search_key_func)
- node.set_maximum_size(self._root_node._maximum_size)
- node._key_width = self._root_node._key_width
- for old, new, value in delta:
- assert old is None
- assert new is not None
- node._items[new] = value
- node._len = len(node._items)
- node._compute_search_prefix()
- node._compute_serialised_prefix()
- if (node._len > 1
- and node._current_size() > node._maximum_size):
- prefix, node_details = node._split()
- if len(node_details) == 1:
- self._root_node = node_details[0][1]
- else:
- first_node = node_details[0][1]
- self._root_node = InternalNode(prefix,
- search_key_func=self._search_key_func)
- self._root_node.set_maximum_size(first_node.maximum_size)
- self._root_node._key_width = first_node._key_width
- for split, node in node_details:
- self._root_node.add_node(split, node)
- return self._save()
-
def apply_delta(self, delta):
"""Apply a delta to the map.
@@ -242,7 +209,32 @@
delta = []
for key, value in initial_value.items():
delta.append((None, key, value))
- return result.apply_delta(delta)
+ root_key = result.apply_delta(delta)
+ node = LeafNode(search_key_func=search_key_func)
+ node.set_maximum_size(maximum_size)
+ node._key_width = key_width
+ node._items = dict(initial_value)
+ node._raw_size = sum([node._key_value_len(key, value)
+ for key,value in initial_value.iteritems()])
+ node._len = len(node._items)
+ node._compute_search_prefix()
+ node._compute_serialised_prefix()
+ if (node._len > 1
+ and maximum_size
+ and node._current_size() > maximum_size):
+ prefix, node_details = node._split(store)
+ assert len(node_details) != 1
+ node = InternalNode(prefix, search_key_func=search_key_func)
+ node.set_maximum_size(maximum_size)
+ node._key_width = key_width
+ for split, subnode in node_details:
+ node.add_node(split, subnode)
+ keys = list(node.serialise(store))
+ if root_key != keys[-1]:
+ result2 = CHKMap(store, keys[-1], search_key_func=search_key_func)
+ import pdb; pdb.set_trace()
+ raise ValueError('Failed to serialize via leaf splitting.')
+ return root_key
def iter_changes(self, basis):
"""Iterate over the changes between basis and self.
@@ -797,7 +789,19 @@
result[prefix] = node
else:
node = result[prefix]
- node.map(store, key, value)
+ sub_prefix, node_details = node.map(store, key, value)
+ if len(node_details) > 1:
+ if prefix != sub_prefix:
+ # This node has been split and is now found via a different
+ # path
+ result.pop(prefix)
+ new_node = InternalNode(sub_prefix,
+ search_key_func=self._search_key_func)
+ new_node.set_maximum_size(self._maximum_size)
+ new_node._key_width = self._key_width
+ for split, node in node_details:
+ new_node.add_node(split, node)
+ result[prefix] = new_node
return common_prefix, result.items()
def map(self, store, key, value):
@@ -846,6 +850,7 @@
self._key = ("sha1:" + sha1,)
bytes = ''.join(lines)
if len(bytes) != self._current_size():
+ import pdb; pdb.set_trace()
raise AssertionError('Invalid _current_size')
_page_cache.add(self._key, bytes)
return [self._key]
@@ -1263,7 +1268,6 @@
:return: An iterable of (prefix, node) tuples. prefix is a byte
prefix for reaching node.
"""
- import pdb; pdb.set_trace()
if offset >= self._node_width:
for node in self._items.values():
for result in node._split(offset):
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py 2009-06-05 21:10:29 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py 2009-06-08 17:29:47 +0000
@@ -691,17 +691,20 @@
null_inv.root_id = None
return null_inv
- def _apply_delta_to_null_inv(self, new_inv, delta, new_revision_id):
+ def _create_inv_from_null(self, delta, new_revision_id):
"""This will mutate new_inv directly.
This is a simplified form of create_by_apply_delta which knows that all
the old values must be None, so everything is a create.
"""
+ serializer = self._format._serializer
+ new_inv = inventory.CHKInventory(serializer.search_key_name)
new_inv.revision_id = new_revision_id
+
entry_to_bytes = new_inv._entry_to_bytes
assert new_inv.parent_id_basename_to_file_id is not None
- id_to_entry_delta = []
- parent_id_basename_delta = []
+ id_to_entry_dict = {}
+ parent_id_basename_dict = {}
for old_path, new_path, file_id, entry in delta:
assert old_path is None
assert new_path is not None
@@ -713,14 +716,26 @@
utf8_entry_name = entry.name.encode('utf-8')
parent_id_basename_key = (entry.parent_id, utf8_entry_name)
new_value = entry_to_bytes(entry)
- # Update caches. It's worth doing this whether
- # we're propagating the old caches or not.
- ## result._path_to_fileid_cache[new_path] = file_id
- id_to_entry_delta.append((None, (file_id,), new_value))
- parent_id_basename_delta.append(
- (None, parent_id_basename_key, file_id))
- new_inv.id_to_entry.apply_insert_delta(id_to_entry_delta)
- new_inv.parent_id_basename_to_file_id.apply_insert_delta(parent_id_basename_delta)
+ # Create Caches?
+ ## new_inv._path_to_fileid_cache[new_path] = file_id
+ id_to_entry_dict[(file_id,)] = new_value
+ parent_id_basename_dict[parent_id_basename_key] = file_id
+
+ search_key_func = chk_map.search_key_registry.get(
+ serializer.search_key_name)
+ maximum_size = serializer.maximum_size
+ root_key = CHKMap.from_dict(self.chk_bytes, id_to_entry_dict,
+ maximum_size=maximum_size, key_width=1,
+ search_key_func=search_key_func)
+ new_inv.id_to_entry = chk_map.CHKMap(self.chk_bytes, root_key,
+ search_key_func)
+ root_key = CHKMap.from_dict(self.chk_bytes, parent_id_basename_dict,
+ maximum_size=maximum_size, key_width=2,
+ search_key_func=search_key_func)
+ new_inv.parent_id_basename_to_file_id = chk_map.CHKMap(self.chk_bytes,
+ root_key,
+ search_key_func)
+ return new_inv
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
parents, basis_inv=None, propagate_caches=False):
@@ -753,8 +768,7 @@
basis_tree = None
if basis_inv is None:
if basis_revision_id == _mod_revision.NULL_REVISION:
- new_inv = self._get_null_inventory()
- self._apply_delta_to_null_inv(new_inv, delta, new_revision_id)
+ new_inv = self._create_inv_from_null(delta, new_revision_id)
inv_lines = new_inv.to_lines()
return self._inventory_add_lines(new_revision_id, parents,
inv_lines, check_content=False), new_inv
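
To make the new _create_inv_from_null path concrete, here is a hedged, self-contained sketch of the inputs it works from (the file ids, names, and revision id below are invented): every item in the delta is a create, so old_path is always None, and the loop flattens the entries into the two dicts that get handed to CHKMap.from_dict.

    from bzrlib import inventory

    # Invented example data: two entries that an initial commit might create.
    root = inventory.InventoryDirectory('tree-root-id', '', None)
    root.revision = 'rev-1'
    hello = inventory.InventoryFile('hello-id', 'hello.txt', 'tree-root-id')
    hello.revision = 'rev-1'
    hello.text_size = 6
    hello.text_sha1 = '0' * 40

    # Every delta item is a create, so old_path is always None.
    delta = [
        (None, '', 'tree-root-id', root),
        (None, 'hello.txt', 'hello-id', hello),
    ]

    # _create_inv_from_null flattens this into the two dicts passed to
    # CHKMap.from_dict:
    #   id_to_entry_dict:        {('hello-id',): <entry bytes>, ...}            key_width=1
    #   parent_id_basename_dict: {('tree-root-id', 'hello.txt'): 'hello-id', ...}  key_width=2
    # new_inv = repo._create_inv_from_null(delta, 'rev-1')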