Rev 3810: (broken) Start tracking down more code that needs to pass around the 'search_key_func' in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key
John Arbash Meinel
john at arbash-meinel.com
Mon Jan 12 22:55:13 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key
------------------------------------------------------------
revno: 3810
revision-id: john at arbash-meinel.com-20090112225502-lb8om88nqe1u5o3g
parent: john at arbash-meinel.com-20090112184455-cich99qy75uqt2v1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hash_search_key
timestamp: Mon 2009-01-12 16:55:02 -0600
message:
(broken) Start tracking down more code that needs to pass around the 'search_key_func'
and make sure that things get done correctly.
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2009-01-07 22:15:57 +0000
+++ b/bzrlib/chk_map.py 2009-01-12 22:55:02 +0000
@@ -53,19 +53,27 @@
_page_cache = lru_cache.LRUSizeCache(_PAGE_CACHE_SIZE)
+
+
class CHKMap(object):
"""A persistent map from string to string backed by a CHK store."""
- def __init__(self, store, root_key):
+ def __init__(self, store, root_key, search_key_func=None):
"""Create a CHKMap object.
:param store: The store the CHKMap is stored in.
:param root_key: The root key of the map. None to create an empty
CHKMap.
+ :param search_key_func: A function mapping a key => bytes. These bytes
+ are then used by the internal nodes to split up leaf nodes into
+ multiple pages.
"""
self._store = store
+ if search_key_func is None:
+ search_key_func = '\x00'.join
+ self._search_key_func = search_key_func
if root_key is None:
- self._root_node = LeafNode()
+ self._root_node = LeafNode(search_key_func=search_key_func)
else:
self._root_node = self._node_key(root_key)
@@ -92,6 +100,8 @@
if type(self._root_node) == tuple:
# Demand-load the root
self._root_node = self._get_node(self._root_node)
+ # XXX: Shouldn't this be put into _deserialize?
+ self._root_node._search_key_func = self._search_key_func
def _get_node(self, node):
"""Get a node.
@@ -359,7 +369,8 @@
if len(node_details) == 1:
self._root_node = node_details[0][1]
else:
- self._root_node = InternalNode(prefix)
+ self._root_node = InternalNode(prefix,
+ search_key_func=self._search_key_func)
self._root_node.set_maximum_size(node_details[0][1].maximum_size)
self._root_node._key_width = node_details[0][1]._key_width
for split, node in node_details:
@@ -490,11 +501,15 @@
the key/value pairs.
"""
- def __init__(self):
+ def __init__(self, search_key_func=None):
Node.__init__(self)
# All of the keys in this leaf node share this common prefix
self._common_serialised_prefix = None
self._serialise_key = '\x00'.join
+ if search_key_func is None:
+ self._search_key_func = self._serialise_key
+ else:
+ self._search_key_func = search_key_func
def _current_size(self):
"""Answer the current serialised size of this node.
@@ -587,6 +602,9 @@
# then that can be done via the C extension
return 2 + len(self._serialise_key(key)) + len(value)
+ def _search_key(self, key):
+ return self._search_key_func(key)
+
def _map_no_split(self, key, value):
"""Map a key to a value.
@@ -641,7 +659,7 @@
if len(prefix) < split_at:
prefix += '\x00'*(split_at - len(prefix))
if prefix not in result:
- node = LeafNode()
+ node = LeafNode(search_key_func=self._search_key_func)
node.set_maximum_size(self._maximum_size)
node._key_width = self._key_width
result[prefix] = node
@@ -689,10 +707,6 @@
_page_cache.add(self._key, bytes)
return [self._key]
- def _search_key(self, key):
- """Return the search key for a key in this node."""
- return '\x00'.join(key)
-
def refs(self):
"""Return the references to other CHK's held by this node."""
return []
@@ -739,12 +753,16 @@
LeafNode or InternalNode.
"""
- def __init__(self, prefix=''):
+ def __init__(self, prefix='', search_key_func=None):
Node.__init__(self)
# The size of an internalnode with default values and no children.
# How many octets key prefixes within this node are.
self._node_width = 0
self._search_prefix = prefix
+ if search_key_func is None:
+ self._search_key_func = '\x00'.join
+ else:
+ self._search_key_func = search_key_func
def __repr__(self):
items_str = sorted(self._items)
@@ -902,7 +920,8 @@
# and then map this key into that node.
new_prefix = self.common_prefix(self._search_prefix,
search_key)
- new_parent = InternalNode(new_prefix)
+ # new_parent = InternalNode(new_prefix,
+ # search_key_func=self._search_key_func)
new_parent.set_maximum_size(self._maximum_size)
new_parent._key_width = self._key_width
new_parent.add_node(self._search_prefix[:len(new_prefix)+1],
@@ -995,7 +1014,7 @@
"""Return the serialised key for key in this node."""
# search keys are fixed width. All will be self._node_width wide, so we
# pad as necessary.
- return ('\x00'.join(key) + '\x00'*self._node_width)[:self._node_width]
+ return (self._search_key_func(key) + '\x00'*self._node_width)[:self._node_width]
def _search_prefix_filter(self, key):
"""Serialise key for use as a prefix filter in iteritems."""
=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py 2009-01-07 22:15:57 +0000
+++ b/bzrlib/tests/test_chk_map.py 2009-01-12 22:55:02 +0000
@@ -1000,6 +1000,70 @@
chkmap._dump_tree(include_keys=True))
+def _test_search_key(key):
+ return 'test:' + '\x00'.join(key)
+
+
+class TestMapSearchKeys(TestCaseWithStore):
+
+ def test_default_chk_map_uses_flat_search_key(self):
+ chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
+ self.assertEqual('1',
+ chkmap._search_key_func(('1',)))
+ self.assertEqual('1\x002',
+ chkmap._search_key_func(('1', '2')))
+ self.assertEqual('1\x002\x003',
+ chkmap._search_key_func(('1', '2', '3')))
+
+ def test_search_key_is_passed_to_root_node(self):
+ chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
+ search_key_func=_test_search_key)
+ self.assertIs(_test_search_key, chkmap._search_key_func)
+ self.assertEqual('test:1\x002\x003',
+ chkmap._search_key_func(('1', '2', '3')))
+ self.assertEqual('test:1\x002\x003',
+ chkmap._root_node._search_key(('1', '2', '3')))
+
+ def test_search_key_passed_via__ensure_root(self):
+ chk_bytes = self.get_chk_bytes()
+ chkmap = chk_map.CHKMap(chk_bytes, None,
+ search_key_func=_test_search_key)
+ root_key = chkmap._save()
+ chkmap = chk_map.CHKMap(chk_bytes, root_key,
+ search_key_func=_test_search_key)
+ chkmap._ensure_root()
+ self.assertEqual('test:1\x002\x003',
+ chkmap._root_node._search_key(('1', '2', '3')))
+
+ def test_search_key_with_internal_node(self):
+ chk_bytes = self.get_chk_bytes()
+ chkmap = chk_map.CHKMap(chk_bytes, None,
+ search_key_func=_test_search_key)
+ chkmap._root_node.set_maximum_size(10)
+ chkmap.map(('1',), 'foo')
+ chkmap.map(('2',), 'bar')
+ chkmap.map(('3',), 'baz')
+ self.assertEqualDiff("'' InternalNode\n"
+ " 'test:1' LeafNode\n"
+ " ('1',) 'foo'\n"
+ " 'test:2' LeafNode\n"
+ " ('2',) 'bar'\n"
+ " 'test:3' LeafNode\n"
+ " ('3',) 'baz'\n"
+ , chkmap._dump_tree())
+ root_key = chkmap._save()
+ chkmap = chk_map.CHKMap(chk_bytes, root_key,
+ search_key_func=_test_search_key)
+ self.assertEqualDiff("'' InternalNode\n"
+ " 'test:1' LeafNode\n"
+ " ('1',) 'foo'\n"
+ " 'test:2' LeafNode\n"
+ " ('2',) 'bar'\n"
+ " 'test:3' LeafNode\n"
+ " ('3',) 'baz'\n"
+ , chkmap._dump_tree())
+
+
class TestLeafNode(TestCaseWithStore):
def test_current_size_empty(self):
More information about the bazaar-commits
mailing list