Rev 3779: Teach CHKMap how to iter items in 2-tuple keyspaces. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Fri Nov 14 03:30:02 GMT 2008
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 3779
revision-id: robertc at robertcollins.net-20081114032957-uqh1lsxe0s21vt8x
parent: robertc at robertcollins.net-20081114021953-ckqpcsakzrk1ns1l
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Fri 2008-11-14 14:29:57 +1100
message:
Teach CHKMap how to iter items in 2-tuple keyspaces.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/tests/test_chk_map.py test_chk_map.py-20081001014447-ue6kkuhofvdecvxa-2
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2008-11-14 02:13:12 +0000
+++ b/bzrlib/chk_map.py 2008-11-14 03:29:57 +0000
@@ -102,7 +102,7 @@
return stream.next().get_bytes_as('fulltext')
@classmethod
- def from_dict(klass, store, initial_value, maximum_size=0):
+ def from_dict(klass, store, initial_value, maximum_size=0, key_width=1):
"""Create a CHKMap in store with initial_value as the content.
:param store: The store to record initial_value in, a VersionedFiles
@@ -111,10 +111,13 @@
must be bytestrings.
:param maximum_size: The maximum_size rule to apply to nodes. This
determines the size at which no new data is added to a single node.
+ :param key_width: The number of elements in each key_tuple being stored
+ in this map.
:return: The root chk of te resulting CHKMap.
"""
result = CHKMap(store, None)
result._root_node.set_maximum_size(maximum_size)
+ result._root_node._key_width = key_width
delta = []
for key, value in initial_value.items():
delta.append((None, key, value))
@@ -425,10 +428,25 @@
return result
def iteritems(self, store, key_filter=None):
+ """Iterate over items in the node.
+
+ :param key_filter: A filter to apply to the node. It should be a
+ list/set/dict or similar repeatedly iterable container.
+ """
if key_filter is not None:
+ # Adjust the filter - short elements go to a prefix filter. Would this
+ # be cleaner explicitly? That would be no harder for InternalNode..
+ # XXX: perhaps defaultdict? Profiling<rinse and repeat>
+ filters = {}
+ for key in key_filter:
+ length_filter = filters.setdefault(len(key), set())
+ length_filter.add(key)
+ filters = filters.items()
for item in self._items.iteritems():
- if item[0] in key_filter:
- yield item
+ for length, length_filter in filters:
+ if item[0][:length] in length_filter:
+ yield item
+ break
else:
for item in self._items.iteritems():
yield item
@@ -597,14 +615,22 @@
else:
nodes.append(node)
else:
- serialised_filter = set([self._serialised_key(key) for key in
- key_filter])
+ # XXX defaultdict ?
+ length_filters = {}
+ for key in key_filter:
+ serialised_key = self._serialised_prefix_filter(key)
+ length_filter = length_filters.setdefault(len(serialised_key),
+ set())
+ length_filter.add(serialised_key)
+ length_filters = length_filters.items()
for prefix, node in self._items.iteritems():
- if prefix in serialised_filter:
- if type(node) == tuple:
- keys[node] = prefix
- else:
- nodes.append(node)
+ for length, length_filter in length_filters:
+ if prefix[:length] in length_filter:
+ if type(node) == tuple:
+ keys[node] = prefix
+ else:
+ nodes.append(node)
+ break
if keys:
# demand load some pages.
stream = store.get_record_stream(keys, 'unordered', True)
@@ -685,6 +711,12 @@
"""Return the serialised key for key in this node."""
return ('\x00'.join(key) + '\x00'*self._node_width)[:self._node_width]
+ def _serialised_prefix_filter(self, key):
+ """Serialise key for use as a prefix filter in iteritems."""
+ if len(key) == self._key_width:
+ return self._serialised_key(key)
+ return '\x00'.join(key)[:self._node_width]
+
def _split(self, offset):
"""Split this node into smaller nodes starting at offset.
=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py 2008-11-13 03:20:56 +0000
+++ b/bzrlib/tests/test_chk_map.py 2008-11-14 03:29:57 +0000
@@ -37,10 +37,11 @@
self.addCleanup(repo.abort_write_group)
return repo.chk_bytes
- def _get_map(self, a_dict, maximum_size=0, chk_bytes=None):
+ def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1):
if chk_bytes is None:
chk_bytes = self.get_chk_bytes()
- root_key = CHKMap.from_dict(chk_bytes, a_dict, maximum_size=maximum_size)
+ root_key = CHKMap.from_dict(chk_bytes, a_dict,
+ maximum_size=maximum_size, key_width=key_width)
chkmap = CHKMap(chk_bytes, root_key)
return chkmap
@@ -251,6 +252,23 @@
self.assertEqual({("a",): "content here"},
self.to_dict(chkmap, [("a",)]))
+ def test_iteritems_keys_prefixed_by_2_width_nodes(self):
+ chkmap = self._get_map(
+ {("a","a"):"content here", ("a", "b",):"more content",
+ ("b", ""): 'boring content'},
+ maximum_size=10, key_width=2)
+ self.assertEqual(
+ {("a", "a"): "content here", ("a", "b"): 'more content'},
+ self.to_dict(chkmap, [("a",)]))
+
+ def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
+ chkmap = self._get_map(
+ {("a","a"):"content here", ("a", "b",):"more content",
+ ("b", ""): 'boring content'}, key_width=2)
+ self.assertEqual(
+ {("a", "a"): "content here", ("a", "b"): 'more content'},
+ self.to_dict(chkmap, [("a",)]))
+
def test___len__empty(self):
chkmap = self._get_map({})
self.assertEqual(0, len(chkmap))
More information about the bazaar-commits
mailing list