Rev 3779: Teach CHKMap how to iter items in 2-tuple keyspaces. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Fri Nov 14 03:30:02 GMT 2008


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 3779
revision-id: robertc at robertcollins.net-20081114032957-uqh1lsxe0s21vt8x
parent: robertc at robertcollins.net-20081114021953-ckqpcsakzrk1ns1l
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Fri 2008-11-14 14:29:57 +1100
message:
  Teach CHKMap how to iter items in 2-tuple keyspaces.
modified:
  bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
  bzrlib/tests/test_chk_map.py   test_chk_map.py-20081001014447-ue6kkuhofvdecvxa-2
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2008-11-14 02:13:12 +0000
+++ b/bzrlib/chk_map.py	2008-11-14 03:29:57 +0000
@@ -102,7 +102,7 @@
         return stream.next().get_bytes_as('fulltext')
 
     @classmethod
-    def from_dict(klass, store, initial_value, maximum_size=0):
+    def from_dict(klass, store, initial_value, maximum_size=0, key_width=1):
         """Create a CHKMap in store with initial_value as the content.
         
         :param store: The store to record initial_value in, a VersionedFiles
@@ -111,10 +111,13 @@
             must be bytestrings.
         :param maximum_size: The maximum_size rule to apply to nodes. This
             determines the size at which no new data is added to a single node.
+        :param key_width: The number of elements in each key_tuple being stored
+            in this map.
         :return: The root chk of te resulting CHKMap.
         """
         result = CHKMap(store, None)
         result._root_node.set_maximum_size(maximum_size)
+        result._root_node._key_width = key_width
         delta = []
         for key, value in initial_value.items():
             delta.append((None, key, value))
@@ -425,10 +428,25 @@
         return result
 
     def iteritems(self, store, key_filter=None):
+        """Iterate over items in the node.
+
+        :param key_filter: A filter to apply to the node. It should be a
+            list/set/dict or similar repeatedly iterable container.
+        """
         if key_filter is not None:
+            # Adjust the filter - short elements go to a prefix filter. Would this
+            # be cleaner explicitly? That would be no harder for InternalNode..
+            # XXX: perhaps defaultdict? Profiling<rinse and repeat>
+            filters = {}
+            for key in key_filter:
+                length_filter = filters.setdefault(len(key), set())
+                length_filter.add(key)
+            filters = filters.items()
             for item in self._items.iteritems():
-                if item[0] in key_filter:
-                    yield item
+                for length, length_filter in filters:
+                    if item[0][:length] in length_filter:
+                        yield item
+                        break
         else:
             for item in self._items.iteritems():
                 yield item
@@ -597,14 +615,22 @@
                 else:
                     nodes.append(node)
         else:
-            serialised_filter = set([self._serialised_key(key) for key in
-                key_filter])
+            # XXX defaultdict ?
+            length_filters = {}
+            for key in key_filter:
+                serialised_key = self._serialised_prefix_filter(key)
+                length_filter = length_filters.setdefault(len(serialised_key),
+                    set())
+                length_filter.add(serialised_key)
+            length_filters = length_filters.items()
             for prefix, node in self._items.iteritems():
-                if prefix in serialised_filter:
-                    if type(node) == tuple:
-                        keys[node] = prefix
-                    else:
-                        nodes.append(node)
+                for length, length_filter in length_filters:
+                    if prefix[:length] in length_filter:
+                        if type(node) == tuple:
+                            keys[node] = prefix
+                        else:
+                            nodes.append(node)
+                        break
         if keys:
             # demand load some pages.
             stream = store.get_record_stream(keys, 'unordered', True)
@@ -685,6 +711,12 @@
         """Return the serialised key for key in this node."""
         return ('\x00'.join(key) + '\x00'*self._node_width)[:self._node_width]
 
+    def _serialised_prefix_filter(self, key):
+        """Serialise key for use as a prefix filter in iteritems."""
+        if len(key) == self._key_width:
+            return self._serialised_key(key)
+        return '\x00'.join(key)[:self._node_width]
+
     def _split(self, offset):
         """Split this node into smaller nodes starting at offset.
 

=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py	2008-11-13 03:20:56 +0000
+++ b/bzrlib/tests/test_chk_map.py	2008-11-14 03:29:57 +0000
@@ -37,10 +37,11 @@
         self.addCleanup(repo.abort_write_group)
         return repo.chk_bytes
 
-    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None):
+    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1):
         if chk_bytes is None:
             chk_bytes = self.get_chk_bytes()
-        root_key = CHKMap.from_dict(chk_bytes, a_dict, maximum_size=maximum_size)
+        root_key = CHKMap.from_dict(chk_bytes, a_dict,
+            maximum_size=maximum_size, key_width=key_width)
         chkmap = CHKMap(chk_bytes, root_key)
         return chkmap
 
@@ -251,6 +252,23 @@
         self.assertEqual({("a",): "content here"},
             self.to_dict(chkmap, [("a",)]))
 
+    def test_iteritems_keys_prefixed_by_2_width_nodes(self):
+        chkmap = self._get_map(
+            {("a","a"):"content here", ("a", "b",):"more content",
+             ("b", ""): 'boring content'},
+            maximum_size=10, key_width=2)
+        self.assertEqual(
+            {("a", "a"): "content here", ("a", "b"): 'more content'},
+            self.to_dict(chkmap, [("a",)]))
+
+    def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
+        chkmap = self._get_map(
+            {("a","a"):"content here", ("a", "b",):"more content",
+             ("b", ""): 'boring content'}, key_width=2)
+        self.assertEqual(
+            {("a", "a"): "content here", ("a", "b"): 'more content'},
+            self.to_dict(chkmap, [("a",)]))
+
     def test___len__empty(self):
         chkmap = self._get_map({})
         self.assertEqual(0, len(chkmap))




More information about the bazaar-commits mailing list