Rev 15: Change the looping structure for iter_entries in http://bzr.arbash-meinel.com/plugins/index2

John Arbash Meinel john at arbash-meinel.com
Wed Jul 2 05:54:29 BST 2008


At http://bzr.arbash-meinel.com/plugins/index2

------------------------------------------------------------
revno: 15
revision-id: john at arbash-meinel.com-20080702045350-i5vjt3tqkyvsh3re
parent: john at arbash-meinel.com-20080702032221-9qn5l8n7owpg11u7
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Tue 2008-07-01 23:53:50 -0500
message:
  Change the looping structure for iter_entries
-------------- next part --------------
=== modified file 'btree_index.py'
--- a/btree_index.py	2008-07-02 03:22:21 +0000
+++ b/btree_index.py	2008-07-02 04:53:50 +0000
@@ -325,6 +325,20 @@
             self._cache_nodes([node_index])
             return self._node_cache[node_index]
 
+    def _get_nodes(self, node_indexes):
+        """Get a bunch of nodes, from cache or disk."""
+        found = {}
+        needed = []
+        for idx in node_indexes:
+            try:
+                found[idx] = self._node_cache[idx]
+            except KeyError:
+                needed.append(idx)
+        self._cache_nodes(needed)
+        for idx in needed:
+            found[idx] = self._node_cache[idx]
+        return found
+
     def iter_all_entries(self):
         """Iterate over all keys within the index.
 
@@ -498,43 +512,34 @@
             self._cache_nodes([0])
         if not self._key_count:
             return
-        node = self._get_node(0)
+        nodes_and_keys = [(0, keys)]
+        row_offset = 0
         for row_length in self._row_lengths:
-            next_nodes = []
-
-            # Split up the keys based on which nodes will work best next
-            # Both the node keys and are internal list are both sorted, so we
-            # just step both of them to find the right locations.
-        for key in keys:
-            # repeated single-bisection : 'make it work'
-            # FUTURE: look within the current node for the next key,
-            # consider sequential scans
-            # consider block reads
-            # gather stats on hit rate etc
-            # start at the root:
-            node = self._get_node(0)
-            # what row are we looking at
-            row = 0
-            # how many nodes have the previous rows eaten up
-            offset = self._row_lengths[row]
-            while type(node) != _LeafNode:
-                pos = bisect_right(node.keys, key)
-                # There are len(node.keys) pointers dividing len(node.keys) + 1
-                # child nodes. The first pointer is the first key in the second
-                # child node, and so on.
-                # pos indexes into the list of child nodes, any key equal to a
-                # key in node.keys indicates that the key is present to the right
-                # of that divider.
-                node_index = offset + node.offset + pos
-                row += 1
-                offset = offset + self._row_lengths[row]
-                node = self._get_node(node_index)
-            if key in node.keys:
-                value, refs = node.keys[key]
-                if self.node_ref_lists:
-                    yield (self, key, value, refs)
-                else:
-                    yield (self, key, value)
+            node_indexes = [idx for idx, s_keys in nodes_and_keys]
+            nodes = self._get_nodes(node_indexes)
+
+            row_offset += row_length
+
+            next_nodes_and_keys = []
+            for node_index, sub_keys in nodes_and_keys:
+                node = nodes[node_index]
+                positions = self._multi_bisect_right(sub_keys, node.keys)
+                node_offset = row_offset + node.offset
+                next_nodes_and_keys.extend([(node_offset + pos, s_keys)
+                                           for pos, s_keys in positions])
+            nodes_and_keys = next_nodes_and_keys
+        # We should now be at the _LeafNodes
+        node_indexes = [idx for idx, s_keys in nodes_and_keys]
+        nodes = self._get_nodes(node_indexes)
+        for node_index, sub_keys in nodes_and_keys:
+            node = nodes[node_index]
+            for key in sub_keys:
+                if key in node.keys:
+                    value, refs = node.keys[key]
+                    if self.node_ref_lists:
+                        yield (self, key, value, refs)
+                    else:
+                        yield (self, key, value)
 
     def iter_entries_prefix(self, keys):
         """Iterate over keys within the index using prefix matching.



More information about the bazaar-commits mailing list