Rev 15: Change the looping structure for iter_entries in http://bzr.arbash-meinel.com/plugins/index2
John Arbash Meinel
john at arbash-meinel.com
Wed Jul 2 05:54:29 BST 2008
At http://bzr.arbash-meinel.com/plugins/index2
------------------------------------------------------------
revno: 15
revision-id: john at arbash-meinel.com-20080702045350-i5vjt3tqkyvsh3re
parent: john at arbash-meinel.com-20080702032221-9qn5l8n7owpg11u7
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Tue 2008-07-01 23:53:50 -0500
message:
Change the looping structure for iter_entries
-------------- next part --------------
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-02 03:22:21 +0000
+++ b/btree_index.py 2008-07-02 04:53:50 +0000
@@ -325,6 +325,20 @@
self._cache_nodes([node_index])
return self._node_cache[node_index]
+ def _get_nodes(self, node_indexes):
+ """Get a bunch of nodes, from cache or disk."""
+ found = {}
+ needed = []
+ for idx in node_indexes:
+ try:
+ found[idx] = self._node_cache[idx]
+ except KeyError:
+ needed.append(idx)
+ self._cache_nodes(needed)
+ for idx in needed:
+ found[idx] = self._node_cache[idx]
+ return found
+
def iter_all_entries(self):
"""Iterate over all keys within the index.
@@ -498,43 +512,34 @@
self._cache_nodes([0])
if not self._key_count:
return
- node = self._get_node(0)
+ nodes_and_keys = [(0, keys)]
+ row_offset = 0
for row_length in self._row_lengths:
- next_nodes = []
-
- # Split up the keys based on which nodes will work best next
- # Both the node keys and are internal list are both sorted, so we
- # just step both of them to find the right locations.
- for key in keys:
- # repeated single-bisection : 'make it work'
- # FUTURE: look within the current node for the next key,
- # consider sequential scans
- # consider block reads
- # gather stats on hit rate etc
- # start at the root:
- node = self._get_node(0)
- # what row are we looking at
- row = 0
- # how many nodes have the previous rows eaten up
- offset = self._row_lengths[row]
- while type(node) != _LeafNode:
- pos = bisect_right(node.keys, key)
- # There are len(node.keys) pointers dividing len(node.keys) + 1
- # child nodes. The first pointer is the first key in the second
- # child node, and so on.
- # pos indexes into the list of child nodes, any key equal to a
- # key in node.keys indicates that the key is present to the right
- # of that divider.
- node_index = offset + node.offset + pos
- row += 1
- offset = offset + self._row_lengths[row]
- node = self._get_node(node_index)
- if key in node.keys:
- value, refs = node.keys[key]
- if self.node_ref_lists:
- yield (self, key, value, refs)
- else:
- yield (self, key, value)
+ node_indexes = [idx for idx, s_keys in nodes_and_keys]
+ nodes = self._get_nodes(node_indexes)
+
+ row_offset += row_length
+
+ next_nodes_and_keys = []
+ for node_index, sub_keys in nodes_and_keys:
+ node = nodes[node_index]
+ positions = self._multi_bisect_right(sub_keys, node.keys)
+ node_offset = row_offset + node.offset
+ next_nodes_and_keys.extend([(node_offset + pos, s_keys)
+ for pos, s_keys in positions])
+ nodes_and_keys = next_nodes_and_keys
+ # We should now be at the _LeafNodes
+ node_indexes = [idx for idx, s_keys in nodes_and_keys]
+ nodes = self._get_nodes(node_indexes)
+ for node_index, sub_keys in nodes_and_keys:
+ node = nodes[node_index]
+ for key in sub_keys:
+ if key in node.keys:
+ value, refs = node.keys[key]
+ if self.node_ref_lists:
+ yield (self, key, value, refs)
+ else:
+ yield (self, key, value)
def iter_entries_prefix(self, keys):
"""Iterate over keys within the index using prefix matching.
More information about the bazaar-commits
mailing list