Rev 30: temporary work to revert back to key-at-a-time to test performance in http://bzr.arbash-meinel.com/plugins/index2
John Arbash Meinel
john at arbash-meinel.com
Wed Jul 2 22:00:52 BST 2008
At http://bzr.arbash-meinel.com/plugins/index2
------------------------------------------------------------
revno: 30
revision-id: john at arbash-meinel.com-20080702210046-j00t51pj8kod09hz
parent: john at arbash-meinel.com-20080702204409-s512bff7tus0cb8n
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Wed 2008-07-02 16:00:46 -0500
message:
temporary work to revert back to key-at-a-time to test performance
-------------- next part --------------
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-02 20:44:09 +0000
+++ b/btree_index.py 2008-07-02 21:00:46 +0000
@@ -607,66 +607,33 @@
return
if not self.key_count():
return
- # Find all the keys that are already in our value cache
- global leaf_value_hits, miss_attempts
- needed_keys = []
- if self._leaf_value_cache is None:
- needed_keys = keys
- else:
- for key in keys:
- value = self._leaf_value_cache.get(key, None)
- if value is not None:
- leaf_value_hits[0] += 1
- # This key is known not to be here, skip it
- value, refs = value
- if self.node_ref_lists:
- yield (self, key, value, refs)
- else:
- yield (self, key, value)
- else:
- leaf_value_hits[1] += 1
- needed_keys.append(key)
- needed_keys = sorted(needed_keys)
-
- nodes_and_keys = [(0, needed_keys)]
- row_offset = 0
-
- if self._use_blooms:
- global bloom_hits
- if len(self._row_lengths) > 1:
- # no internal nodes, and thus no blooms in a length-1 b+tree.
- bloom_raw_offsets = NodeBloom.get_raw_offsets(needed_keys)
- for row_length in self._row_lengths[:-1]:
- node_indexes = [idx for idx, s_keys in nodes_and_keys]
- nodes = self._get_internal_nodes(node_indexes)
-
- row_offset += row_length
-
- next_nodes_and_keys = []
- for node_index, sub_keys in nodes_and_keys:
- node = nodes[node_index]
+ global bloom_hits, miss_attempts
+ for key in keys:
+ # what row are we looking at
+ node_index = 0
+ row = 0
+ # how many nodes have the previous rows eaten up
+ offset = self._row_lengths[row]
+ string_key = '\x00'.join(key)
+ for row_length in self._row_lengths[:-1]:
+ node = self._get_internal_nodes([node_index])[node_index]
if self._use_blooms and node.bloom is not None:
- # If using blooms, filter out any nodes that don't hit.
- remaining_sub_keys = [sub_key for sub_key in sub_keys
- if node.bloom.contains_raw_offset(bloom_raw_offsets[sub_key])]
- bloom_hits += len(sub_keys) - len(remaining_sub_keys)
- sub_keys = remaining_sub_keys
- # For misses at this level, do we want to cache that fact?
- positions = self._multi_bisect_right(sub_keys, node.keys)
- node_offset = row_offset + node.offset
- next_nodes_and_keys.extend([(node_offset + pos, s_keys)
- for pos, s_keys in positions])
- nodes_and_keys = next_nodes_and_keys
- # We should now be at the _LeafNodes
- node_indexes = [idx for idx, s_keys in nodes_and_keys]
- # This bypasses the cache, but only for the leaf nodes
- # We do it right now, because otherwise we swap the cache,
- # alternatively, we may *not* want to always read all the nodes in one
- # big go. Consider setting a max size on this.
- nodes = self._get_leaf_nodes(node_indexes)
- for node_index, sub_keys in nodes_and_keys:
- node = nodes[node_index]
- for key in sub_keys:
+ if string_key not in node.bloom:
+ bloom_hits += 1
+ node_index = None
+ break
+ pos = bisect_right(node.keys, key)
+ # There are len(node.keys) pointers dividing len(node.keys) + 1
+ # child nodes. The first pointer is the first key in the second
+ # child node, and so on.
+ # pos indexes into the list of child nodes, any key equal to a
+ # key in node.keys indicates that the key is present to the right
+ # of that divider.
+ node_index = offset + node.offset + pos
+ row += 1
+ offset = offset + self._row_lengths[row]
+ if node_index is not None:
+ node = self._get_leaf_nodes([node_index])[node_index]
if key in node.keys:
value, refs = node.keys[key]
if self.node_ref_lists:
=== modified file 'tests/test_btree_index.py'
--- a/tests/test_btree_index.py 2008-07-02 18:26:56 +0000
+++ b/tests/test_btree_index.py 2008-07-02 21:00:46 +0000
@@ -17,6 +17,7 @@
"""Tests for btree indices."""
+import pprint
import random
import zlib
More information about the bazaar-commits
mailing list