Rev 29: disable the alternative multi-way bisecting, and allow the leaf_value cache to be disabled. in http://bzr.arbash-meinel.com/plugins/index2
John Arbash Meinel
john at arbash-meinel.com
Wed Jul 2 21:44:15 BST 2008
At http://bzr.arbash-meinel.com/plugins/index2
------------------------------------------------------------
revno: 29
revision-id: john at arbash-meinel.com-20080702204409-s512bff7tus0cb8n
parent: john at arbash-meinel.com-20080702202600-3q9fnpmwedog3j5v
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Wed 2008-07-02 15:44:09 -0500
message:
disable the alternative multi-way bisecting, and allow the leaf_value cache to be disabled.
LRUCaches and random access do not play well together.
-------------- next part --------------
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-02 20:26:00 +0000
+++ b/btree_index.py 2008-07-02 20:44:09 +0000
@@ -388,7 +388,7 @@
self._page_size = transport.recommended_page_size()
self._root_node = None
# Default max size is 100,000 leave values
- self._leaf_value_cache = lru_cache.LRUCache(100*1000)
+ self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
self._leaf_node_cache = lru_cache.LRUCache()
self._internal_node_cache = lru_cache.LRUCache()
self._key_count = None
@@ -466,13 +466,14 @@
"""Get a bunch of nodes, from cache or disk."""
found = self._get_nodes(self._leaf_node_cache, node_indexes,
leaf_node_hits)
- for node in found.itervalues():
- for key, value in node.keys.iteritems():
- if key in self._leaf_value_cache:
- # Don't add the rest of the keys, we've seen this node
- # before.
- break
- self._leaf_value_cache[key] = value
+ if self._leaf_value_cache is not None:
+ for node in found.itervalues():
+ for key, value in node.keys.iteritems():
+ if key in self._leaf_value_cache:
+ # Don't add the rest of the keys, we've seen this node
+ # before.
+ break
+ self._leaf_value_cache[key] = value
return found
def iter_all_entries(self):
@@ -524,20 +525,18 @@
# based on which has the fewer number of steps.
# There is also the argument that bisect_right is a compiled
# function, so there is even more to be gained.
- # if (len(in_keys) + len(fixed_keys)
- # > len(in_keys) * math.log(len(fixed_keys), 2)):
- iter_steps = len(in_keys) + len(fixed_keys)
- bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
+ # iter_steps = len(in_keys) + len(fixed_keys)
+ # bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
if len(in_keys) == 1: # Bisect will always be faster for M = 1
bisect_shortcut[0] += 1
return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]
- elif bisect_steps < iter_steps:
- bisect_shortcut[0] += len(in_keys)
- offsets = {}
- for key in in_keys:
- offsets.setdefault(bisect_right(fixed_keys, key),
- []).append(key)
- return [(o, offsets[o]) for o in sorted(offsets)]
+ # elif bisect_steps < iter_steps:
+ # bisect_shortcut[0] += len(in_keys)
+ # offsets = {}
+ # for key in in_keys:
+ # offsets.setdefault(bisect_right(fixed_keys, key),
+ # []).append(key)
+ # return [(o, offsets[o]) for o in sorted(offsets)]
else:
bisect_shortcut[1] += len(in_keys)
in_keys_iter = iter(in_keys)
@@ -611,19 +610,22 @@
# Find all the keys that are already in our value cache
global leaf_value_hits, miss_attempts
needed_keys = []
- for key in keys:
- value = self._leaf_value_cache.get(key, None)
- if value is not None:
- leaf_value_hits[0] += 1
- # This key is known not to be here, skip it
- value, refs = value
- if self.node_ref_lists:
- yield (self, key, value, refs)
+ if self._leaf_value_cache is None:
+ needed_keys = keys
+ else:
+ for key in keys:
+ value = self._leaf_value_cache.get(key, None)
+ if value is not None:
+ leaf_value_hits[0] += 1
+ # This key is known not to be here, skip it
+ value, refs = value
+ if self.node_ref_lists:
+ yield (self, key, value, refs)
+ else:
+ yield (self, key, value)
else:
- yield (self, key, value)
- else:
- leaf_value_hits[1] += 1
- needed_keys.append(key)
+ leaf_value_hits[1] += 1
+ needed_keys.append(key)
needed_keys = sorted(needed_keys)
nodes_and_keys = [(0, needed_keys)]
More information about the bazaar-commits
mailing list