Rev 27: More instrumenting in http://bzr.arbash-meinel.com/plugins/index2
John Arbash Meinel
john at arbash-meinel.com
Wed Jul 2 21:17:08 BST 2008
At http://bzr.arbash-meinel.com/plugins/index2
------------------------------------------------------------
revno: 27
revision-id: john at arbash-meinel.com-20080702201701-f40mgcxxf229qhei
parent: john at arbash-meinel.com-20080702195000-hgd1exet98nfxt7e
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Wed 2008-07-02 15:17:01 -0500
message:
More instrumenting
-------------- next part --------------
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-02 19:50:00 +0000
+++ b/btree_index.py 2008-07-02 20:17:01 +0000
@@ -41,9 +41,11 @@
_PAGE_SIZE = 4096
bloom_hits = 0
-leaf_value_hits = 0
-missing_hits = 0 # Found this key cached as missing
+leaf_value_hits = [0, 0]
+internal_node_hits = [0, 0]
+leaf_node_hits = [0, 0]
miss_attempts = 0 # Missed this entry while looking up
+bisect_shortcut = [0, 0]
class NodeBloom(BloomSHA1):
@@ -359,9 +361,6 @@
return nodes, bloom
-_key_not_present = object()
-
-
class BTreeGraphIndex(object):
"""Access to nodes via the standard GraphIndex interface for B+Tree's.
@@ -369,7 +368,7 @@
memory except when very large walks are done.
"""
- _default_use_blooms = True
+ _default_use_blooms = False
def __init__(self, transport, name, size=None):
"""Create a B+Tree index object on the index name.
@@ -388,7 +387,7 @@
self._page_size = transport.recommended_page_size()
self._root_node = None
# Default max size is 100,000 leave values
- self._leaf_value_cache = lru_cache.LRUCache(1000*1000)
+ self._leaf_value_cache = lru_cache.LRUCache(100*1000)
self._leaf_node_cache = lru_cache.LRUCache()
self._internal_node_cache = lru_cache.LRUCache()
self._key_count = None
@@ -438,7 +437,7 @@
found[node_pos] = node
return found
- def _get_nodes(self, cache, node_indexes):
+ def _get_nodes(self, cache, node_indexes, counter):
found = {}
needed = []
for idx in node_indexes:
@@ -447,8 +446,10 @@
continue
try:
found[idx] = cache[idx]
+ counter[0] += 1
except KeyError:
needed.append(idx)
+ counter[1] += 1
found.update(self._cache_nodes(needed, cache))
return found
@@ -457,17 +458,20 @@
After getting it, the node will be cached.
"""
- return self._get_nodes(self._internal_node_cache, node_indexes)
+ return self._get_nodes(self._internal_node_cache, node_indexes,
+ internal_node_hits)
def _get_leaf_nodes(self, node_indexes):
"""Get a bunch of nodes, from cache or disk."""
- # nodes = self._get_nodes(self._leaf_node_cache, node_indexes)
- found = {}
- for node_pos, node in self._read_nodes(sorted(node_indexes)):
- found[node_pos] = node
+ found = self._get_nodes(self._leaf_node_cache, node_indexes,
+ leaf_node_hits)
+ for node in found.itervalues():
for key, value in node.keys.iteritems():
- if key not in self._leaf_value_cache:
- self._leaf_value_cache[key] = value
+ if key in self._leaf_value_cache:
+ # Don't add the rest of the keys, we've seen this node
+ # before.
+ break
+ self._leaf_value_cache[key] = value
return found
def iter_all_entries(self):
@@ -522,7 +526,10 @@
# if (len(in_keys) + len(fixed_keys)
# > len(in_keys) * math.log(len(fixed_keys), 2)):
if len(in_keys) == 1: # Bisect will always be faster for M = 1
+ bisect_shortcut[0] += 1
return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]
+ else:
+ bisect_shortcut[1] += len(in_keys)
in_keys_iter = iter(in_keys)
fixed_keys_iter = enumerate(fixed_keys)
cur_in_key = in_keys_iter.next()
@@ -587,22 +594,20 @@
if not self.key_count():
return
# Find all the keys that are already in our value cache
- global leaf_value_hits, missing_hits, miss_attempts
+ global leaf_value_hits, miss_attempts
needed_keys = []
for key in keys:
value = self._leaf_value_cache.get(key, None)
if value is not None:
- if value is not _key_not_present:
- leaf_value_hits += 1
- # This key is known not to be here, skip it
- value, refs = value
- if self.node_ref_lists:
- yield (self, key, value, refs)
- else:
- yield (self, key, value)
+ leaf_value_hits[0] += 1
+ # This key is known not to be here, skip it
+ value, refs = value
+ if self.node_ref_lists:
+ yield (self, key, value, refs)
else:
- missing_hits += 1
+ yield (self, key, value)
else:
+ leaf_value_hits[1] += 1
needed_keys.append(key)
needed_keys = sorted(needed_keys)
@@ -653,7 +658,6 @@
yield (self, key, value)
else:
miss_attempts += 1
- self._leaf_value_cache[key] = _key_not_present
def iter_entries_prefix(self, keys):
"""Iterate over keys within the index using prefix matching.
=== modified file 'indexbench.py'
--- a/indexbench.py 2008-07-02 19:50:00 +0000
+++ b/indexbench.py 2008-07-02 20:17:01 +0000
@@ -54,16 +54,26 @@
def reset_hit_counts():
btree_index.bloom_hits = 0
- btree_index.missing_hits = 0
- btree_index.leaf_value_hits = 0
+ btree_index.leaf_value_hits[:] = [0, 0]
btree_index.miss_attempts = 0
+ btree_index.internal_node_hits[:] = [0, 0]
+ btree_index.leaf_node_hits[:] = [0, 0]
+ btree_index.bisect_shortcut[:] = [0, 0]
def hits():
- return "bloom(%d) missing(%d) leaf_value(%d) miss_attempts(%d)" % (
+ return ("\n bloom(%d) leaf_value(%d,%d) miss(%d), internal(%d,%d),"
+ "\n leaf_node(%d,%d) bisect(%d,%d)" % (
btree_index.bloom_hits,
- btree_index.missing_hits,
- btree_index.leaf_value_hits,
- btree_index.miss_attempts)
+ btree_index.leaf_value_hits[0],
+ btree_index.leaf_value_hits[1],
+ btree_index.miss_attempts,
+ btree_index.internal_node_hits[0],
+ btree_index.internal_node_hits[1],
+ btree_index.leaf_node_hits[0],
+ btree_index.leaf_node_hits[1],
+ btree_index.bisect_shortcut[0],
+ btree_index.bisect_shortcut[1],
+ ))
def time_index(names, source, factory, builder, target, label, name_keys,
text_keys, use_drop_cache, fixtures, lsprof, tip_revision_id, ancestry):
@@ -104,6 +114,7 @@
# random shuffle, all keys (name_keys comes preshuffled)
if 'shuffle' in fixtures:
drop_cache()
+ reset_hit_counts()
now = time.time()
for name, index in iter_indices(names, target, factory):
order = name_keys[name]
@@ -111,7 +122,7 @@
for key in order:
index.iter_entries([key]).next()
finish = time.time()
- print "%s: iter_random_one in %0.3f" % (label, finish - now)
+ print "%s: iter_random_one in %0.3f,%s" % (label, finish - now, hits())
# text extraction simulation (follow a compression graph) for text_keys
if 'text' in fixtures:
text_names = [name for name in names if name.endswith('.tix')]
@@ -190,7 +201,7 @@
for node in index.iter_entries(other_keys):
pass
finish = time.time()
- print "%s: miss torture in %0.3f, %s" % (label, finish - now, hits)
+ print "%s: miss torture in %0.3f, %s" % (label, finish - now, hits())
class cmd_indexbench(Command):
More information about the bazaar-commits
mailing list