Rev 3690: Clean out the global state, good for prototyping and tuning, bad for production code. in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree
John Arbash Meinel
john at arbash-meinel.com
Thu Aug 28 02:59:59 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree
------------------------------------------------------------
revno: 3690
revision-id: john at arbash-meinel.com-20080828015958-bvdt8spf2ls57s39
parent: john at arbash-meinel.com-20080826005610-275jq9uqje3prqry
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree
timestamp: Wed 2008-08-27 20:59:58 -0500
message:
Clean out the global state, good for prototyping and tuning, bad for production code.
(as recommended by Robert)
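
For reference, this is the pattern being cleaned out versus the kind of
per-object state that would replace it, as a minimal sketch with
hypothetical names. Module-level counters are convenient while profiling,
but they are shared by every index in the process and are not
thread-safe::

    # Prototyping style: module-level mutable state, updated from any instance.
    _cache_hits = [0, 0]   # [hits, misses]

    class GlobalStatsIndex(object):
        def lookup(self, cache, key):
            if key in cache:
                _cache_hits[0] += 1     # every instance mutates the same list
                return cache[key]
            _cache_hits[1] += 1
            return None

    # Production style: the object that generates the numbers owns them.
    class LocalStatsIndex(object):
        def __init__(self):
            self._hits = 0
            self._misses = 0

        def lookup(self, cache, key):
            if key in cache:
                self._hits += 1
                return cache[key]
            self._misses += 1
            return None
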
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2008-08-22 02:18:27 +0000
+++ b/bzrlib/btree_index.py 2008-08-28 01:59:58 +0000
@@ -53,13 +53,6 @@
# 4K per page: 4MB - 1000 entries
_NODE_CACHE_SIZE = 1000
-leaf_value_hits = [0, 0]
-internal_node_hits = [0, 0]
-leaf_node_hits = [0, 0]
-miss_attempts = 0 # Missed this entry while looking up
-bisect_shortcut = [0, 0]
-dupes = [0]
-
class _BuilderRow(object):
"""The stored state accumulated while writing out a row in the index.
@@ -622,7 +615,7 @@
found[node_pos] = node
return found
- def _get_nodes(self, cache, node_indexes, counter):
+ def _get_nodes(self, cache, node_indexes):
found = {}
needed = []
for idx in node_indexes:
@@ -631,10 +624,8 @@
continue
try:
found[idx] = cache[idx]
- counter[0] += 1
except KeyError:
needed.append(idx)
- counter[1] += 1
found.update(self._cache_nodes(needed, cache))
return found
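
Stripped of its hit/miss counter, the helper above is a plain
cache-then-batch-fetch pattern. A standalone sketch of the same logic
(hypothetical names, not the bzrlib API)::

    def get_nodes(cache, node_indexes, fetch_missing):
        """Return {index: node}, reading any missing nodes in one batch."""
        found = {}
        needed = []
        for idx in node_indexes:
            try:
                found[idx] = cache[idx]
            except KeyError:
                needed.append(idx)
        if needed:
            fetched = fetch_missing(needed)   # e.g. one batched disk read
            cache.update(fetched)
            found.update(fetched)
        return found
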
@@ -643,13 +634,11 @@
After getting it, the node will be cached.
"""
- return self._get_nodes(self._internal_node_cache, node_indexes,
- internal_node_hits)
+ return self._get_nodes(self._internal_node_cache, node_indexes)
def _get_leaf_nodes(self, node_indexes):
"""Get a bunch of nodes, from cache or disk."""
- found = self._get_nodes(self._leaf_node_cache, node_indexes,
- leaf_node_hits)
+ found = self._get_nodes(self._leaf_node_cache, node_indexes)
if self._leaf_value_cache is not None:
for node in found.itervalues():
for key, value in node.keys.iteritems():
@@ -715,17 +704,13 @@
# iter_steps = len(in_keys) + len(fixed_keys)
# bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
if len(in_keys) == 1: # Bisect will always be faster for M = 1
- bisect_shortcut[0] += 1
return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]
# elif bisect_steps < iter_steps:
- # bisect_shortcut[0] += len(in_keys)
# offsets = {}
# for key in in_keys:
# offsets.setdefault(bisect_right(fixed_keys, key),
# []).append(key)
# return [(o, offsets[o]) for o in sorted(offsets)]
- else:
- bisect_shortcut[1] += len(in_keys)
in_keys_iter = iter(in_keys)
fixed_keys_iter = enumerate(fixed_keys)
cur_in_key = in_keys_iter.next()
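
The shortcut above exploits the fact that for a single search key a
straight binary search is always cheaper than walking both sequences in
step; the commented-out variant buckets every key by its bisect position
instead. Both paths as a sketch (hypothetical helper name)::

    from bisect import bisect_right

    def partition_keys(in_keys, fixed_keys):
        """Map sorted search keys to [(child_offset, keys_for_that_child)]."""
        if len(in_keys) == 1:
            # One key: a single bisect beats the linear merge walk.
            return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]
        offsets = {}
        for key in in_keys:
            offsets.setdefault(bisect_right(fixed_keys, key), []).append(key)
        return [(o, offsets[o]) for o in sorted(offsets)]

    # partition_keys([('b',), ('z',)], [('a',), ('m',)])
    # -> [(1, [('b',)]), (2, [('z',)])]
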
@@ -794,7 +779,6 @@
if not keys:
return
- global leaf_value_hits, miss_attempts, dupes
if not self.key_count():
return
@@ -805,7 +789,6 @@
for key in keys:
value = self._leaf_value_cache.get(key, None)
if value is not None:
- leaf_value_hits[0] += 1
# The cached entry holds (value, refs) for this key; yield it without touching disk
value, refs = value
if self.node_ref_lists:
@@ -813,7 +796,6 @@
else:
yield (self, key, value)
else:
- leaf_value_hits[1] += 1
needed_keys.append(key)
last_key = None
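
The branch above is the value-cache fast path: keys whose values are
already cached on this index are yielded immediately, and only the rest
go on to the normal leaf-node lookup. The same split as a standalone
sketch (hypothetical names)::

    def split_cached(index, keys, value_cache, ref_lists):
        """Return (cached_hits, needed_keys) without touching disk."""
        hits = []
        needed_keys = []
        for key in keys:
            cached = value_cache.get(key)
            if cached is not None:
                value, refs = cached
                if ref_lists:
                    hits.append((index, key, value, refs))
                else:
                    hits.append((index, key, value))
            else:
                needed_keys.append(key)
        return hits, needed_keys
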
@@ -857,8 +839,6 @@
yield (self, next_sub_key, value, refs)
else:
yield (self, next_sub_key, value)
- else:
- miss_attempts += 1
def iter_entries_prefix(self, keys):
"""Iterate over keys within the index using prefix matching.
=== modified file 'bzrlib/chunk_writer.py'
--- a/bzrlib/chunk_writer.py 2008-08-26 00:42:07 +0000
+++ b/bzrlib/chunk_writer.py 2008-08-28 01:59:58 +0000
@@ -20,10 +20,6 @@
import zlib
from zlib import Z_FINISH, Z_SYNC_FLUSH
-# [max_repack, buffer_full, repacks_with_space, min_compression,
-# total_bytes_in, total_bytes_out, avg_comp,
-# bytes_autopack, bytes_sync_packed, num_full_by_zsync]
-_stats = [0, 0, 0, 999, 0, 0, 0, 0, 0, 0]
class ChunkWriter(object):
"""ChunkWriter allows writing of compressed data with a fixed size.
@@ -40,54 +36,57 @@
will sometimes start over and compress the whole list to get tighter
packing. We get diminishing returns after a while, so this limits the
number of times we will try.
- In testing, some values for bzr.dev::
-
- repack time MB max full
- 1 7.5 4.6 1140 0
- 2 8.4 4.2 1036 1 6.8
- 3 9.8 4.1 1012 278
- 4 10.8 4.1 728 945
- 20 11.1 4.1 0 1012
-
- repack = 0
- zsync time MB repack max_z time w/ add_node
- 0 6.7 24.7 0 6270 5.0
- 1 6.5 13.2 0 3342 4.3
- 2 6.6 9.6 0 2414 4.9
- 5 6.5 6.2 0 1549 4.8
- 6 6.5 5.8 1 1435 4.8
- 7 6.6 5.5 19 1337 4.8
- 8 6.7 5.3 81 1220 4.4
- 10 6.8 5.0 260 967 5.3
- 11 6.8 4.9 366 839 5.3
- 12 6.9 4.8 454 731 5.1
- 15 7.2 4.7 704 450 5.8
- 20 7.7 4.6 1133 7 5.8
-
- In testing, some values for mysql-unpacked::
-
- next_bytes estim
- repack time MB hit_max full
- 1 51.7 15.4 3913 0
- 2 54.4 13.7 3467 0 35.4
- 20 67.0 13.4 0 3380 46.7
-
- repack=0
- zsync time w/ add_node
- 0 47.7 116.5 0 29782 29.5
- 1 48.5 60.2 0 15356 27.8
- 2 48.1 42.4 0 10822 27.8
- 5 48.3 25.5 0 6491 26.8
- 6 48.0 23.2 13 5896 27.3
- 7 48.1 21.6 29 5451 27.5
- 8 48.1 20.3 52 5108 27.1
- 10 46.9 18.6 195 4526 29.4
- 11 48.8 18.0 421 4143 29.2
- 12 47.4 17.5 702 3738 28.0
- 15 49.6 16.5 1223 2969 28.9
- 20 48.9 15.7 2182 1810 29.6
- 30 15.4 3891 23 31.4
+ The default is to try to avoid recompressing entirely, but setting this
+ to something like 20 will give maximum compression.
+
+ :cvar _max_zsync: Another tunable knob. If _max_repack is set to 0, then you
+ can limit the number of times we will try to pack more data into a
+ node. This allows us to do a single compression pass, rather than
+ trying until we overflow, and then recompressing again.
"""
+ # In testing, some values for bzr.dev::
+ # repack time MB max full
+ # 1 7.5 4.6 1140 0
+ # 2 8.4 4.2 1036 1 6.8
+ # 3 9.8 4.1 1012 278
+ # 4 10.8 4.1 728 945
+ # 20 11.1 4.1 0 1012
+ # repack = 0
+ # zsync time MB repack max_z time w/ add_node
+ # 0 6.7 24.7 0 6270 5.0
+ # 1 6.5 13.2 0 3342 4.3
+ # 2 6.6 9.6 0 2414 4.9
+ # 5 6.5 6.2 0 1549 4.8
+ # 6 6.5 5.8 1 1435 4.8
+ # 7 6.6 5.5 19 1337 4.8
+ # 8 6.7 5.3 81 1220 4.4
+ # 10 6.8 5.0 260 967 5.3
+ # 11 6.8 4.9 366 839 5.3
+ # 12 6.9 4.8 454 731 5.1
+ # 15 7.2 4.7 704 450 5.8
+ # 20 7.7 4.6 1133 7 5.8
+
+ # In testing, some values for mysql-unpacked::
+ # next_bytes estim
+ # repack time MB hit_max full
+ # 1 51.7 15.4 3913 0
+ # 2 54.4 13.7 3467 0 35.4
+ # 20 67.0 13.4 0 3380 46.7
+ # repack=0
+ # zsync time w/ add_node
+ # 0 47.7 116.5 0 29782 29.5
+ # 1 48.5 60.2 0 15356 27.8
+ # 2 48.1 42.4 0 10822 27.8
+ # 5 48.3 25.5 0 6491 26.8
+ # 6 48.0 23.2 13 5896 27.3
+ # 7 48.1 21.6 29 5451 27.5
+ # 8 48.1 20.3 52 5108 27.1
+ # 10 46.9 18.6 195 4526 29.4
+ # 11 48.8 18.0 421 4143 29.2
+ # 12 47.4 17.5 702 3738 28.0
+ # 15 49.6 16.5 1223 2969 28.9
+ # 20 48.9 15.7 2182 1810 29.6
+ # 30 15.4 3891 23 31.4
_max_repack = 0
_max_zsync = 8
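
A minimal sketch of the underlying technique (an illustration only, not
the ChunkWriter implementation): compress incrementally, use Z_SYNC_FLUSH
so the compressed length seen so far is exact, and when a piece will not
fit, rebuild the stream from the data already accepted so the rejected
piece does not linger in the compressor's state. A production writer also
reserves slack for the final Z_FINISH trailer, as the "+ 10" safety
margin later in this diff does::

    import zlib
    from zlib import Z_FINISH, Z_SYNC_FLUSH

    class FixedSizePacker(object):
        """Accept byte strings until the compressed stream nears chunk_size."""

        def __init__(self, chunk_size):
            self.chunk_size = chunk_size
            self.accepted = []        # uncompressed pieces committed so far
            self._reset()

        def _reset(self):
            self.compressor = zlib.compressobj()
            self.out = []
            self.out_len = 0

        def write(self, data):
            """Return True if data fits in this chunk, False if it is full."""
            piece = self.compressor.compress(data)
            piece += self.compressor.flush(Z_SYNC_FLUSH)
            if self.out_len + len(piece) > self.chunk_size:
                # The rejected data is already inside the zlib stream, so
                # recompress just the accepted pieces from scratch (the role
                # _recompress_all_bytes_in plays in the real writer).
                self._reset()
                for old in self.accepted:
                    out = self.compressor.compress(old)
                    out += self.compressor.flush(Z_SYNC_FLUSH)
                    self.out.append(out)
                    self.out_len += len(out)
                return False
            self.accepted.append(data)
            self.out.append(piece)
            self.out_len += len(piece)
            return True

        def finish(self):
            self.out.append(self.compressor.flush(Z_FINISH))
            return b''.join(self.out)
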
@@ -125,16 +124,6 @@
out = self.compressor.flush(Z_FINISH)
self.bytes_list.append(out)
self.bytes_out_len += len(out)
- if self.num_repack > 0 and self.bytes_out_len > 0:
- comp = float(self.seen_bytes) / self.bytes_out_len
- if comp < _stats[3]:
- _stats[3] = comp
- _stats[4] += self.seen_bytes
- _stats[5] += self.bytes_out_len
- _stats[6] = float(_stats[4]) / _stats[5]
-
- if self._max_repack == 0 and self.num_repack == 1:
- _stats[9] += 1
if self.bytes_out_len > self.chunk_size:
raise AssertionError('Somehow we ended up with too much'
@@ -204,10 +193,8 @@
self.bytes_in.append(bytes)
self.seen_bytes += len(bytes)
self.unflushed_in_bytes += len(bytes)
- _stats[7] += 1 # len(bytes)
else:
# This may or may not fit, try to add it with Z_SYNC_FLUSH
- _stats[8] += 1 # len(bytes)
# Note: It is tempting to do this as a look-ahead pass, and to
# 'copy()' the compressor before flushing. However, it seems that
# 'flush()' is when the compressor actually does most work
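
The look-ahead idea the comment rejects would look roughly like this
(a sketch with hypothetical names): clone the compressor with copy(),
trial-flush the clone, and only commit the write on the real compressor
if the result still fits. The catch, as noted above, is that flush() is
where most of the CPU time goes, so the trial costs nearly as much as
simply doing the write::

    from zlib import Z_SYNC_FLUSH

    def would_fit(compressor, data, bytes_out_len, chunk_size):
        """Trial-compress on a clone; the real compressor is left untouched."""
        trial = compressor.copy()
        grown = trial.compress(data) + trial.flush(Z_SYNC_FLUSH)
        return bytes_out_len + len(grown) <= chunk_size
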
@@ -248,11 +235,9 @@
# When we get *to* _max_repack, bump over so that the
# earlier > _max_repack will be triggered.
self.num_repack += 1
- _stats[0] += 1
if this_len + 10 > capacity:
(bytes_out, this_len,
compressor) = self._recompress_all_bytes_in()
- _stats[1] += 1
self.compressor = compressor
# Force us to not allow more data
self.num_repack = self._max_repack + 1
@@ -264,7 +249,6 @@
# This fits when we pack it tighter, so use the new packing
# There is one Z_SYNC_FLUSH call in
# _recompress_all_bytes_in
- _stats[2] += 1
self.compressor = compressor
self.bytes_in.append(bytes)
self.bytes_list = bytes_out
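
With the module-level _stats list gone, anyone tuning can still recover
the headline number it tracked (the overall compression ratio) from the
writer's own attributes, both of which remain visible in this diff
(seen_bytes and bytes_out_len)::

    def compression_ratio(writer):
        """Total bytes fed in divided by compressed bytes out, 0.0 if empty."""
        if writer.bytes_out_len == 0:
            return 0.0
        return float(writer.seen_bytes) / writer.bytes_out_len
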