Rev 3673: Update the stats for the current code layout. in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree
John Arbash Meinel
john at arbash-meinel.com
Fri Aug 22 04:58:21 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree
------------------------------------------------------------
revno: 3673
revision-id: john at arbash-meinel.com-20080822035819-yx19e7qxdvjgaeql
parent: john at arbash-meinel.com-20080822022908-420tr0519tdz6pxy
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree
timestamp: Thu 2008-08-21 22:58:19 -0500
message:
Update the stats for the current code layout.
This shows why I like _max_repack=2 so much. It is
the highest value that has 'no waste'.
At _max_repack=2, you can always sneak in 1 more
line, which avoids triggering an extra repack.
Also, updating the timings with the current tuning.
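For readers of the tables below: hit_max_repack and buffer_full appear to correspond to
the first two slots of the module-level _stats list this revision introduces (the third
slot counts lines that still fit after a repack). A rough sketch of dumping those
counters after an indexing run, assuming _stats keeps its current shape -- the helper
name is made up and not bzrlib API:

    from bzrlib import chunk_writer

    def dump_chunk_writer_stats():
        # My reading of the diff: _stats is [hit_max_repack, buffer_full,
        # fit_after_repack]; it is only reset when the module is reloaded.
        hit_max_repack, buffer_full, fit_after_repack = chunk_writer._stats
        print 'hit_max_repack=%d buffer_full=%d fit_after_repack=%d' % (
            hit_max_repack, buffer_full, fit_after_repack)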
modified:
bzrlib/chunk_writer.py chunk_writer.py-20080630234519-6ggn4id17nipovny-1
bzrlib/tests/test_chunk_writer.py test_chunk_writer.py-20080630234519-6ggn4id17nipovny-2
-------------- next part --------------
=== modified file 'bzrlib/chunk_writer.py'
--- a/bzrlib/chunk_writer.py 2008-08-22 02:09:36 +0000
+++ b/bzrlib/chunk_writer.py 2008-08-22 03:58:19 +0000
@@ -20,6 +20,7 @@
 import zlib
 from zlib import Z_FINISH, Z_SYNC_FLUSH

+_stats = [0, 0, 0]

 class ChunkWriter(object):
     """ChunkWriter allows writing of compressed data with a fixed size.
@@ -38,23 +39,21 @@
         number of times we will try.
         In testing, some values for bzr.dev::

-                    w/o copy    w/ copy     w/ copy ins w/ copy & save
-            repack  time  MB    time  MB    time  MB    time  MB
-             1       8.8  5.1    8.9  5.1    9.6  4.4   12.5  4.1
-             2       9.6  4.4   10.1  4.3   10.4  4.2   11.1  4.1
-             3      10.6  4.2   11.1  4.1   11.2  4.1   11.3  4.1
-             4      12.0  4.1
-             5      12.6  4.1
-            20      12.9  4.1   12.2  4.1   12.3  4.1
+            repack  time  MB   hit_max_repack  buffer_full
+             1       7.9  5.1  1268            0
+             2       8.8  4.4  1069            0
+             3       9.7  4.2  1022            46
+             4      11.1  4.1   974            619
+            20      11.9  4.1     0            1012

         In testing, some values for mysql-unpacked::

-                    w/o copy    w/ copy     w/ copy ins w/ copy & save
-            repack  time  MB    time  MB    time  MB    time  MB
-             1      56.6  16.9  60.7  14.2
-             2      59.3  14.1  62.6  13.5  64.3  13.4
-             3      64.4  13.5
-            20      73.4  13.4
+            repack  time  MB    hit_max_repack  buffer_full
+             1      52.4  16.9  4295            0
+             2      55.8  14.1  3561            0
+             3      60.3  13.5  3407            197
+             4      66.7  13.4  3203            2154
+            20      69.3  13.4     0            3380

     :cvar _default_min_compression_size: The expected minimum compression.
         While packing nodes into the page, we won't Z_SYNC_FLUSH until we have
@@ -162,8 +161,8 @@
             self.bytes_in.append(bytes)
             self.seen_bytes = next_seen_size
         else:
-            if self.num_repack >= self._max_repack and not reserved:
-                # We already know we don't want to try to fit more
+            if self.num_repack > self._max_repack and not reserved:
+                self.unused_bytes = bytes
                 return True
             # This may or may not fit, try to add it with Z_SYNC_FLUSH
             out = comp.compress(bytes)
@@ -171,17 +170,27 @@
             if out:
                 self.bytes_list.append(out)
                 self.bytes_out_len += len(out)
-            if self.bytes_out_len + 10 > capacity:
+            if self.bytes_out_len + 10 <= capacity:
+                # It fit, so mark it added
+                self.bytes_in.append(bytes)
+                self.seen_bytes = next_seen_size
+            else:
                 # We are over budget, try to squeeze this in without any
                 # Z_SYNC_FLUSH calls
                 self.num_repack += 1
-                bytes_out, this_len, compressor = self._recompress_all_bytes_in(bytes)
+                (bytes_out, this_len,
+                 compressor) = self._recompress_all_bytes_in(bytes)
+                if self.num_repack >= self._max_repack:
+                    # When we get *to* _max_repack, bump over so that the
+                    # earlier > _max_repack will be triggered.
+                    self.num_repack += 1
+                    _stats[0] += 1
                 if this_len + 10 > capacity:
-                    # No way we can add anymore, we need to re-pack because our
-                    # compressor is now out of sync.
-                    # This seems to be rarely triggered over
-                    # num_repack > _max_repack
-                    bytes_out, this_len, compressor = self._recompress_all_bytes_in()
+                    # In real-world testing, this only happens when _max_repack
+                    # is set >2, and even then rarely (46 out of 1022)
+                    (bytes_out, this_len,
+                     compressor) = self._recompress_all_bytes_in()
+                    _stats[1] += 1
                     self.compressor = compressor
                     self.bytes_list = bytes_out
                     self.bytes_out_len = this_len
@@ -191,13 +200,10 @@
                     # This fits when we pack it tighter, so use the new packing
                     # There is one Z_SYNC_FLUSH call in
                     # _recompress_all_bytes_in
+                    _stats[2] += 1
                     self.compressor = compressor
                     self.bytes_in.append(bytes)
                     self.bytes_list = bytes_out
                     self.bytes_out_len = this_len
-            else:
-                # It fit, so mark it added
-                self.bytes_in.append(bytes)
-                self.seen_bytes = next_seen_size
         return False
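A standalone paraphrase (not the real ChunkWriter code) of the overflow path changed
above, included only to make the two new counters easier to follow. Here repack_with
and repack_without stand in for _recompress_all_bytes_in(bytes) and
_recompress_all_bytes_in(), and writer is assumed to carry num_repack, _max_repack and
a three-slot stats list; everything else is simplified:

    def add_when_over_budget(writer, bytes, capacity):
        writer.num_repack += 1
        this_len = writer.repack_with(bytes)    # repack *with* the new line
        if writer.num_repack >= writer._max_repack:
            # Reaching the limit bumps the counter past it, so the cheap
            # "num_repack > _max_repack" guard rejects later writes outright.
            writer.num_repack += 1
            writer.stats[0] += 1                # hit_max_repack
        if this_len + 10 > capacity:
            # Even a full repack could not make room: that work is thrown
            # away and everything is repacked again *without* the new line.
            writer.repack_without()
            writer.stats[1] += 1                # buffer_full
            return True                         # caller must start a new chunk
        writer.stats[2] += 1                    # the line fit after the repack
        return False

In the measured runs the buffer_full branch is never reached with _max_repack at 1 or 2,
which is the 'no waste' property the commit message points at: the repack always frees
enough room for the line that triggered it, so the extra repack-without-the-line pass is
avoided.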
=== modified file 'bzrlib/tests/test_chunk_writer.py'
--- a/bzrlib/tests/test_chunk_writer.py 2008-08-22 02:09:36 +0000
+++ b/bzrlib/tests/test_chunk_writer.py 2008-08-22 03:58:19 +0000
@@ -78,9 +78,12 @@
             # Create a line with this group
             lines.append(''.join(map(str, numbers)) + '\n')
         writer = chunk_writer.ChunkWriter(4096, 256)
-        for line in lines:
+        for idx, line in enumerate(lines):
             if writer.write(line):
+                self.assertEqual(44, idx)
                 break
+        else:
+            self.fail('We were able to write all lines')
         self.assertFalse(writer.write("A"*256, reserved=True))
         bytes_list, unused, _ = writer.finish()
         node_bytes = self.check_chunk(bytes_list, 4096)
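As a usage note on the contract the test relies on: write() returns True once a line no
longer fits and stores it as the unused bytes, which finish() then hands back alongside
the list of compressed chunks. A minimal sketch, assuming nothing beyond what the test
above exercises (the pack_lines helper name is made up, not bzrlib API):

    from bzrlib import chunk_writer

    def pack_lines(lines, chunk_size=4096, reserved=256):
        writer = chunk_writer.ChunkWriter(chunk_size, reserved)
        for line in lines:
            if writer.write(line):
                break                 # chunk is full; this line was not added
        bytes_list, unused, _ = writer.finish()
        return ''.join(bytes_list), unused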