Rev 3646: For iter_all and three_level tests adjust spill-at. in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree

John Arbash Meinel john at arbash-meinel.com
Wed Aug 20 20:34:31 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree

------------------------------------------------------------
revno: 3646
revision-id: john at arbash-meinel.com-20080820193429-0v5jm5zd4gggejpx
parent: john at arbash-meinel.com-20080820185434-t1y91biejviv7skx
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree
timestamp: Wed 2008-08-20 14:34:29 -0500
message:
  For iter_all and three_level tests adjust spill-at.
  It turns out that when adding 200k entries, we spill to disk 2 times,
  and each spill to disk adds roughly 20s to the test.
  So instead of 20s for a single final flush, we spend 60s (for three_level).
  Raising spill_at increases memory consumption, but it drops the test
  time from 64s to 35s.
modified:
  bzrlib/chunk_writer.py         chunk_writer.py-20080630234519-6ggn4id17nipovny-1
  bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
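
As context for the spill_at change, here is a minimal sketch (not part of this
commit) of sizing a builder so it never spills during a test. It assumes bzrlib
is importable; build_without_spilling, the 1000-node count, and the simple
key_elements=1, reference_lists=0 node layout are illustrative choices, not the
2/2 layout the tests below actually use.

    from bzrlib import btree_index

    def build_without_spilling(nodes):
        # spill_at one higher than the node count keeps everything in memory,
        # mirroring the spill_at=200001 used for the 200,000-node tests in
        # this commit; a smaller spill_at makes the builder write intermediate
        # backing indices to disk, which is the cost the tests now avoid.
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0,
                                           spill_at=len(nodes) + 1)
        for key, value in nodes:
            builder.add_node(key, value)
        # finish() returns a file-like object holding the serialised index,
        # which the tests hand to transport.put_file().
        return builder.finish()

    nodes = [(('%08d' % i,), 'value %d' % i) for i in range(1000)]
    index_file = build_without_spilling(nodes)
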
-------------- next part --------------
=== modified file 'bzrlib/chunk_writer.py'
--- a/bzrlib/chunk_writer.py	2008-08-20 18:54:34 +0000
+++ b/bzrlib/chunk_writer.py	2008-08-20 19:34:29 +0000
@@ -73,7 +73,7 @@
         """
         self.bytes_in = None # Free the data cached so far, we don't need it
         self.bytes_list.append(self.compressor.flush(Z_FINISH))
-        total_len = sum(len(b) for b in self.bytes_list)
+        total_len = sum(map(len, self.bytes_list))
         if total_len > self.chunk_size:
             raise AssertionError('Somehow we ended up with too much'
                                  ' compressed data, %d > %d'
@@ -86,17 +86,19 @@
     def _recompress_all_bytes_in(self, extra_bytes=None):
         compressor = zlib.compressobj()
         bytes_out = []
+        append = bytes_out.append
+        compress = compressor.compress
         for accepted_bytes in self.bytes_in:
-            out = compressor.compress(accepted_bytes)
+            out = compress(accepted_bytes)
             if out:
-                bytes_out.append(out)
+                append(out)
         if extra_bytes:
-            out = compressor.compress(extra_bytes)
+            out = compress(extra_bytes)
             if out:
-                bytes_out.append(out)
+                append(out)
             out = compressor.flush(Z_SYNC_FLUSH)
             if out:
-                bytes_out.append(out)
+                append(out)
         return bytes_out, compressor
 
     def write(self, bytes):
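
The hunk above hoists the attribute lookups for bytes_out.append and
compressor.compress out of the loop by binding them to local names once. A
self-contained sketch of the same pattern, illustrative only (recompress is a
made-up name, and only the standard zlib module is assumed):

    import zlib

    def recompress(chunks, extra_bytes=None):
        compressor = zlib.compressobj()
        bytes_out = []
        # Bind the bound methods to locals once, outside the loop; in a hot
        # path this saves one attribute lookup per call.
        append = bytes_out.append
        compress = compressor.compress
        for chunk in chunks:
            out = compress(chunk)
            if out:
                append(out)
        if extra_bytes:
            out = compress(extra_bytes)
            if out:
                append(out)
            out = compressor.flush(zlib.Z_SYNC_FLUSH)
            if out:
                append(out)
        return bytes_out, compressor
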

=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py	2008-08-20 18:30:41 +0000
+++ b/bzrlib/tests/test_btree_index.py	2008-08-20 19:34:29 +0000
@@ -16,6 +16,7 @@
 
 """Tests for btree indices."""
 
+import time
 import pprint
 import zlib
 
@@ -275,14 +276,25 @@
         # pointer to the second node that the internal node is for, _not_
         # the first, otherwise the first node overlaps with the last node of
         # the prior internal node on that row.
-        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
+        # We will be adding 200,000 nodes, so spill at 200,001 to prevent
+        # having to flush anything out to disk.
+        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
+            spill_at=200001)
         # 200K nodes is enough to create two internal nodes on the second level
+        tstart = time.time()
         nodes = self.make_nodes(100000, 2, 2)
+        delta_make = time.time() - tstart
+
+        tstart = time.time()
         for node in nodes:
             builder.add_node(*node)
+        delta = time.time() - tstart
         transport = get_transport('trace+' + self.get_url(''))
+        tstart = time.time()
         size = transport.put_file('index', builder.finish())
+        delta_flush = time.time() - tstart
         del builder
+        # print "\n  Spent %.3fs creating and %.3fs adding nodes and %.3fs flushing" % (delta_make, delta, delta_flush)
         index = btree_index.BTreeGraphIndex(transport, 'index', size)
         # Seed the metadata, we're using internal calls now.
         index.key_count()
@@ -678,7 +690,8 @@
     def test_iter_all_entries_reads(self):
         # iterating all entries reads the header, then does a linear
         # read.
-        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
+        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
+                                           spill_at=200001)
         # 200k nodes is enough to create a three-level index.
         nodes = self.make_nodes(100000, 2, 2)
         for node in nodes:


