Rev 4709: Use a cStringIO.StringIO for 1-page btree indexes. in http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b1-small_btree_no_disk

John Arbash Meinel john at arbash-meinel.com
Mon Sep 21 19:36:12 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b1-small_btree_no_disk

------------------------------------------------------------
revno: 4709
revision-id: john at arbash-meinel.com-20090921183604-p1g8svzkwklc9bsw
parent: pqm at pqm.ubuntu.com-20090919230718-vm9zqiov9h3t9bzg
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1.0b1-small_btree_no_disk
timestamp: Mon 2009-09-21 13:36:04 -0500
message:
  Use a cStringIO.StringIO for 1-page btree indexes.
  
  bzr-search creates a *lot* of tiny indices (on a big project, it isn't uncommon
  to have 200k indices).
  This means we generate 2 temp files for every index, which is overkill.
  Changing it to use a StringIO while we have <= 1 page ends up being a big win.
  Indexing just 'bzr-search' itself drops from 4.3s to 1.0s on Windows
  (especially since temp files are more expensive here).
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2009-09-14 01:48:28 +0000
+++ b/bzrlib/btree_index.py	2009-09-21 18:36:04 +0000
@@ -17,6 +17,7 @@
 
 """B+Tree indices"""
 
+import cStringIO
 from bisect import bisect_right
 import math
 import tempfile
@@ -60,14 +61,24 @@
     def __init__(self):
         """Create a _BuilderRow."""
         self.nodes = 0
-        self.spool = tempfile.TemporaryFile()
+        self.spool = None# tempfile.TemporaryFile(prefix='bzr-index-row-')
         self.writer = None
 
     def finish_node(self, pad=True):
         byte_lines, _, padding = self.writer.finish()
         if self.nodes == 0:
+            assert self.spool is None
+            self.spool = cStringIO.StringIO()
             # padded note:
             self.spool.write("\x00" * _RESERVED_HEADER_BYTES)
+        elif self.nodes == 1:
+            # We got bigger than 1 node, switch to a temp file
+            assert self.spool is not None
+            spool = tempfile.TemporaryFile(prefix='bzr-index-row-')
+            spool.write(self.spool.getvalue())
+            self.spool = spool
+        else:
+            assert self.spool is not None
         skipped_bytes = 0
         if not pad and padding:
             del byte_lines[-1]
@@ -182,11 +193,9 @@
              backing_pos) = self._spill_mem_keys_and_combine()
         else:
             new_backing_file, size = self._spill_mem_keys_without_combining()
-        dir_path, base_name = osutils.split(new_backing_file.name)
         # Note: The transport here isn't strictly needed, because we will use
         #       direct access to the new_backing._file object
-        new_backing = BTreeGraphIndex(get_transport(dir_path),
-                                      base_name, size)
+        new_backing = BTreeGraphIndex(get_transport('.'), '<temp>', size)
         # GC will clean up the file
         new_backing._file = new_backing_file
         if self._combine_backing_indices:
@@ -379,13 +388,16 @@
         for row in reversed(rows):
             pad = (type(row) != _LeafBuilderRow)
             row.finish_node(pad=pad)
-        result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
         lines = [_BTSIGNATURE]
         lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
         lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
         lines.append(_OPTION_LEN + str(key_count) + '\n')
         row_lengths = [row.nodes for row in rows]
         lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')
+        if row_lengths and row_lengths[-1] > 1:
+            result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
+        else:
+            result = cStringIO.StringIO()
         result.writelines(lines)
         position = sum(map(len, lines))
         root_row = True



More information about the bazaar-commits mailing list