Rev 4709: Use a cStringIO.StringIO for 1-page btree indexes. in http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b1-small_btree_no_disk
John Arbash Meinel
john at arbash-meinel.com
Mon Sep 21 19:36:12 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b1-small_btree_no_disk
------------------------------------------------------------
revno: 4709
revision-id: john at arbash-meinel.com-20090921183604-p1g8svzkwklc9bsw
parent: pqm at pqm.ubuntu.com-20090919230718-vm9zqiov9h3t9bzg
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1.0b1-small_btree_no_disk
timestamp: Mon 2009-09-21 13:36:04 -0500
message:
Use a cStringIO.StringIO for 1-page btree indexes.
bzr-search creates a *lot* of tiny indices (on a big project, it isn't uncommon
to have 200k indices).
This means we generate 2 temp files for every index, which is overkill.
Changing it to use a StringIO while we have <= 1 page ends up being a big win.
Indexing just 'bzr-search' itself drops from 4.3s => 1.0s on Windows.
(The win is especially large there, since temp files are more expensive on Windows.)
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2009-09-14 01:48:28 +0000
+++ b/bzrlib/btree_index.py 2009-09-21 18:36:04 +0000
@@ -17,6 +17,7 @@
"""B+Tree indices"""
+import cStringIO
from bisect import bisect_right
import math
import tempfile
@@ -60,14 +61,24 @@
def __init__(self):
"""Create a _BuilderRow."""
self.nodes = 0
- self.spool = tempfile.TemporaryFile()
+ self.spool = None# tempfile.TemporaryFile(prefix='bzr-index-row-')
self.writer = None
def finish_node(self, pad=True):
byte_lines, _, padding = self.writer.finish()
if self.nodes == 0:
+ assert self.spool is None
+ self.spool = cStringIO.StringIO()
# padded note:
self.spool.write("\x00" * _RESERVED_HEADER_BYTES)
+ elif self.nodes == 1:
+ # We got bigger than 1 node, switch to a temp file
+ assert self.spool is not None
+ spool = tempfile.TemporaryFile(prefix='bzr-index-row-')
+ spool.write(self.spool.getvalue())
+ self.spool = spool
+ else:
+ assert self.spool is not None
skipped_bytes = 0
if not pad and padding:
del byte_lines[-1]
@@ -182,11 +193,9 @@
backing_pos) = self._spill_mem_keys_and_combine()
else:
new_backing_file, size = self._spill_mem_keys_without_combining()
- dir_path, base_name = osutils.split(new_backing_file.name)
# Note: The transport here isn't strictly needed, because we will use
# direct access to the new_backing._file object
- new_backing = BTreeGraphIndex(get_transport(dir_path),
- base_name, size)
+ new_backing = BTreeGraphIndex(get_transport('.'), '<temp>', size)
# GC will clean up the file
new_backing._file = new_backing_file
if self._combine_backing_indices:
@@ -379,13 +388,16 @@
for row in reversed(rows):
pad = (type(row) != _LeafBuilderRow)
row.finish_node(pad=pad)
- result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
lines = [_BTSIGNATURE]
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
lines.append(_OPTION_LEN + str(key_count) + '\n')
row_lengths = [row.nodes for row in rows]
lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')
+ if row_lengths and row_lengths[-1] > 1:
+ result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
+ else:
+ result = cStringIO.StringIO()
result.writelines(lines)
position = sum(map(len, lines))
root_row = True
More information about the bazaar-commits
mailing list