Rev 4187: (jam) Add GraphIndex.set_optimize(combine_backing_indices=False), in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Mon Mar 23 20:25:19 GMT 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4187
revision-id: pqm at pqm.ubuntu.com-20090323202515-uwlqu9w037ndukz4
parent: pqm at pqm.ubuntu.com-20090323173729-ngf9reea87ub75x4
parent: john at arbash-meinel.com-20090323193538-3d01aetz07jsyd3w
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2009-03-23 20:25:15 +0000
message:
(jam) Add GraphIndex.set_optimize(combine_backing_indices=False),
and use it for Packer.
modified:
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
------------------------------------------------------------
revno: 4168.3.6
revision-id: john at arbash-meinel.com-20090323193538-3d01aetz07jsyd3w
parent: john at arbash-meinel.com-20090323192057-eh1l34z1ab5x3qt4
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.14-btree_spill
timestamp: Mon 2009-03-23 14:35:38 -0500
message:
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
Update the Packer code so that it sets combine_backing_indices=False, as we know that
we won't be making extra queries.
modified:
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
------------------------------------------------------------
revno: 4168.3.5
revision-id: john at arbash-meinel.com-20090323192057-eh1l34z1ab5x3qt4
parent: john at arbash-meinel.com-20090321024706-drh9950i56vb484i
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.14-btree_spill
timestamp: Mon 2009-03-23 14:20:57 -0500
message:
Check that setting _combine_spilled_indices has the expected effect.
modified:
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
------------------------------------------------------------
revno: 4168.3.4
revision-id: john at arbash-meinel.com-20090321024706-drh9950i56vb484i
parent: john at arbash-meinel.com-20090320174406-nd2kq4i77fmfu3ss
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.14-btree_spill
timestamp: Fri 2009-03-20 21:47:06 -0500
message:
Restore the ability to spill, but prepare a flag to disable it.
modified:
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
------------------------------------------------------------
revno: 4168.3.3
revision-id: john at arbash-meinel.com-20090320174406-nd2kq4i77fmfu3ss
parent: john at arbash-meinel.com-20090319202123-2fravh4wzgt3lp57
parent: john at arbash-meinel.com-20090320165133-glkmnloupanz532h
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.14-btree_spill
timestamp: Fri 2009-03-20 12:44:06 -0500
message:
Merge the NEWS entry for the first part of spill-to-disk.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
------------------------------------------------------------
revno: 4168.3.2
revision-id: john at arbash-meinel.com-20090319202123-2fravh4wzgt3lp57
parent: john at arbash-meinel.com-20090319201050-yii7fgrjvlqn3nuf
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.14-btree_spill
timestamp: Thu 2009-03-19 15:21:23 -0500
message:
Fix the test suite now that we don't combine-on-spill
modified:
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
------------------------------------------------------------
revno: 4168.3.1
revision-id: john at arbash-meinel.com-20090319201050-yii7fgrjvlqn3nuf
parent: john at arbash-meinel.com-20090319183129-fnm26attyu1yw2s0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.14-btree_spill
timestamp: Thu 2009-03-19 15:10:50 -0500
message:
Set a name prefix for the btree_index temporary files.
Don't combine backing indices automatically, wait for the final pack.
modified:
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2009-03-20 16:00:17 +0000
+++ b/bzrlib/btree_index.py 2009-03-23 19:35:38 +0000
@@ -180,6 +180,34 @@
combine mem with the first and second indexes, creating a new one of
size 4x. On the fifth create a single new one, etc.
"""
+ if self._combine_backing_indices:
+ (new_backing_file, size,
+ backing_pos) = self._spill_mem_keys_and_combine()
+ else:
+ new_backing_file, size = self._spill_mem_keys_without_combining()
+ dir_path, base_name = osutils.split(new_backing_file.name)
+ # Note: The transport here isn't strictly needed, because we will use
+ # direct access to the new_backing._file object
+ new_backing = BTreeGraphIndex(get_transport(dir_path),
+ base_name, size)
+ # GC will clean up the file
+ new_backing._file = new_backing_file
+ if self._combine_backing_indices:
+ if len(self._backing_indices) == backing_pos:
+ self._backing_indices.append(None)
+ self._backing_indices[backing_pos] = new_backing
+ for backing_pos in range(backing_pos):
+ self._backing_indices[backing_pos] = None
+ else:
+ self._backing_indices.append(new_backing)
+ self._keys = set()
+ self._nodes = {}
+ self._nodes_by_key = None
+
+ def _spill_mem_keys_without_combining(self):
+ return self._write_nodes(self._iter_mem_nodes(), allow_optimize=False)
+
+ def _spill_mem_keys_and_combine(self):
iterators_to_combine = [self._iter_mem_nodes()]
pos = -1
for pos, backing in enumerate(self._backing_indices):
@@ -191,21 +219,7 @@
new_backing_file, size = \
self._write_nodes(self._iter_smallest(iterators_to_combine),
allow_optimize=False)
- dir_path, base_name = osutils.split(new_backing_file.name)
- # Note: The transport here isn't strictly needed, because we will use
- # direct access to the new_backing._file object
- new_backing = BTreeGraphIndex(get_transport(dir_path),
- base_name, size)
- # GC will clean up the file
- new_backing._file = new_backing_file
- if len(self._backing_indices) == backing_pos:
- self._backing_indices.append(None)
- self._backing_indices[backing_pos] = new_backing
- for pos in range(backing_pos):
- self._backing_indices[pos] = None
- self._keys = set()
- self._nodes = {}
- self._nodes_by_key = None
+ return new_backing_file, size, backing_pos
def add_nodes(self, nodes):
"""Add nodes to the index.
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2009-02-23 15:29:35 +0000
+++ b/bzrlib/index.py 2009-03-23 19:35:38 +0000
@@ -99,6 +99,7 @@
self._nodes_by_key = None
self._key_length = key_elements
self._optimize_for_size = False
+ self._combine_backing_indices = True
def _check_key(self, key):
"""Raise BadIndexKey if key is not a valid key for this index."""
@@ -315,16 +316,23 @@
(len(result.getvalue()), expected_bytes))
return result
- def set_optimize(self, for_size=True):
+ def set_optimize(self, for_size=None, combine_backing_indices=None):
"""Change how the builder tries to optimize the result.
:param for_size: Tell the builder to try and make the index as small as
possible.
+ :param combine_backing_indices: If the builder spills to disk to save
+ memory, should the on-disk indices be combined. Set to True if you
+ are going to be probing the index, but to False if you are not. (If
+ you are not querying, then the time spent combining is wasted.)
:return: None
"""
# GraphIndexBuilder itself doesn't pay attention to the flag yet, but
# other builders do.
- self._optimize_for_size = for_size
+ if for_size is not None:
+ self._optimize_for_size = for_size
+ if combine_backing_indices is not None:
+ self._combine_backing_indices = combine_backing_indices
class GraphIndex(object):
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2009-03-16 05:33:31 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2009-03-23 19:35:38 +0000
@@ -725,8 +725,15 @@
def open_pack(self):
"""Open a pack for the pack we are creating."""
- return NewPack(self._pack_collection, upload_suffix=self.suffix,
+ new_pack = NewPack(self._pack_collection, upload_suffix=self.suffix,
file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
+ # We know that we will process all nodes in order, and don't need to
+ # query, so don't combine any indices spilled to disk until we are done
+ new_pack.revision_index.set_optimize(combine_backing_indices=False)
+ new_pack.inventory_index.set_optimize(combine_backing_indices=False)
+ new_pack.text_index.set_optimize(combine_backing_indices=False)
+ new_pack.signature_index.set_optimize(combine_backing_indices=False)
+ return new_pack
def _update_pack_order(self, entries, index_to_pack_map):
"""Determine how we want our packs to be ordered.
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2009-03-07 06:58:17 +0000
+++ b/bzrlib/tests/test_btree_index.py 2009-03-23 19:35:38 +0000
@@ -431,12 +431,95 @@
self.assertEqual(sorted(nodes), nodes)
self.assertEqual(16, len(nodes))
+ def test_spill_index_stress_1_1_no_combine(self):
+ builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
+ builder.set_optimize(for_size=False, combine_backing_indices=False)
+ nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
+ builder.add_node(*nodes[0])
+ # Test the parts of the index that take up memory are doing so
+ # predictably.
+ self.assertEqual(1, len(builder._nodes))
+ self.assertEqual(1, len(builder._keys))
+ self.assertIs(None, builder._nodes_by_key)
+ builder.add_node(*nodes[1])
+ self.assertEqual(0, len(builder._nodes))
+ self.assertEqual(0, len(builder._keys))
+ self.assertIs(None, builder._nodes_by_key)
+ self.assertEqual(1, len(builder._backing_indices))
+ self.assertEqual(2, builder._backing_indices[0].key_count())
+ # now back to memory
+ builder.add_node(*nodes[2])
+ self.assertEqual(1, len(builder._nodes))
+ self.assertEqual(1, len(builder._keys))
+ self.assertIs(None, builder._nodes_by_key)
+ # And spills to a second backing index but doesn't combine
+ builder.add_node(*nodes[3])
+ self.assertEqual(0, len(builder._nodes))
+ self.assertEqual(0, len(builder._keys))
+ self.assertIs(None, builder._nodes_by_key)
+ self.assertEqual(2, len(builder._backing_indices))
+ for backing_index in builder._backing_indices:
+ self.assertEqual(2, backing_index.key_count())
+ # The next spills to the 3rd slot
+ builder.add_node(*nodes[4])
+ builder.add_node(*nodes[5])
+ self.assertEqual(0, len(builder._nodes))
+ self.assertEqual(0, len(builder._keys))
+ self.assertIs(None, builder._nodes_by_key)
+ self.assertEqual(3, len(builder._backing_indices))
+ for backing_index in builder._backing_indices:
+ self.assertEqual(2, backing_index.key_count())
+ # Now spill a few more, and check that we don't combine
+ builder.add_node(*nodes[6])
+ builder.add_node(*nodes[7])
+ builder.add_node(*nodes[8])
+ builder.add_node(*nodes[9])
+ builder.add_node(*nodes[10])
+ builder.add_node(*nodes[11])
+ builder.add_node(*nodes[12])
+ self.assertEqual(6, len(builder._backing_indices))
+ for backing_index in builder._backing_indices:
+ self.assertEqual(2, backing_index.key_count())
+ # Test that memory and disk are both used for query methods; and that
+ # None is skipped over happily.
+ self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
+ list(builder.iter_all_entries()))
+ # Two nodes - one memory one disk
+ self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
+ set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
+ self.assertEqual(13, builder.key_count())
+ self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
+ set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
+ builder.add_node(*nodes[13])
+ builder.add_node(*nodes[14])
+ builder.add_node(*nodes[15])
+ self.assertEqual(8, len(builder._backing_indices))
+ for backing_index in builder._backing_indices:
+ self.assertEqual(2, backing_index.key_count())
+ # Now finish, and check we got a correctly ordered tree
+ transport = self.get_transport('')
+ size = transport.put_file('index', builder.finish())
+ index = btree_index.BTreeGraphIndex(transport, 'index', size)
+ nodes = list(index.iter_all_entries())
+ self.assertEqual(sorted(nodes), nodes)
+ self.assertEqual(16, len(nodes))
+
def test_set_optimize(self):
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
builder.set_optimize(for_size=True)
self.assertTrue(builder._optimize_for_size)
builder.set_optimize(for_size=False)
self.assertFalse(builder._optimize_for_size)
+ # test that we can set combine_backing_indices without affecting
+ # _optimize_for_size
+ obj = object()
+ builder._optimize_for_size = obj
+ builder.set_optimize(combine_backing_indices=False)
+ self.assertFalse(builder._combine_backing_indices)
+ self.assertIs(obj, builder._optimize_for_size)
+ builder.set_optimize(combine_backing_indices=True)
+ self.assertTrue(builder._combine_backing_indices)
+ self.assertIs(obj, builder._optimize_for_size)
def test_spill_index_stress_2_2(self):
# test that references and longer keys don't confuse things.
More information about the bazaar-commits
mailing list