Rev 4175: Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize(). in lp:///~jameinel/bzr/1.14-btree_spill

John Arbash Meinel john at arbash-meinel.com
Mon Mar 23 19:35:50 GMT 2009


At lp:///~jameinel/bzr/1.14-btree_spill

------------------------------------------------------------
revno: 4175
revision-id: john at arbash-meinel.com-20090323193538-3d01aetz07jsyd3w
parent: john at arbash-meinel.com-20090323192057-eh1l34z1ab5x3qt4
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.14-btree_spill
timestamp: Mon 2009-03-23 14:35:38 -0500
message:
  Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
  
  Update the Packer code so that it sets combine_backing_indices=False, since we know
  we won't be making extra queries while the new pack is being built.
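
For reference, a minimal sketch of how a builder that is only written to (and never
probed while building) would use the new flag; spill_at=2 and the sample nodes are
purely illustrative, mirroring the tests below:

    from bzrlib import btree_index

    builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
    # No lookups happen while building, so skip re-combining the spilled
    # backing indices; for_size keeps whatever value it already had.
    builder.set_optimize(combine_backing_indices=False)
    builder.add_node(('key1',), 'value1')  # illustrative key/value
    builder.add_node(('key2',), 'value2')
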
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2009-03-23 19:20:57 +0000
+++ b/bzrlib/btree_index.py	2009-03-23 19:35:38 +0000
@@ -140,7 +140,6 @@
         # Indicate it hasn't been built yet
         self._nodes_by_key = None
         self._optimize_for_size = False
-        self._combine_spilled_indices = True
 
     def add_node(self, key, value, references=()):
         """Add a node to the index.
@@ -181,7 +180,7 @@
         combine mem with the first and second indexes, creating a new one of
         size 4x. On the fifth create a single new one, etc.
         """
-        if self._combine_spilled_indices:
+        if self._combine_backing_indices:
             (new_backing_file, size,
              backing_pos) = self._spill_mem_keys_and_combine()
         else:
@@ -193,7 +192,7 @@
                                       base_name, size)
         # GC will clean up the file
         new_backing._file = new_backing_file
-        if self._combine_spilled_indices:
+        if self._combine_backing_indices:
             if len(self._backing_indices) == backing_pos:
                 self._backing_indices.append(None)
             self._backing_indices[backing_pos] = new_backing

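As a rough illustration of the two spill strategies toggled above (toy code, not
bzrlib internals): with combining enabled the on-disk backing indices follow the
doubling schedule described in the docstring, while with it disabled every spill
simply writes another index of spill_at keys.

    # Toy model: each element is the size, in keys, of one on-disk backing
    # index; None marks a slot whose contents were absorbed into a later,
    # larger combined index.
    def backing_sizes_after(spills, spill_at=2, combine=True):
        sizes = []
        for _ in range(spills):
            if not combine:
                # Without combining, every spill writes a fresh index.
                sizes.append(spill_at)
                continue
            chunk = spill_at
            pos = 0
            # Absorb existing indices until an empty slot is found, giving
            # the binary-counter pattern of sizes 1x, 2x, 4x, ...
            while pos < len(sizes) and sizes[pos] is not None:
                chunk += sizes[pos]
                sizes[pos] = None
                pos += 1
            if pos == len(sizes):
                sizes.append(None)
            sizes[pos] = chunk
        return sizes

    print(backing_sizes_after(5, combine=True))   # [2, None, 8]
    print(backing_sizes_after(5, combine=False))  # [2, 2, 2, 2, 2]
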
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2009-02-23 15:29:35 +0000
+++ b/bzrlib/index.py	2009-03-23 19:35:38 +0000
@@ -99,6 +99,7 @@
         self._nodes_by_key = None
         self._key_length = key_elements
         self._optimize_for_size = False
+        self._combine_backing_indices = True
 
     def _check_key(self, key):
         """Raise BadIndexKey if key is not a valid key for this index."""
@@ -315,16 +316,23 @@
                 (len(result.getvalue()), expected_bytes))
         return result
 
-    def set_optimize(self, for_size=True):
+    def set_optimize(self, for_size=None, combine_backing_indices=None):
         """Change how the builder tries to optimize the result.
 
         :param for_size: Tell the builder to try and make the index as small as
             possible.
+        :param combine_backing_indices: If the builder spills to disk to save
+            memory, should the on-disk indices be combined? Set to True if you
+            are going to be probing the index, but to False if you are not. (If
+            you are not querying, then the time spent combining is wasted.)
         :return: None
         """
         # GraphIndexBuilder itself doesn't pay attention to the flag yet, but
         # other builders do.
-        self._optimize_for_size = for_size
+        if for_size is not None:
+            self._optimize_for_size = for_size
+        if combine_backing_indices is not None:
+            self._combine_backing_indices = combine_backing_indices
 
 
 class GraphIndex(object):

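A minimal sketch of the new call semantics (only the flags that are explicitly
passed change; a parameter left as None keeps its previous value), using
GraphIndexBuilder directly:

    from bzrlib.index import GraphIndexBuilder

    builder = GraphIndexBuilder()
    builder.set_optimize(for_size=True)
    # _optimize_for_size is now True; _combine_backing_indices is untouched.
    builder.set_optimize(combine_backing_indices=False)
    # _combine_backing_indices is now False; _optimize_for_size stays True.
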
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2009-03-16 05:33:31 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2009-03-23 19:35:38 +0000
@@ -725,8 +725,15 @@
 
     def open_pack(self):
         """Open a pack for the pack we are creating."""
-        return NewPack(self._pack_collection, upload_suffix=self.suffix,
+        new_pack = NewPack(self._pack_collection, upload_suffix=self.suffix,
                 file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
+        # We know that we will process all nodes in order, and don't need to
+        # query, so don't combine any indices spilled to disk until we are done
+        new_pack.revision_index.set_optimize(combine_backing_indices=False)
+        new_pack.inventory_index.set_optimize(combine_backing_indices=False)
+        new_pack.text_index.set_optimize(combine_backing_indices=False)
+        new_pack.signature_index.set_optimize(combine_backing_indices=False)
+        return new_pack
 
     def _update_pack_order(self, entries, index_to_pack_map):
         """Determine how we want our packs to be ordered.

=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py	2009-03-23 19:20:57 +0000
+++ b/bzrlib/tests/test_btree_index.py	2009-03-23 19:35:38 +0000
@@ -433,7 +433,7 @@
 
     def test_spill_index_stress_1_1_no_combine(self):
         builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
-        builder._combine_spilled_indices = False
+        builder.set_optimize(for_size=False, combine_backing_indices=False)
         nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
         builder.add_node(*nodes[0])
         # Test the parts of the index that take up memory are doing so
@@ -510,6 +510,16 @@
         self.assertTrue(builder._optimize_for_size)
         builder.set_optimize(for_size=False)
         self.assertFalse(builder._optimize_for_size)
+        # test that we can set combine_backing_indices without affecting
+        # _optimize_for_size
+        obj = object()
+        builder._optimize_for_size = obj
+        builder.set_optimize(combine_backing_indices=False)
+        self.assertFalse(builder._combine_backing_indices)
+        self.assertIs(obj, builder._optimize_for_size)
+        builder.set_optimize(combine_backing_indices=True)
+        self.assertTrue(builder._combine_backing_indices)
+        self.assertIs(obj, builder._optimize_for_size)
 
     def test_spill_index_stress_2_2(self):
         # test that references and longer keys don't confuse things.


