Rev 24: Write a converter that can convert packs to btrees inplace. in http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk

Robert Collins robertc at robertcollins.net
Fri Jul 4 04:04:12 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk

------------------------------------------------------------
revno: 24
revision-id: robertc at robertcollins.net-20080704030406-zb19krvf26p3htzy
parent: robertc at robertcollins.net-20080703115714-czomk4m21tfu2ebe
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Fri 2008-07-04 13:04:06 +1000
message:
  Write a converter that can convert packs to btrees inplace.
added:
  tests/test_repofmt.py          test_repofmt.py-20080704030345-bza6rrd6nf4sdmyy-1
modified:
  btree_index.py                 index.py-20080624222253-p0x5f92uyh5hw734-7
  repofmt.py                     repofmt.py-20080701113732-m1iu3n94ikbxdelb-1
  tests/__init__.py              __init__.py-20080624222253-p0x5f92uyh5hw734-10
  tests/test_btree_index.py      test_index.py-20080624222253-p0x5f92uyh5hw734-13
=== modified file 'btree_index.py'
--- a/btree_index.py	2008-07-03 07:31:37 +0000
+++ b/btree_index.py	2008-07-04 03:04:06 +0000
@@ -350,7 +350,7 @@
 
     _default_use_blooms = False
 
-    def __init__(self, transport, name, size=None):
+    def __init__(self, transport, name, size):
         """Create a B+Tree index object on the index name.
 
         :param transport: The transport to read data for the index from.

=== modified file 'repofmt.py'
--- a/repofmt.py	2008-07-03 11:57:14 +0000
+++ b/repofmt.py	2008-07-04 03:04:06 +0000
@@ -24,7 +24,9 @@
 import md5
 import time
 
-from bzrlib import debug, pack
+from bzrlib import debug, errors, pack
+from bzrlib.index import GraphIndexBuilder
+from bzrlib.inter import InterObject
 from bzrlib.plugins.index2.btree_index import BTreeGraphIndex, BTreeBuilder
 from bzrlib.knit import (
     _KnitGraphIndex,
@@ -426,3 +428,148 @@
             "pack-0.92-subtrees\n")
 
 
+class InterRepositoryRepositoryFormat(InterObject):
+    """This class is used for conversions from a repository to a new format."""
+
+    _optimisers = []
+    """The available optimisers for conversion."""
+
+
+class InterKnitPackBTreeFormat(InterRepositoryRepositoryFormat):
+    """Conversions from KnitPack formats to BTree formats."""
+
+    suffix_types = {
+        '.rix':'revision',
+        '.iix':'inventory',
+        '.six':'signature',
+        '.tix':'text',
+    }
+
+    pivot_renames = [
+        ('indices', 'oldindices'),
+        ('pack-names', 'old-pack-names'),
+        ('newindices', 'indices'),
+        ('new-pack-names', 'pack-names'),
+        ]
+
+    def _create_tmp_index_dir(self):
+        """Create a newindices dir to hold converted indices."""
+        self.source._transport.mkdir('newindices')
+
+    def _remove_tmp_index_dir(self):
+        """Remove a newindices dir if one is present."""
+        try:
+            self.source._transport.delete_tree('newindices')
+        except errors.NoSuchFile:
+            pass
+
+    def _prep_conversion(self):
+        """Prepare to convert data.
+        
+        This creates a staging directory for new indices and locks the name
+        list to prevent other writers mutating the repository. There is an
+        inherent race condition because other clients don't recheck the format
+        when refreshing the names list. This can be fixed by changing the names
+        list to be a BTreeIndex as part of the final format merge.
+        """
+        self.lock()
+        self._save_original_format()
+        try:
+            self._create_tmp_index_dir()
+        except errors.FileExists:
+            self._remove_tmp_index_dir()
+            self._create_tmp_index_dir()
+
+    def _cancel_conversion(self):
+        """Cancel a conversion that has not been completed."""
+        for name_to, name_from in reversed(self.pivot_renames):
+            try:
+                self.source._transport.rename(name_from, name_to)
+            except errors.NoSuchFile:
+                pass
+        self._remove_tmp_index_dir()
+        try:
+            self.source._transport.delete('new-pack-names')
+        except errors.NoSuchFile:
+            pass
+        try:
+            self.source._transport.rename('format.original', 'format')
+        except errors.NoSuchFile:
+            pass
+        self.unlock()
+
+    def _complete(self):
+        """Complete a conversion after the _convert has succeeded."""
+        # Ideally, a write barrier here would be useful for correctness.
+        self.source._transport.delete('format.original')
+        self.source._transport.delete('old-pack-names')
+        self.source._transport.delete_tree('oldindices')
+        self.unlock()
+
+    def convert(self):
+        """Convert source to be of format target."""
+        try:
+            self._prep_conversion()
+            self._convert()
+            self._pivot()
+        except:
+            self._cancel_conversion()
+            raise
+        self._complete()
+
+    def _convert(self):
+        """Do the time consuming portions of conversion.
+
+        This should not alter live data, just prepare new disk structures for
+        insertion.
+        """
+        packs =  self.source._pack_collection
+        packs.reset()
+        packs.ensure_loaded()
+        indices = list(packs.revision_index.combined_index._indices)
+        indices.extend(packs.signature_index.combined_index._indices)
+        indices.extend(packs.inventory_index.combined_index._indices)
+        indices.extend(packs.text_index.combined_index._indices)
+        index_sizes = {}
+        for index in indices:
+            keys = index.key_count()
+            key_length = index._key_length
+            ref_lists = index.node_ref_lists
+            builder = BTreeBuilder(reference_lists=ref_lists, key_elements=key_length)
+            for node in index.iter_all_entries():
+                builder.add_node(*node[1:])
+            size = self.source._transport.put_file('newindices/' + index._name,
+                builder.finish())
+            sizes = index_sizes.setdefault(index._name[:-4],
+                [None, None, None, None])
+            index_type = self.suffix_types[index._name[-4:]]
+            index_offset = NewPack.index_definitions[index_type][1]
+            sizes[index_offset] = size
+        builder = GraphIndexBuilder()
+        for name, sizes in index_sizes.iteritems():
+            builder.add_node((name,), ' '.join(str(size) for size in sizes))
+        self.source._transport.put_file('new-pack-names', builder.finish())
+
+    def lock(self):
+        self.source._pack_collection.lock_names()
+
+    def unlock(self):
+        self.source._pack_collection._unlock_names()
+
+    def _pivot(self):
+        """Move the new data into place, still allowing rollback."""
+        self._remove_format()
+        for name_from, name_to in self.pivot_renames:
+            self.source._transport.rename(name_from, name_to)
+        self._set_format()
+
+    def _remove_format(self):
+        self.source._transport.delete('format')
+
+    def _save_original_format(self):
+        self.source._transport.put_file('format.original',
+            self.source._transport.get('format'))
+
+    def _set_format(self):
+        self.source._transport.put_bytes('format',
+            self.target.get_format_string())

=== modified file 'tests/__init__.py'
--- a/tests/__init__.py	2008-06-30 23:45:37 +0000
+++ b/tests/__init__.py	2008-07-04 03:04:06 +0000
@@ -24,6 +24,7 @@
         'errors',
         'btree_index',
         'chunk_writer',
+        'repofmt',
         ]
     standard_tests.addTests(loader.loadTestsFromModuleNames(
         ['bzrlib.plugins.index2.tests.test_' + name for 

=== modified file 'tests/test_btree_index.py'
--- a/tests/test_btree_index.py	2008-07-03 03:45:49 +0000
+++ b/tests/test_btree_index.py	2008-07-04 03:04:06 +0000
@@ -282,7 +282,7 @@
 
     def test_trivial_constructor(self):
         transport = get_transport('trace+' + self.get_url(''))
-        index = btree_index.BTreeGraphIndex(transport, 'index')
+        index = btree_index.BTreeGraphIndex(transport, 'index', None)
         # Checks the page size at load, but that isn't logged yet.
         self.assertEqual([], transport._activity)
 
@@ -296,7 +296,7 @@
         builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
         transport = get_transport('trace+' + self.get_url(''))
         transport.put_file('index', builder.finish())
-        index = btree_index.BTreeGraphIndex(transport, 'index')
+        index = btree_index.BTreeGraphIndex(transport, 'index', None)
         del transport._activity[:]
         self.assertEqual([], transport._activity)
         self.assertEqual(0, index.key_count())

=== added file 'tests/test_repofmt.py'
--- a/tests/test_repofmt.py	1970-01-01 00:00:00 +0000
+++ b/tests/test_repofmt.py	2008-07-04 03:04:06 +0000
@@ -0,0 +1,333 @@
+# index2, a bzr plugin providing experimental index types.
+# Copyright (C) 2008 Canonical Limited.
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as published
+# by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+# 
+
+"""Tests for btree based repository operations."""
+
+import pprint
+import random
+import zlib
+
+from bzrlib import index, tests
+from bzrlib import errors as bzrerrors
+from bzrlib.plugins import index2
+from bzrlib.plugins.index2 import btree_index, errors, repofmt
+from bzrlib.repository import Repository
+from bzrlib.tests import (
+    TestCaseWithTransport,
+    )
+from bzrlib.transport import get_transport
+
+
+class BTreeConverterTestCase(TestCaseWithTransport):
+
+    def setUp(self):
+        TestCaseWithTransport.setUp(self)
+        repo = self.make_repository('.', format='pack-0.92')
+        target = repofmt.RepositoryFormatPackBTreePlain()
+        self.inter = repofmt.InterKnitPackBTreeFormat(repo, target)
+
+    def assertBackupIndices(self):
+        self.assertTrue(self.get_transport().has('.bzr/repository/oldindices'))
+        self.assertTrue(self.get_transport().has('.bzr/repository/old-pack-names'))
+
+    def assertConverted(self):
+        """Test the converted repository by using its indices."""
+        repo = Repository.open(self.get_url('.'))
+        repo.lock_read()
+        list(repo.all_revision_ids())
+        repo.unlock()
+    
+    def assertEmptyNewIndices(self):
+        self.assertEqual([],
+            self.get_transport().list_dir('.bzr/repository/newindices'))
+
+    def assertFormatSaved(self):
+        """Check that the repo format has been backed up in case of failure."""
+        self.assertEqual(self.inter.source._format.get_format_string(),
+            self.get_transport().get_bytes('.bzr/repository/format.original'))
+
+    def assertIndicesConverted(self):
+        """Check each index has a newindices copy with the recorded size."""
+        names = self.inter.source._transport.list_dir('indices')
+        newnames = self.inter.source._transport.list_dir('indices')
+        self.assertEqual(names, newnames)
+        transport = self.inter.source._transport
+        sizes = {}
+        for node in index.GraphIndex(transport, 'new-pack-names', None
+            ).iter_all_entries():
+            sizes[node[1][0]] = \
+                self.inter.source._pack_collection._parse_index_sizes(node[2])
+        for name in names:
+            old_size = transport.stat('indices/' + name).st_size
+            old_index = index.GraphIndex(
+                transport.clone('indices'), name, old_size)
+            old_contents = list(old_index.iter_all_entries())
+            new_size = transport.stat('newindices/' + name).st_size
+            new_index = btree_index.BTreeGraphIndex(
+                transport.clone('newindices'), name, new_size)
+            new_contents = list(new_index.iter_all_entries())
+            index_type = self.inter.suffix_types[name[-4:]]
+            index_offset = repofmt.NewPack.index_definitions[index_type][1]
+            self.assertEqual(sizes[name[:-4]][index_offset], new_size)
+
+    def assertNamesLocked(self):
+        control = self.inter.source.control_files
+        self.assertTrue(control.is_locked())
+        self.assertEqual('w', control._lock_mode)
+
+    def assertNamesUnlocked(self):
+        self.assertFalse(self.inter.source.control_files.is_locked())
+
+    def assertNewFormatSet(self):
+        self.assertEqual(self.inter.target.get_format_string(),
+            self.get_transport().get_bytes('.bzr/repository/format'))
+
+    def assertNoFormat(self):
+        self.assertFalse(self.get_transport().has(
+            '.bzr/repository/format'))
+
+    def assertNoBackupFormat(self):
+        self.assertFalse(self.get_transport().has(
+            '.bzr/repository/format.original'))
+
+    def assertNoBackupIndices(self):
+        self.assertFalse(self.get_transport().has(
+            '.bzr/repository/oldindices'))
+
+    def assertNoNewIndices(self):
+        self.assertFalse(self.get_transport().has(
+            '.bzr/repository/newindices'))
+        self.assertFalse(self.get_transport().has(
+            '.bzr/repository/new-pack-names'))
+
+    def assertNoNewNames(self):
+        self.assertFalse(self.get_transport().has(
+            '.bzr/repository/new-pack-names'))
+
+    def assertOriginalFormatSet(self):
+        self.assertEqual(self.inter.source._format.get_format_string(),
+            self.get_transport().get_bytes('.bzr/repository/format'))
+
+    def assertUnModified(self):
+        self.assertOriginalFormatSet()
+        self.assertNoNewIndices()
+        self.assertNoNewNames()
+        self.assertNoBackupFormat()
+        self.assertOriginalFormatSet()
+        self.assertNoBackupIndices()
+        self.assertNamesUnlocked()
+
+    def test__create_fresh_tmp_index_dir(self):
+        self.inter._create_tmp_index_dir()
+        self.assertEmptyNewIndices()
+
+    def test__create_existing_tmp_index_dir_error(self):
+        # when a tmp index dir already exists, creating it again raises.
+        self.inter._create_tmp_index_dir()
+        self.assertRaises(bzrerrors.FileExists,
+            self.inter._create_tmp_index_dir)
+
+    def test__prep_conversion_clean(self):
+        # starting the conversion physically locks the repo's names list.
+        self.inter._prep_conversion()
+        self.assertNamesLocked()
+        self.assertEmptyNewIndices()
+        self.assertFormatSaved()
+        self.inter.unlock()
+
+    def test__prep_conversion_existing_newindices(self):
+        # when there is a newindices dir already, it is cleaned.
+        self.inter._create_tmp_index_dir()
+        self.inter._prep_conversion()
+        self.assertEmptyNewIndices()
+        self.assertNamesLocked()
+        self.assertFormatSaved()
+        self.inter.unlock()
+
+    def test__remove_tmp_index_dir(self):
+        # if there is no tmp index dir, it does not fail.
+        self.inter._remove_tmp_index_dir()
+
+    def test__cancel_conversion_prepped(self):
+        self.inter._prep_conversion()
+        self.inter._cancel_conversion()
+        self.assertNoNewIndices()
+        self.assertNamesUnlocked()
+        self.assertNoBackupFormat()
+
+    def test__cancel_after_remove(self):
+        self.inter._prep_conversion()
+        self.inter._remove_format()
+        self.inter._cancel_conversion()
+        self.assertOriginalFormatSet()
+
+    def test__cancel_after_lock(self):
+        self.inter.lock()
+        self.assertNamesLocked()
+        self.inter._cancel_conversion()
+        self.assertUnModified()
+
+    def test__cancel_after_format_backup(self):
+        self.inter.lock()
+        self.inter._save_original_format()
+        self.inter._cancel_conversion()
+        self.assertUnModified()
+
+    def test__cancel_after_convert(self):
+        self.inter._prep_conversion()
+        self.inter._convert()
+        self.inter._cancel_conversion()
+        self.assertUnModified()
+
+    def test_cancel_after_pivot(self):
+        self.inter._prep_conversion()
+        self.inter._convert()
+        self.inter._pivot()
+        self.inter._cancel_conversion()
+        self.assertUnModified()
+
+    def test__complete(self):
+        # after converting and pivoting, complete removes backups and unlocks
+        self.inter._prep_conversion()
+        self.inter._convert()
+        self.inter._pivot()
+        self.inter._complete()
+        self.assertNamesUnlocked()
+        self.assertNoBackupFormat()
+        self.assertNoBackupIndices()
+        self.assertNewFormatSet()
+        self.assertNoNewNames()
+        self.assertNoNewIndices()
+        self.assertConverted()
+
+    def test__convert_empty(self):
+        self.inter._prep_conversion()
+        self.inter._convert()
+        self.assertIndicesConverted()
+        self.inter.unlock()
+
+    def test__convert(self):
+        self.inter.source.bzrdir.create_branch()
+        tree = self.inter.source.bzrdir.create_workingtree()
+        # create two packs
+        tree.commit('post 1')
+        tree.commit('post 2')
+        self.inter._prep_conversion()
+        self.inter._convert()
+        self.assertIndicesConverted()
+        self.inter.unlock()
+
+    def test__pivot(self):
+        self.inter._prep_conversion()
+        self.inter._convert()
+        # track the calls made
+        self.inter.source._transport = get_transport('trace+' +
+            self.inter.source._transport.base)
+        self.inter._pivot()
+        activity = list(self.inter.source._transport._activity)
+        self.assertBackupIndices()
+        self.assertNoNewIndices()
+        self.assertNewFormatSet()
+        self.inter.unlock()
+        self.assertEqual([
+            ('delete', 'format'),
+            ('rename', 'indices', 'oldindices'),
+            ('rename', 'pack-names', 'old-pack-names'),
+            ('rename', 'newindices', 'indices'),
+            ('rename', 'new-pack-names', 'pack-names'),
+            ('put_bytes', 'format', 59, None),
+            ],
+            activity)
+
+    def test__remove_format(self):
+        self.inter._remove_format()
+        self.assertNoFormat()
+
+    def test__set_format(self):
+        self.inter._remove_format()
+        self.inter._set_format()
+        self.assertNewFormatSet()
+
+    def make_error_log_call(self, name, calls):
+        def log_error():
+            calls.append(name)
+            raise Exception(name)
+        return log_error
+
+    def make_log_call(self, name, calls):
+        def log_call():
+            calls.append(name)
+        return log_call
+
+    def test_convert_noerror(self):
+        # setup lambdas to trap what convert calls:
+        calls = []
+        self.inter._prep_conversion = self.make_log_call('prep', calls)
+        self.inter._convert = self.make_log_call('convert', calls)
+        self.inter._pivot = self.make_log_call('pivot', calls)
+        self.inter._complete = self.make_log_call('complete', calls)
+        self.inter._cancel_conversion = self.make_log_call('cancel', calls)
+        self.inter.convert()
+        self.assertEqual(['prep', 'convert', 'pivot', 'complete'], calls)
+
+    def test_convert_prep_error_cancels(self):
+        # setup lambdas to trap what convert calls:
+        calls = []
+        self.inter._prep_conversion = self.make_error_log_call('prep', calls)
+        self.inter._convert = self.make_log_call('convert', calls)
+        self.inter._pivot = self.make_log_call('pivot', calls)
+        self.inter._complete = self.make_log_call('complete', calls)
+        self.inter._cancel_conversion = self.make_log_call('cancel', calls)
+        error = self.assertRaises(Exception, self.inter.convert)
+        self.assertEqual(error.args, ('prep',))
+        self.assertEqual(['prep', 'cancel'], calls)
+
+    def test_convert_convert_error_cancels(self):
+        # setup lambdas to trap what convert calls:
+        calls = []
+        self.inter._prep_conversion = self.make_log_call('prep', calls)
+        self.inter._convert = self.make_error_log_call('convert', calls)
+        self.inter._pivot = self.make_log_call('pivot', calls)
+        self.inter._complete = self.make_log_call('complete', calls)
+        self.inter._cancel_conversion = self.make_log_call('cancel', calls)
+        error = self.assertRaises(Exception, self.inter.convert)
+        self.assertEqual(error.args, ('convert',))
+        self.assertEqual(['prep', 'convert', 'cancel'], calls)
+
+    def test_convert_pivot_error_cancels(self):
+        # setup lambdas to trap what convert calls:
+        calls = []
+        self.inter._prep_conversion = self.make_log_call('prep', calls)
+        self.inter._convert = self.make_log_call('convert', calls)
+        self.inter._pivot = self.make_error_log_call('pivot', calls)
+        self.inter._complete = self.make_log_call('complete', calls)
+        self.inter._cancel_conversion = self.make_log_call('cancel', calls)
+        error = self.assertRaises(Exception, self.inter.convert)
+        self.assertEqual(error.args, ('pivot',))
+        self.assertEqual(['prep', 'convert', 'pivot', 'cancel'], calls)
+
+    def test_convert_complete_error_does_not_cancel(self):
+        # setup lambdas to trap what convert calls:
+        calls = []
+        self.inter._prep_conversion = self.make_log_call('prep', calls)
+        self.inter._convert = self.make_log_call('convert', calls)
+        self.inter._pivot = self.make_log_call('pivot', calls)
+        self.inter._complete = self.make_error_log_call('complete', calls)
+        self.inter._cancel_conversion = self.make_log_call('cancel', calls)
+        error = self.assertRaises(Exception, self.inter.convert)
+        self.assertEqual(error.args, ('complete',))
+        self.assertEqual(['prep', 'convert', 'pivot', 'complete'], calls)




More information about the bazaar-commits mailing list