Rev 24: Write a converter that can convert packs to btrees inplace. in http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk
Robert Collins
robertc at robertcollins.net
Fri Jul 4 04:04:12 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk
------------------------------------------------------------
revno: 24
revision-id: robertc at robertcollins.net-20080704030406-zb19krvf26p3htzy
parent: robertc at robertcollins.net-20080703115714-czomk4m21tfu2ebe
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Fri 2008-07-04 13:04:06 +1000
message:
Write a converter that can convert packs to btrees inplace.
added:
tests/test_repofmt.py test_repofmt.py-20080704030345-bza6rrd6nf4sdmyy-1
modified:
btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
repofmt.py repofmt.py-20080701113732-m1iu3n94ikbxdelb-1
tests/__init__.py __init__.py-20080624222253-p0x5f92uyh5hw734-10
tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-03 07:31:37 +0000
+++ b/btree_index.py 2008-07-04 03:04:06 +0000
@@ -350,7 +350,7 @@
_default_use_blooms = False
- def __init__(self, transport, name, size=None):
+ def __init__(self, transport, name, size):
"""Create a B+Tree index object on the index name.
:param transport: The transport to read data for the index from.
=== modified file 'repofmt.py'
--- a/repofmt.py 2008-07-03 11:57:14 +0000
+++ b/repofmt.py 2008-07-04 03:04:06 +0000
@@ -24,7 +24,9 @@
import md5
import time
-from bzrlib import debug, pack
+from bzrlib import debug, errors, pack
+from bzrlib.index import GraphIndexBuilder
+from bzrlib.inter import InterObject
from bzrlib.plugins.index2.btree_index import BTreeGraphIndex, BTreeBuilder
from bzrlib.knit import (
_KnitGraphIndex,
@@ -426,3 +428,148 @@
"pack-0.92-subtrees\n")
+class InterRepositoryRepositoryFormat(InterObject):
+ """Base class for converters that move a repository to a new format."""
+
+ # NOTE(review): presumably InterObject consults this per-class list when
+ # looking up a registered converter - confirm against bzrlib.inter.
+ _optimisers = []
+ """The available optimisers for conversion."""
+
+
+class InterKnitPackBTreeFormat(InterRepositoryRepositoryFormat):
+ """Convertsions from KnitPack foramts to BTree formats."""
+
+ suffix_types = {
+ '.rix':'revision',
+ '.iix':'inventory',
+ '.six':'signature',
+ '.tix':'text',
+ }
+
+ pivot_renames = [
+ ('indices', 'oldindices'),
+ ('pack-names', 'old-pack-names'),
+ ('newindices', 'indices'),
+ ('new-pack-names', 'pack-names'),
+ ]
+
+ def _create_tmp_index_dir(self):
+ """Create a newindices dir to hold converted indices."""
+ self.source._transport.mkdir('newindices')
+
+ def _remove_tmp_index_dir(self):
+ """Remove a newindices dir if one is present."""
+ try:
+ self.source._transport.delete_tree('newindices')
+ except errors.NoSuchFile:
+ pass
+
+ def _prep_conversion(self):
+ """Prepare to convert data.
+
+ This creates a staging directory for new indices and locks the name
+ list to prevent other writers mutating the repository. There is an
+ inherent race condition because other clients don't recheck the format
+ when refreshing the names list. This can be fixed by changing the names
+ list to be a BTreeIndex as part of the final format merge.
+ """
+ self.lock()
+ self._save_original_format()
+ try:
+ self._create_tmp_index_dir()
+ except errors.FileExists:
+ self._remove_tmp_index_dir()
+ self._create_tmp_index_dir()
+
+ def _cancel_conversion(self):
+ """Cancel a conversion that has not been completed."""
+ for name_to, name_from in reversed(self.pivot_renames):
+ try:
+ self.source._transport.rename(name_from, name_to)
+ except errors.NoSuchFile:
+ pass
+ self._remove_tmp_index_dir()
+ try:
+ self.source._transport.delete('new-pack-names')
+ except errors.NoSuchFile:
+ pass
+ try:
+ self.source._transport.rename('format.original', 'format')
+ except errors.NoSuchFile:
+ pass
+ self.unlock()
+
+ def _complete(self):
+ """Complete a conversion after the _convert has succeeded."""
+ # Ideally, a write barrier here would be useful for correctness.
+ self.source._transport.delete('format.original')
+ self.source._transport.delete('old-pack-names')
+ self.source._transport.delete_tree('oldindices')
+ self.unlock()
+
+ def convert(self):
+ """Convert source to be of format target."""
+ try:
+ self._prep_conversion()
+ self._convert()
+ self._pivot()
+ except:
+ self._cancel_conversion()
+ raise
+ self._complete()
+
+ def _convert(self):
+ """Do the time consuming portions of conversion.
+
+ This should not alter live data, just prepare new disk structures for
+ insertion.
+ """
+ packs = self.source._pack_collection
+ packs.reset()
+ packs.ensure_loaded()
+ indices = list(packs.revision_index.combined_index._indices)
+ indices.extend(packs.signature_index.combined_index._indices)
+ indices.extend(packs.inventory_index.combined_index._indices)
+ indices.extend(packs.text_index.combined_index._indices)
+ index_sizes = {}
+ for index in indices:
+ keys = index.key_count()
+ key_length = index._key_length
+ ref_lists = index.node_ref_lists
+ builder = BTreeBuilder(reference_lists=ref_lists, key_elements=key_length)
+ for node in index.iter_all_entries():
+ builder.add_node(*node[1:])
+ size = self.source._transport.put_file('newindices/' + index._name,
+ builder.finish())
+ sizes = index_sizes.setdefault(index._name[:-4],
+ [None, None, None, None])
+ index_type = self.suffix_types[index._name[-4:]]
+ index_offset = NewPack.index_definitions[index_type][1]
+ sizes[index_offset] = size
+ builder = GraphIndexBuilder()
+ for name, sizes in index_sizes.iteritems():
+ builder.add_node((name,), ' '.join(str(size) for size in sizes))
+ self.source._transport.put_file('new-pack-names', builder.finish())
+
+ def lock(self):
+ self.source._pack_collection.lock_names()
+
+ def unlock(self):
+ self.source._pack_collection._unlock_names()
+
+ def _pivot(self):
+ """Move the new data into place, still allowing rollback."""
+ self._remove_format()
+ for name_from, name_to in self.pivot_renames:
+ self.source._transport.rename(name_from, name_to)
+ self._set_format()
+
+ def _remove_format(self):
+ self.source._transport.delete('format')
+
+ def _save_original_format(self):
+ self.source._transport.put_file('format.original',
+ self.source._transport.get('format'))
+
+ def _set_format(self):
+ self.source._transport.put_bytes('format',
+ self.target.get_format_string())
=== modified file 'tests/__init__.py'
--- a/tests/__init__.py 2008-06-30 23:45:37 +0000
+++ b/tests/__init__.py 2008-07-04 03:04:06 +0000
@@ -24,6 +24,7 @@
'errors',
'btree_index',
'chunk_writer',
+ 'repofmt',
]
standard_tests.addTests(loader.loadTestsFromModuleNames(
['bzrlib.plugins.index2.tests.test_' + name for
=== modified file 'tests/test_btree_index.py'
--- a/tests/test_btree_index.py 2008-07-03 03:45:49 +0000
+++ b/tests/test_btree_index.py 2008-07-04 03:04:06 +0000
@@ -282,7 +282,7 @@
def test_trivial_constructor(self):
transport = get_transport('trace+' + self.get_url(''))
- index = btree_index.BTreeGraphIndex(transport, 'index')
+ index = btree_index.BTreeGraphIndex(transport, 'index', None)
# Checks the page size at load, but that isn't logged yet.
self.assertEqual([], transport._activity)
@@ -296,7 +296,7 @@
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
transport = get_transport('trace+' + self.get_url(''))
transport.put_file('index', builder.finish())
- index = btree_index.BTreeGraphIndex(transport, 'index')
+ index = btree_index.BTreeGraphIndex(transport, 'index', None)
del transport._activity[:]
self.assertEqual([], transport._activity)
self.assertEqual(0, index.key_count())
=== added file 'tests/test_repofmt.py'
--- a/tests/test_repofmt.py 1970-01-01 00:00:00 +0000
+++ b/tests/test_repofmt.py 2008-07-04 03:04:06 +0000
@@ -0,0 +1,333 @@
+# index2, a bzr plugin providing experimental index types.
+# Copyright (C) 2008 Canonical Limited.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as published
+# by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+"""Tests for btree based repository operations."""
+
+import pprint
+import random
+import zlib
+
+from bzrlib import index, tests
+from bzrlib import errors as bzrerrors
+from bzrlib.plugins import index2
+from bzrlib.plugins.index2 import btree_index, errors, repofmt
+from bzrlib.repository import Repository
+from bzrlib.tests import (
+ TestCaseWithTransport,
+ )
+from bzrlib.transport import get_transport
+
+
+class BTreeConverterTestCase(TestCaseWithTransport):
+
+ def setUp(self):
+ TestCaseWithTransport.setUp(self)
+ repo = self.make_repository('.', format='pack-0.92')
+ target = repofmt.RepositoryFormatPackBTreePlain()
+ self.inter = repofmt.InterKnitPackBTreeFormat(repo, target)
+
+ def assertBackupIndices(self):
+ self.assertTrue(self.get_transport().has('.bzr/repository/oldindices'))
+ self.assertTrue(self.get_transport().has('.bzr/repository/old-pack-names'))
+
+ def assertConverted(self):
+ """Test the converted repository by using its indices."""
+ repo = Repository.open(self.get_url('.'))
+ repo.lock_read()
+ list(repo.all_revision_ids())
+ repo.unlock()
+
+ def assertEmptyNewIndices(self):
+ self.assertEqual([],
+ self.get_transport().list_dir('.bzr/repository/newindices'))
+
+ def assertFormatSaved(self):
+ """Check that the repo format has been backed up in case of failure."""
+ self.assertEqual(self.inter.source._format.get_format_string(),
+ self.get_transport().get_bytes('.bzr/repository/format.original'))
+
+ def assertIndicesConverted(self):
+ """Check that newindices has an exact copy of oldindices."""
+ names = self.inter.source._transport.list_dir('indices')
+ newnames = self.inter.source._transport.list_dir('indices')
+ self.assertEqual(names, newnames)
+ transport = self.inter.source._transport
+ sizes = {}
+ for node in index.GraphIndex(transport, 'new-pack-names', None
+ ).iter_all_entries():
+ sizes[node[1][0]] = \
+ self.inter.source._pack_collection._parse_index_sizes(node[2])
+ for name in names:
+ old_size = transport.stat('indices/' + name).st_size
+ old_index = index.GraphIndex(
+ transport.clone('indices'), name, old_size)
+ old_contents = list(old_index.iter_all_entries())
+ new_size = transport.stat('newindices/' + name).st_size
+ new_index = btree_index.BTreeGraphIndex(
+ transport.clone('newindices'), name, new_size)
+ new_contents = list(new_index.iter_all_entries())
+ index_type = self.inter.suffix_types[name[-4:]]
+ index_offset = repofmt.NewPack.index_definitions[index_type][1]
+ self.assertEqual(sizes[name[:-4]][index_offset], new_size)
+
+ def assertNamesLocked(self):
+ control = self.inter.source.control_files
+ self.assertTrue(control.is_locked())
+ self.assertEqual('w', control._lock_mode)
+
+ def assertNamesUnlocked(self):
+ self.assertFalse(self.inter.source.control_files.is_locked())
+
+ def assertNewFormatSet(self):
+ self.assertEqual(self.inter.target.get_format_string(),
+ self.get_transport().get_bytes('.bzr/repository/format'))
+
+ def assertNoFormat(self):
+ self.assertFalse(self.get_transport().has(
+ '.bzr/repository/format'))
+
+ def assertNoBackupFormat(self):
+ self.assertFalse(self.get_transport().has(
+ '.bzr/repository/format.original'))
+
+ def assertNoBackupIndices(self):
+ self.assertFalse(self.get_transport().has(
+ '.bzr/repository/oldindices'))
+
+ def assertNoNewIndices(self):
+ self.assertFalse(self.get_transport().has(
+ '.bzr/repository/newindices'))
+ self.assertFalse(self.get_transport().has(
+ '.bzr/repository/new-pack-names'))
+
+ def assertNoNewNames(self):
+ self.assertFalse(self.get_transport().has(
+ '.bzr/repository/new-pack-names'))
+
+ def assertOriginalFormatSet(self):
+ self.assertEqual(self.inter.source._format.get_format_string(),
+ self.get_transport().get_bytes('.bzr/repository/format'))
+
+ def assertUnModified(self):
+ self.assertOriginalFormatSet()
+ self.assertNoNewIndices()
+ self.assertNoNewNames()
+ self.assertNoBackupFormat()
+ self.assertOriginalFormatSet()
+ self.assertNoBackupIndices()
+ self.assertNamesUnlocked()
+
+ def test__create_fresh_tmp_index_dir(self):
+ self.inter._create_tmp_index_dir()
+ self.assertEmptyNewIndices()
+
+ def test__create_existing_tmp_index_dir_error(self):
+ # when there is a tmp index dir already it is not ignored.
+ self.inter._create_tmp_index_dir()
+ self.assertRaises(bzrerrors.FileExists,
+ self.inter._create_tmp_index_dir)
+
+ def test__prep_conversion_clean(self):
+ # starting the conversion physically locks the repo's names list.
+ self.inter._prep_conversion()
+ self.assertNamesLocked()
+ self.assertEmptyNewIndices()
+ self.assertFormatSaved()
+ self.inter.unlock()
+
+ def test__prep_conversion_existing_newindices(self):
+ # when there is a newindices dir already, it is cleaned.
+ self.inter._create_tmp_index_dir()
+ self.inter._prep_conversion()
+ self.assertEmptyNewIndices()
+ self.assertNamesLocked()
+ self.assertFormatSaved()
+ self.inter.unlock()
+
+ def test__remove_tmp_index_dir(self):
+ # if there is no tmp index dir, it does not fail.
+ self.inter._remove_tmp_index_dir()
+
+ def test__cancel_conversion_prepped(self):
+ self.inter._prep_conversion()
+ self.inter._cancel_conversion()
+ self.assertNoNewIndices()
+ self.assertNamesUnlocked()
+ self.assertNoBackupFormat()
+
+ def test__cancel_after_remove(self):
+ self.inter._prep_conversion()
+ self.inter._remove_format()
+ self.inter._cancel_conversion()
+ self.assertOriginalFormatSet()
+
+ def test__cancel_after_lock(self):
+ self.inter.lock()
+ self.assertNamesLocked()
+ self.inter._cancel_conversion()
+ self.assertUnModified()
+
+ def test__cancel_after_format_backup(self):
+ self.inter.lock()
+ self.inter._save_original_format()
+ self.inter._cancel_conversion()
+ self.assertUnModified()
+
+ def test__cancel_after_convert(self):
+ self.inter._prep_conversion()
+ self.inter._convert()
+ self.inter._cancel_conversion()
+ self.assertUnModified()
+
+ def test_cancel_after_pivot(self):
+ self.inter._prep_conversion()
+ self.inter._convert()
+ self.inter._pivot()
+ self.inter._cancel_conversion()
+ self.assertUnModified()
+
+ def test__complete(self):
+ # after converting, complete moves stuff into place
+ self.inter._prep_conversion()
+ self.inter._convert()
+ self.inter._pivot()
+ self.inter._complete()
+ self.assertNamesUnlocked()
+ self.assertNoBackupFormat()
+ self.assertNoBackupIndices()
+ self.assertNewFormatSet()
+ self.assertNoNewNames()
+ self.assertNoNewIndices()
+ self.assertConverted()
+
+ def test__convert_empty(self):
+ self.inter._prep_conversion()
+ self.inter._convert()
+ self.assertIndicesConverted()
+ self.inter.unlock()
+
+ def test__convert(self):
+ self.inter.source.bzrdir.create_branch()
+ tree = self.inter.source.bzrdir.create_workingtree()
+ # create two packs
+ tree.commit('post 1')
+ tree.commit('post 2')
+ self.inter._prep_conversion()
+ self.inter._convert()
+ self.assertIndicesConverted()
+ self.inter.unlock()
+
+ def test__pivot(self):
+ self.inter._prep_conversion()
+ self.inter._convert()
+ # track the calls made
+ self.inter.source._transport = get_transport('trace+' +
+ self.inter.source._transport.base)
+ self.inter._pivot()
+ activity = list(self.inter.source._transport._activity)
+ self.assertBackupIndices()
+ self.assertNoNewIndices()
+ self.assertNewFormatSet()
+ self.inter.unlock()
+ self.assertEqual([
+ ('delete', 'format'),
+ ('rename', 'indices', 'oldindices'),
+ ('rename', 'pack-names', 'old-pack-names'),
+ ('rename', 'newindices', 'indices'),
+ ('rename', 'new-pack-names', 'pack-names'),
+ ('put_bytes', 'format', 59, None),
+ ],
+ activity)
+
+ def test__remove_format(self):
+ self.inter._remove_format()
+ self.assertNoFormat()
+
+ def test__set_format(self):
+ self.inter._remove_format()
+ self.inter._set_format()
+ self.assertNewFormatSet()
+
+ def make_error_log_call(self, name, calls):
+ def log_error():
+ calls.append(name)
+ raise Exception(name)
+ return log_error
+
+ def make_log_call(self, name, calls):
+ def log_call():
+ calls.append(name)
+ return log_call
+
+ def test_convert_noerror(self):
+ # setup lambdas to trap what convert calls:
+ calls = []
+ self.inter._prep_conversion = self.make_log_call('prep', calls)
+ self.inter._convert = self.make_log_call('convert', calls)
+ self.inter._pivot = self.make_log_call('pivot', calls)
+ self.inter._complete = self.make_log_call('complete', calls)
+ self.inter._cancel_conversion = self.make_log_call('cancel', calls)
+ self.inter.convert()
+ self.assertEqual(['prep', 'convert', 'pivot', 'complete'], calls)
+
+ def test_convert_prep_error_cancels(self):
+ # setup lambdas to trap what convert calls:
+ calls = []
+ self.inter._prep_conversion = self.make_error_log_call('prep', calls)
+ self.inter._convert = self.make_log_call('convert', calls)
+ self.inter._pivot = self.make_log_call('pivot', calls)
+ self.inter._complete = self.make_log_call('complete', calls)
+ self.inter._cancel_conversion = self.make_log_call('cancel', calls)
+ error = self.assertRaises(Exception, self.inter.convert)
+ self.assertEqual(error.args, ('prep',))
+ self.assertEqual(['prep', 'cancel'], calls)
+
+ def test_convert_convert_error_cancels(self):
+ # setup lambdas to trap what convert calls:
+ calls = []
+ self.inter._prep_conversion = self.make_log_call('prep', calls)
+ self.inter._convert = self.make_error_log_call('convert', calls)
+ self.inter._pivot = self.make_log_call('pivot', calls)
+ self.inter._complete = self.make_log_call('complete', calls)
+ self.inter._cancel_conversion = self.make_log_call('cancel', calls)
+ error = self.assertRaises(Exception, self.inter.convert)
+ self.assertEqual(error.args, ('convert',))
+ self.assertEqual(['prep', 'convert', 'cancel'], calls)
+
+ def test_convert_pivot_error_cancels(self):
+ # setup lambdas to trap what convert calls:
+ calls = []
+ self.inter._prep_conversion = self.make_log_call('prep', calls)
+ self.inter._convert = self.make_log_call('convert', calls)
+ self.inter._pivot = self.make_error_log_call('pivot', calls)
+ self.inter._complete = self.make_log_call('complete', calls)
+ self.inter._cancel_conversion = self.make_log_call('cancel', calls)
+ error = self.assertRaises(Exception, self.inter.convert)
+ self.assertEqual(error.args, ('pivot',))
+ self.assertEqual(['prep', 'convert', 'pivot', 'cancel'], calls)
+
+ def test_convert_complete_error_does_not_cancel(self):
+ # setup lambdas to trap what convert calls:
+ calls = []
+ self.inter._prep_conversion = self.make_log_call('prep', calls)
+ self.inter._convert = self.make_log_call('convert', calls)
+ self.inter._pivot = self.make_log_call('pivot', calls)
+ self.inter._complete = self.make_error_log_call('complete', calls)
+ self.inter._cancel_conversion = self.make_log_call('cancel', calls)
+ error = self.assertRaises(Exception, self.inter.convert)
+ self.assertEqual(error.args, ('complete',))
+ self.assertEqual(['prep', 'convert', 'pivot', 'complete'], calls)
More information about the bazaar-commits
mailing list