[MERGE] 'knit' repository format [to reduce size of versioned-file merge]
John Arbash Meinel
john at arbash-meinel.com
Wed Feb 22 05:23:08 GMT 2006
Robert Collins wrote:
> This patch adds the new repository format and upgrade logic that will be
> needed to activate knits when the versioned file merge is complete. It
> adds the UI to upgrade to knits, the logic to convert repositories
> within a meta-dir and matching tests.
>
> Rob
>
...
Rather than continually carry on this semi-hack. Why not just create a
Repository4, Repository5, Repository6 formats.
They don't really share a lot of code. (6 could inherit from 5 and just
set self._prefixed=True.
It just seems like this code chunk keeps getting moved around. And it
isn't all that great to start with. It just won't die. (And yes, I wrote
the initial version :)
> +class AllInOneRepository(Repository):
> + """Legacy support - the repository behaviour for all-in-one branches."""
> +
> + def __init__(self, _format, a_bzrdir, revision_store):
> + # we reuse one control files instance.
> + dir_mode = a_bzrdir._control_files._dir_mode
> + file_mode = a_bzrdir._control_files._file_mode
> +
> + def get_weave(name, prefixed=False):
> + if name:
> + name = safe_unicode(name)
> + else:
> + name = ''
> + relpath = a_bzrdir._control_files._escape(name)
> + weave_transport = a_bzrdir._control_files._transport.clone(relpath)
> + ws = WeaveStore(weave_transport, prefixed=prefixed,
> + dir_mode=dir_mode,
> + file_mode=file_mode)
> + if a_bzrdir._control_files._transport.should_cache():
> + ws.enable_cache = True
> + return ws
> +
> + def get_store(name, compressed=True, prefixed=False):
> + # FIXME: This approach of assuming stores are all entirely compressed
> + # or entirely uncompressed is tidy, but breaks upgrade from
> + # some existing branches where there's a mixture; we probably
> + # still want the option to look for both.
> + relpath = a_bzrdir._control_files._escape(name)
> + store = TextStore(a_bzrdir._control_files._transport.clone(relpath),
> + prefixed=prefixed, compressed=compressed,
> + dir_mode=dir_mode,
> + file_mode=file_mode)
> + #if self._transport.should_cache():
> + # cache_path = os.path.join(self.cache_root, name)
> + # os.mkdir(cache_path)
> + # store = bzrlib.store.CachedStore(store, cache_path)
> + return store
> +
> + # not broken out yet because the controlweaves|inventory_store
> + # and text_store | weave_store bits are still different.
> + if isinstance(_format, RepositoryFormat4):
> + self.inventory_store = get_store('inventory-store')
> + self.text_store = get_store('text-store')
> + elif isinstance(_format, RepositoryFormat5):
> + self.control_weaves = get_weave('')
> + self.weave_store = get_weave('weaves')
> + elif isinstance(_format, RepositoryFormat6):
> + self.control_weaves = get_weave('')
> + self.weave_store = get_weave('weaves', prefixed=True)
> + else:
> + raise errors.BzrError('unreachable code: unexpected repository'
> + ' format.')
> + revision_store.register_suffix('sig')
> + super(AllInOneRepository, self).__init__(_format, a_bzrdir, a_bzrdir._control_files, revision_store)
> +
> +
> +class MetaDirRepository(Repository):
> + """Repositories in the new meta-dir layout."""
> +
> + def __init__(self, _format, a_bzrdir, control_files, revision_store):
> + super(MetaDirRepository, self).__init__(_format,
> + a_bzrdir,
> + control_files,
> + revision_store)
> +
> + dir_mode = self.control_files._dir_mode
> + file_mode = self.control_files._file_mode
> +
> + def get_weave(name, prefixed=False):
> + if name:
> + name = safe_unicode(name)
> + else:
> + name = ''
> + relpath = self.control_files._escape(name)
> + weave_transport = self.control_files._transport.clone(relpath)
> + ws = WeaveStore(weave_transport, prefixed=prefixed,
> + dir_mode=dir_mode,
> + file_mode=file_mode)
> + if self.control_files._transport.should_cache():
> + ws.enable_cache = True
> + return ws
> +
> + if isinstance(self._format, RepositoryFormat7):
> + self.control_weaves = get_weave('')
> + self.weave_store = get_weave('weaves', prefixed=True)
> + elif isinstance(self._format, RepositoryFormatKnit1):
> + self.control_weaves = get_weave('')
> + self.weave_store = get_weave('knits', prefixed=True)
> + else:
> + raise errors.BzrError('unreachable code: unexpected repository'
> + ' format.')
> +
Yipes! Now it has even spawned a friend.
And why is RepositoryFormatKnit creating a WeaveStore. I thought it had
to create a KnitStore, or something.
> +
> class RepositoryFormat(object):
> """A repository format.
>
> @@ -562,6 +602,35 @@
> """
> raise NotImplementedError(self.get_format_string)
>
something isn't right here. You have '_get_revision_store' raising
NotImplemented for _get_rev_store. And if you have _get_rev_store() it
seems like the other code should use it, rather than using get_weave()
and get_store().
> + def _get_revision_store(self, repo_transport, control_files):
> + """Return the revision store object for this a_bzrdir."""
> + raise NotImplementedError(self._get_rev_store)
> +
> + def _get_rev_store(self,
> + transport,
> + control_files,
> + name,
> + compressed=True,
> + prefixed=False):
> + """Common logic for getting a revision store for a repository.
> +
> + see self._get_revision_store for the method to
> + get the store for a repository.
> + """
> + if name:
> + name = safe_unicode(name)
> + else:
> + name = ''
> + dir_mode = control_files._dir_mode
> + file_mode = control_files._file_mode
> + revision_store =TextStore(transport.clone(name),
> + prefixed=prefixed,
> + compressed=compressed,
> + dir_mode=dir_mode,
> + file_mode=file_mode)
> + revision_store.register_suffix('sig')
> + return revision_store
> +
> def initialize(self, a_bzrdir, shared=False):
> """Initialize a repository of this format in a_bzrdir.
>
...
This seems like it should at least have a comment that we will be
replacing the 'from bzrlib.weave import Weave' to importing something else.
> +class RepositoryFormatKnit1(MetaDirRepositoryFormat):
> + """Bzr repository knit format 1.
> +
> + This repository format has:
> + - knits for file texts and inventory
> + - hash subdirectory based stores.
> + - knits for revisions and signatures
> + - TextStores for revisions and signatures.
> + - a format marker of its own
> + - an optional 'shared-storage' flag
> + - an optional 'no-working-trees' flag
> + """
> +
> + def get_format_string(self):
> + """See RepositoryFormat.get_format_string()."""
> + return "Bazaar-NG Knit Repository Format 1"
> +
> + def initialize(self, a_bzrdir, shared=False):
> + """Create a knit format 1 repository.
> +
> + :param shared: If true the repository will be initialized as a shared
> + repository.
> + """
> + from bzrlib.weavefile import write_weave_v5
> + from bzrlib.weave import Weave
> +
> + # Create an empty weave
> + sio = StringIO()
> + bzrlib.weavefile.write_weave_v5(Weave(), sio)
> + empty_weave = sio.getvalue()
> +
> + mutter('creating repository in %s.', a_bzrdir.transport.base)
> + dirs = ['revision-store', 'knits']
> + files = [('inventory.weave', StringIO(empty_weave)),
> + ]
> + utf8_files = [('format', self.get_format_string())]
> +
> + self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
> + return self.open(a_bzrdir=a_bzrdir, _found=True)
>
>
> # formats which have no format string are not discoverable
> # and not independently creatable, so are not registered.
> _default_format = RepositoryFormat7()
> RepositoryFormat.register_format(_default_format)
> +RepositoryFormat.register_format(RepositoryFormatKnit1())
> RepositoryFormat.set_default_format(_default_format)
> _legacy_formats = [RepositoryFormat4(),
> RepositoryFormat5(),
> @@ -906,17 +1093,17 @@
> revision_id.
> """
> # generic, possibly worst case, slow code path.
> - target_ids = set(self.source.all_revision_ids())
> + target_ids = set(self.target.all_revision_ids())
> if revision_id is not None:
> - source_ids = self.target.get_ancestry(revision_id)
> + source_ids = self.source.get_ancestry(revision_id)
> assert source_ids.pop(0) == None
> else:
> - source_ids = self.target.all_revision_ids()
> + source_ids = self.source.all_revision_ids()
> result_set = set(source_ids).difference(target_ids)
> # this may look like a no-op: its not. It preserves the ordering
> # other_ids had while only returning the members from other_ids
> # that we've decided we need.
> - return [rev_id for rev_id in other_ids if rev_id in result_set]
> + return [rev_id for rev_id in source_ids if rev_id in result_set]
>
> @classmethod
> def register_optimiser(klass, optimiser):
> @@ -1115,8 +1302,9 @@
> # default format.
> # XXX: robertc 20060220 reinstate this when there are two supported
> # formats which do not have an optimal code path between them.
> - #result.append((InterRepository, RepositoryFormat6(),
> - # RepositoryFormat.get_default_format()))
> + result.append((InterRepository,
> + RepositoryFormat6(),
> + RepositoryFormatKnit1()))
> for optimiser in InterRepository._optimisers:
> result.append((optimiser,
> optimiser._matching_repo_format,
> @@ -1125,3 +1313,54 @@
> # if there are specific combinations we want to use, we can add them
> # here.
> return result
> +
> +
> +class CopyConverter(object):
> + """A repository conversion tool which just performs a copy of the content.
> +
> + This is slow but quite reliable.
> + """
> +
> + def __init__(self, target_format):
> + """Create a CopyConverter.
> +
> + :param target_format: The format the resulting repository should be.
> + """
> + self.target_format = target_format
> +
> + def convert(self, repo, pb):
> + """Perform the conversion of to_convert, giving feedback via pb.
> +
> + :param to_convert: The disk object to convert.
> + :param pb: a progress bar to use for progress information.
> + """
> + self.pb = pb
> + self.count = 0
> + self.total = 3
Is this supposed to be a TODO, or just that get_format_string() fails
for everything that isn't a RepoMetaDir format?
Again, it seems like using side-effects like this is sub-optimal.
> + # this is only useful with metadir layouts - separated repo content.
> + # trigger an assertion if not such
> + repo._format.get_format_string()
> + self.repo_dir = repo.bzrdir
> + self.step('Moving repository to repository.backup')
> + self.repo_dir.transport.move('repository', 'repository.backup')
> + backup_transport = self.repo_dir.transport.clone('repository.backup')
> + self.source_repo = repo._format.open(self.repo_dir,
> + _found=True,
> + _override_transport=backup_transport)
> + self.step('Creating new repository')
> + converted = self.target_format.initialize(self.repo_dir,
> + self.source_repo.is_shared())
> + converted.lock_write()
> + try:
> + self.step('Copying content into repository.')
> + self.source_repo.copy_content_into(converted)
> + finally:
> + converted.unlock()
> + self.step('Deleting old repository content.')
> + self.repo_dir.transport.delete_tree('repository.backup')
> + self.pb.note('repository converted')
> +
> + def step(self, message):
> + """Update the pb by a step."""
> + self.count +=1
> + self.pb.update(message, self.count, self.total)
>
> === modified file 'bzrlib/tests/blackbox/test_upgrade.py'
> --- bzrlib/tests/blackbox/test_upgrade.py
> +++ bzrlib/tests/blackbox/test_upgrade.py
> @@ -21,6 +21,7 @@
> import os
>
> import bzrlib.bzrdir as bzrdir
> +import bzrlib.repository as repository
> from bzrlib.tests import TestCaseWithTransport
> from bzrlib.transport import get_transport
> import bzrlib.ui as ui
> @@ -30,6 +31,9 @@
> """A UI Factory which never captures its output.
> """
>
> + def clear(self):
> + """See progress.ProgressBar.clear()."""
> +
> def note(self, fmt_string, *args, **kwargs):
> """See progress.ProgressBar.note()."""
> print fmt_string % args
> @@ -37,7 +41,7 @@
> def progress_bar(self):
> return self
>
> - def update(self, message, count, total):
> + def update(self, message, count=None, total=None):
> """See progress.ProgressBar.update()."""
>
>
> @@ -57,6 +61,10 @@
> t.mkdir('format_5_branch')
> bzrdir.BzrDirFormat5().initialize(self.get_url('format_5_branch'))
> bzrdir.BzrDir.create_standalone_workingtree('current_format_branch')
> + d = bzrdir.BzrDir.create('metadir_weave_branch')
> + d.create_repository()
> + d.create_branch()
> + d.create_workingtree()
> self.run_bzr('checkout',
> self.get_url('current_format_branch'),
> 'current_format_checkout')
> @@ -129,3 +137,27 @@
> self.assertTrue(isinstance(
> bzrdir.BzrDir.open(self.get_url('format_5_branch'))._format,
> bzrdir.BzrDirMetaFormat1))
> +
> + def test_upgrade_explicit_knit(self):
> + # users can force an upgrade to knit format from a metadir weave
> + # branch
> + url = get_transport(self.get_url('metadir_weave_branch')).base
> + # check --format takes effect
> + bzrdir.BzrDirFormat.set_default_format(bzrdir.BzrDirFormat5())
> + (out, err) = self.run_bzr_captured(
> + ['upgrade', '--format=knit', url])
> + self.assertEqualDiff("""starting upgrade of %s
> +making backup of tree history
> +%s.bzr has been backed up to %s.bzr.backup
> +if conversion fails, you can move this directory back to .bzr
> +if it succeeds, you can remove this directory if you wish
> +starting repository conversion
> +repository converted
> +finished
> +""" % (url, url, url), out)
> + self.assertEqualDiff("", err)
> + converted_dir = bzrdir.BzrDir.open(self.get_url('metadir_weave_branch'))
> + self.assertTrue(isinstance(converted_dir._format,
> + bzrdir.BzrDirMetaFormat1))
> + self.assertTrue(isinstance(converted_dir.open_repository()._format,
> + repository.RepositoryFormatKnit1))
>
Is the 'has been moved to .bzr.backup' a true statement? At least it
looks like you are using 'delete_tree()' which would remove
repository.backup.
I also thought we were trying to switch to upgrading into '.bzr.new' and
then renaming into place, rather than renaming out of the way, and then
upgrading into place.
> === modified file 'bzrlib/tests/branch_implementations/test_branch.py'
> --- bzrlib/tests/branch_implementations/test_branch.py
> +++ bzrlib/tests/branch_implementations/test_branch.py
> @@ -266,23 +266,6 @@
> 'sig').read(),
> d2.open_repository().revision_store.get('A',
> 'sig').read())
> -
> - def test_upgrade_preserves_signatures(self):
> - wt = self.make_branch_and_tree('source')
> - wt.commit('A', allow_pointless=True, rev_id='A')
> - wt.branch.repository.sign_revision('A',
> - bzrlib.gpg.LoopbackGPGStrategy(None))
> - old_signature = wt.branch.repository.revision_store.get('A',
> - 'sig').read()
> - try:
> - upgrade(wt.basedir)
> - except errors.UpToDateFormat:
> - # this is in the most current format already.
> - return
> - wt = WorkingTree.open(wt.basedir)
> - new_signature = wt.branch.repository.revision_store.get('A',
> - 'sig').read()
> - self.assertEqual(old_signature, new_signature)
>
> def test_nicks(self):
> """Branch nicknames"""
>
> === modified file 'bzrlib/tests/bzrdir_implementations/test_bzrdir.py'
> --- bzrlib/tests/bzrdir_implementations/test_bzrdir.py
> +++ bzrlib/tests/bzrdir_implementations/test_bzrdir.py
> @@ -1021,16 +1021,36 @@
> # check that we can ask an instance if its upgradable
> dir = self.make_bzrdir('.')
> if dir.can_convert_format():
> - # if its updatable there must be an updater
> - self.assertTrue(isinstance(dir._format.get_converter(),
> - bzrdir.Converter))
> + # if its default updatable there must be an updater
> + # (we change the default to match the lastest known format
> + # as downgrades may not be available
> + old_format = bzrdir.BzrDirFormat.get_default_format()
> + bzrdir.BzrDirFormat.set_default_format(dir._format)
> + try:
> + self.assertTrue(isinstance(dir._format.get_converter(),
> + bzrdir.Converter))
> + finally:
> + bzrdir.BzrDirFormat.set_default_format(old_format)
> dir.needs_format_conversion(None)
>
> def test_upgrade_new_instance(self):
> """Does an available updater work ?."""
> dir = self.make_bzrdir('.')
> + # for now, check is not ready for partial bzrdirs.
> + dir.create_repository()
> + dir.create_branch()
> + dir.create_workingtree()
> if dir.can_convert_format():
> - dir._format.get_converter(None).convert(dir, ui.ui_factory.progress_bar())
> + # if its default updatable there must be an updater
> + # (we change the default to match the lastest known format
> + # as downgrades may not be available
> + old_format = bzrdir.BzrDirFormat.get_default_format()
> + bzrdir.BzrDirFormat.set_default_format(dir._format)
> + try:
> + dir._format.get_converter(None).convert(dir,
> + ui.ui_factory.progress_bar())
> + finally:
> + bzrdir.BzrDirFormat.set_default_format(old_format)
> # and it should pass 'check' now.
> check(bzrdir.BzrDir.open(self.get_url('.')).open_branch(), False)
>
>
> === modified file 'bzrlib/tests/interrepository_implementations/test_interrepository.py'
> --- bzrlib/tests/interrepository_implementations/test_interrepository.py
Otherwise, I think it looks good. And I assume this is prep work so that
you can actually change over to knits in the future. So +1 from me.
John
=:->
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 254 bytes
Desc: OpenPGP digital signature
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20060221/81f20b75/attachment.pgp
More information about the bazaar
mailing list