[MERGE] 'knit' repository format [to reduce size of versioned-file merge]

John Arbash Meinel john at arbash-meinel.com
Wed Feb 22 05:23:08 GMT 2006


Robert Collins wrote:
> This patch adds the new repository format and upgrade logic that will be
> needed to activate knits when the versioned file merge is complete. It
> adds the UI to upgrade to knits, the logic to convert repositories
> within a meta-dir and matching tests.
> 
> Rob
> 


...

Rather than continually carry on this semi-hack. Why not just create a
Repository4, Repository5, Repository6 formats.

They don't really share a lot of code. (6 could inherit from 5 and just
set self._prefixed=True.

It just seems like this code chunk keeps getting moved around. And it
isn't all that great to start with. It just won't die. (And yes, I wrote
the initial version :)

> +class AllInOneRepository(Repository):
> +    """Legacy support - the repository behaviour for all-in-one branches."""
> +
> +    def __init__(self, _format, a_bzrdir, revision_store):
> +        # we reuse one control files instance.
> +        dir_mode = a_bzrdir._control_files._dir_mode
> +        file_mode = a_bzrdir._control_files._file_mode
> +
> +        def get_weave(name, prefixed=False):
> +            if name:
> +                name = safe_unicode(name)
> +            else:
> +                name = ''
> +            relpath = a_bzrdir._control_files._escape(name)
> +            weave_transport = a_bzrdir._control_files._transport.clone(relpath)
> +            ws = WeaveStore(weave_transport, prefixed=prefixed,
> +                            dir_mode=dir_mode,
> +                            file_mode=file_mode)
> +            if a_bzrdir._control_files._transport.should_cache():
> +                ws.enable_cache = True
> +            return ws
> +
> +        def get_store(name, compressed=True, prefixed=False):
> +            # FIXME: This approach of assuming stores are all entirely compressed
> +            # or entirely uncompressed is tidy, but breaks upgrade from 
> +            # some existing branches where there's a mixture; we probably 
> +            # still want the option to look for both.
> +            relpath = a_bzrdir._control_files._escape(name)
> +            store = TextStore(a_bzrdir._control_files._transport.clone(relpath),
> +                              prefixed=prefixed, compressed=compressed,
> +                              dir_mode=dir_mode,
> +                              file_mode=file_mode)
> +            #if self._transport.should_cache():
> +            #    cache_path = os.path.join(self.cache_root, name)
> +            #    os.mkdir(cache_path)
> +            #    store = bzrlib.store.CachedStore(store, cache_path)
> +            return store
> +
> +        # not broken out yet because the controlweaves|inventory_store
> +        # and text_store | weave_store bits are still different.
> +        if isinstance(_format, RepositoryFormat4):
> +            self.inventory_store = get_store('inventory-store')
> +            self.text_store = get_store('text-store')
> +        elif isinstance(_format, RepositoryFormat5):
> +            self.control_weaves = get_weave('')
> +            self.weave_store = get_weave('weaves')
> +        elif isinstance(_format, RepositoryFormat6):
> +            self.control_weaves = get_weave('')
> +            self.weave_store = get_weave('weaves', prefixed=True)
> +        else:
> +            raise errors.BzrError('unreachable code: unexpected repository'
> +                                  ' format.')
> +        revision_store.register_suffix('sig')
> +        super(AllInOneRepository, self).__init__(_format, a_bzrdir, a_bzrdir._control_files, revision_store)
> +
> +
> +class MetaDirRepository(Repository):
> +    """Repositories in the new meta-dir layout."""
> +
> +    def __init__(self, _format, a_bzrdir, control_files, revision_store):
> +        super(MetaDirRepository, self).__init__(_format,
> +                                                a_bzrdir,
> +                                                control_files,
> +                                                revision_store)
> +
> +        dir_mode = self.control_files._dir_mode
> +        file_mode = self.control_files._file_mode
> +
> +        def get_weave(name, prefixed=False):
> +            if name:
> +                name = safe_unicode(name)
> +            else:
> +                name = ''
> +            relpath = self.control_files._escape(name)
> +            weave_transport = self.control_files._transport.clone(relpath)
> +            ws = WeaveStore(weave_transport, prefixed=prefixed,
> +                            dir_mode=dir_mode,
> +                            file_mode=file_mode)
> +            if self.control_files._transport.should_cache():
> +                ws.enable_cache = True
> +            return ws
> +
> +        if isinstance(self._format, RepositoryFormat7):
> +            self.control_weaves = get_weave('')
> +            self.weave_store = get_weave('weaves', prefixed=True)
> +        elif isinstance(self._format, RepositoryFormatKnit1):
> +            self.control_weaves = get_weave('')
> +            self.weave_store = get_weave('knits', prefixed=True)
> +        else:
> +            raise errors.BzrError('unreachable code: unexpected repository'
> +                                  ' format.')
> +

Yipes! Now it has even spawned a friend.
And why is RepositoryFormatKnit creating a WeaveStore. I thought it had
to create a KnitStore, or something.

> +
>  class RepositoryFormat(object):
>      """A repository format.
>  
> @@ -562,6 +602,35 @@
>          """
>          raise NotImplementedError(self.get_format_string)
>  

something isn't right here. You have '_get_revision_store' raising
NotImplemented for _get_rev_store. And if you have _get_rev_store() it
seems like the other code should use it, rather than using get_weave()
and get_store().

> +    def _get_revision_store(self, repo_transport, control_files):
> +        """Return the revision store object for this a_bzrdir."""
> +        raise NotImplementedError(self._get_rev_store)
> +
> +    def _get_rev_store(self,
> +                   transport,
> +                   control_files,
> +                   name,
> +                   compressed=True,
> +                   prefixed=False):
> +        """Common logic for getting a revision store for a repository.
> +        
> +        see self._get_revision_store for the method to 
> +        get the store for a repository.
> +        """
> +        if name:
> +            name = safe_unicode(name)
> +        else:
> +            name = ''
> +        dir_mode = control_files._dir_mode
> +        file_mode = control_files._file_mode
> +        revision_store =TextStore(transport.clone(name),
> +                                  prefixed=prefixed,
> +                                  compressed=compressed,
> +                                  dir_mode=dir_mode,
> +                                  file_mode=file_mode)
> +        revision_store.register_suffix('sig')
> +        return revision_store
> +
>      def initialize(self, a_bzrdir, shared=False):
>          """Initialize a repository of this format in a_bzrdir.
>  

...

This seems like it should at least have a comment that we will be
replacing the 'from bzrlib.weave import Weave' to importing something else.

> +class RepositoryFormatKnit1(MetaDirRepositoryFormat):
> +    """Bzr repository knit format 1.
> +
> +    This repository format has:
> +     - knits for file texts and inventory
> +     - hash subdirectory based stores.
> +     - knits for revisions and signatures
> +     - TextStores for revisions and signatures.
> +     - a format marker of its own
> +     - an optional 'shared-storage' flag
> +     - an optional 'no-working-trees' flag
> +    """
> +
> +    def get_format_string(self):
> +        """See RepositoryFormat.get_format_string()."""
> +        return "Bazaar-NG Knit Repository Format 1"
> +
> +    def initialize(self, a_bzrdir, shared=False):
> +        """Create a knit format 1 repository.
> +
> +        :param shared: If true the repository will be initialized as a shared
> +                       repository.
> +        """
> +        from bzrlib.weavefile import write_weave_v5
> +        from bzrlib.weave import Weave
> +
> +        # Create an empty weave
> +        sio = StringIO()
> +        bzrlib.weavefile.write_weave_v5(Weave(), sio)
> +        empty_weave = sio.getvalue()
> +
> +        mutter('creating repository in %s.', a_bzrdir.transport.base)
> +        dirs = ['revision-store', 'knits']
> +        files = [('inventory.weave', StringIO(empty_weave)), 
> +                 ]
> +        utf8_files = [('format', self.get_format_string())]
> +        
> +        self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
> +        return self.open(a_bzrdir=a_bzrdir, _found=True)
>  
>  
>  # formats which have no format string are not discoverable
>  # and not independently creatable, so are not registered.
>  _default_format = RepositoryFormat7()
>  RepositoryFormat.register_format(_default_format)
> +RepositoryFormat.register_format(RepositoryFormatKnit1())
>  RepositoryFormat.set_default_format(_default_format)
>  _legacy_formats = [RepositoryFormat4(),
>                     RepositoryFormat5(),
> @@ -906,17 +1093,17 @@
>                              revision_id.
>          """
>          # generic, possibly worst case, slow code path.
> -        target_ids = set(self.source.all_revision_ids())
> +        target_ids = set(self.target.all_revision_ids())
>          if revision_id is not None:
> -            source_ids = self.target.get_ancestry(revision_id)
> +            source_ids = self.source.get_ancestry(revision_id)
>              assert source_ids.pop(0) == None
>          else:
> -            source_ids = self.target.all_revision_ids()
> +            source_ids = self.source.all_revision_ids()
>          result_set = set(source_ids).difference(target_ids)
>          # this may look like a no-op: its not. It preserves the ordering
>          # other_ids had while only returning the members from other_ids
>          # that we've decided we need.
> -        return [rev_id for rev_id in other_ids if rev_id in result_set]
> +        return [rev_id for rev_id in source_ids if rev_id in result_set]
>  
>      @classmethod
>      def register_optimiser(klass, optimiser):
> @@ -1115,8 +1302,9 @@
>          # default format.
>          # XXX: robertc 20060220 reinstate this when there are two supported
>          # formats which do not have an optimal code path between them.
> -        #result.append((InterRepository, RepositoryFormat6(),
> -        #              RepositoryFormat.get_default_format()))
> +        result.append((InterRepository,
> +                       RepositoryFormat6(),
> +                       RepositoryFormatKnit1()))
>          for optimiser in InterRepository._optimisers:
>              result.append((optimiser,
>                             optimiser._matching_repo_format,
> @@ -1125,3 +1313,54 @@
>          # if there are specific combinations we want to use, we can add them 
>          # here.
>          return result
> +
> +
> +class CopyConverter(object):
> +    """A repository conversion tool which just performs a copy of the content.
> +    
> +    This is slow but quite reliable.
> +    """
> +
> +    def __init__(self, target_format):
> +        """Create a CopyConverter.
> +
> +        :param target_format: The format the resulting repository should be.
> +        """
> +        self.target_format = target_format
> +        
> +    def convert(self, repo, pb):
> +        """Perform the conversion of to_convert, giving feedback via pb.
> +
> +        :param to_convert: The disk object to convert.
> +        :param pb: a progress bar to use for progress information.
> +        """
> +        self.pb = pb
> +        self.count = 0
> +        self.total = 3

Is this supposed to be a TODO, or just that get_format_string() fails
for everything that isn't a RepoMetaDir format?
Again, it seems like using side-effects like this is sub-optimal.

> +        # this is only useful with metadir layouts - separated repo content.
> +        # trigger an assertion if not such
> +        repo._format.get_format_string()
> +        self.repo_dir = repo.bzrdir
> +        self.step('Moving repository to repository.backup')
> +        self.repo_dir.transport.move('repository', 'repository.backup')
> +        backup_transport =  self.repo_dir.transport.clone('repository.backup')
> +        self.source_repo = repo._format.open(self.repo_dir,
> +            _found=True,
> +            _override_transport=backup_transport)
> +        self.step('Creating new repository')
> +        converted = self.target_format.initialize(self.repo_dir,
> +                                                  self.source_repo.is_shared())
> +        converted.lock_write()
> +        try:
> +            self.step('Copying content into repository.')
> +            self.source_repo.copy_content_into(converted)
> +        finally:
> +            converted.unlock()
> +        self.step('Deleting old repository content.')
> +        self.repo_dir.transport.delete_tree('repository.backup')
> +        self.pb.note('repository converted')
> +
> +    def step(self, message):
> +        """Update the pb by a step."""
> +        self.count +=1
> +        self.pb.update(message, self.count, self.total)
> 
> === modified file 'bzrlib/tests/blackbox/test_upgrade.py'
> --- bzrlib/tests/blackbox/test_upgrade.py	
> +++ bzrlib/tests/blackbox/test_upgrade.py	
> @@ -21,6 +21,7 @@
>  import os
>  
>  import bzrlib.bzrdir as bzrdir
> +import bzrlib.repository as repository
>  from bzrlib.tests import TestCaseWithTransport
>  from bzrlib.transport import get_transport
>  import bzrlib.ui as ui
> @@ -30,6 +31,9 @@
>      """A UI Factory which never captures its output.
>      """
>  
> +    def clear(self):
> +        """See progress.ProgressBar.clear()."""
> +
>      def note(self, fmt_string, *args, **kwargs):
>          """See progress.ProgressBar.note()."""
>          print fmt_string % args
> @@ -37,7 +41,7 @@
>      def progress_bar(self):
>          return self
>          
> -    def update(self, message, count, total):
> +    def update(self, message, count=None, total=None):
>          """See progress.ProgressBar.update()."""
>  
>  
> @@ -57,6 +61,10 @@
>          t.mkdir('format_5_branch')
>          bzrdir.BzrDirFormat5().initialize(self.get_url('format_5_branch'))
>          bzrdir.BzrDir.create_standalone_workingtree('current_format_branch')
> +        d = bzrdir.BzrDir.create('metadir_weave_branch')
> +        d.create_repository()
> +        d.create_branch()
> +        d.create_workingtree()
>          self.run_bzr('checkout',
>                       self.get_url('current_format_branch'),
>                       'current_format_checkout')
> @@ -129,3 +137,27 @@
>          self.assertTrue(isinstance(
>              bzrdir.BzrDir.open(self.get_url('format_5_branch'))._format,
>              bzrdir.BzrDirMetaFormat1))
> +
> +    def test_upgrade_explicit_knit(self):
> +        # users can force an upgrade to knit format from a metadir weave 
> +        # branch
> +        url = get_transport(self.get_url('metadir_weave_branch')).base
> +        # check --format takes effect
> +        bzrdir.BzrDirFormat.set_default_format(bzrdir.BzrDirFormat5())
> +        (out, err) = self.run_bzr_captured(
> +            ['upgrade', '--format=knit', url])
> +        self.assertEqualDiff("""starting upgrade of %s
> +making backup of tree history
> +%s.bzr has been backed up to %s.bzr.backup
> +if conversion fails, you can move this directory back to .bzr
> +if it succeeds, you can remove this directory if you wish
> +starting repository conversion
> +repository converted
> +finished
> +""" % (url, url, url), out)
> +        self.assertEqualDiff("", err)
> +        converted_dir = bzrdir.BzrDir.open(self.get_url('metadir_weave_branch'))
> +        self.assertTrue(isinstance(converted_dir._format,
> +                                   bzrdir.BzrDirMetaFormat1))
> +        self.assertTrue(isinstance(converted_dir.open_repository()._format,
> +                                   repository.RepositoryFormatKnit1))
> 

Is the 'has been moved to .bzr.backup' a true statement? At least it
looks like you are using 'delete_tree()' which would remove
repository.backup.

I also thought we were trying to switch to upgrading into '.bzr.new' and
then renaming into place, rather than renaming out of the way, and then
upgrading into place.

> === modified file 'bzrlib/tests/branch_implementations/test_branch.py'
> --- bzrlib/tests/branch_implementations/test_branch.py	
> +++ bzrlib/tests/branch_implementations/test_branch.py	
> @@ -266,23 +266,6 @@
>                              'sig').read(),
>                           d2.open_repository().revision_store.get('A', 
>                              'sig').read())
> -
> -    def test_upgrade_preserves_signatures(self):
> -        wt = self.make_branch_and_tree('source')
> -        wt.commit('A', allow_pointless=True, rev_id='A')
> -        wt.branch.repository.sign_revision('A',
> -            bzrlib.gpg.LoopbackGPGStrategy(None))
> -        old_signature = wt.branch.repository.revision_store.get('A',
> -            'sig').read()
> -        try:
> -            upgrade(wt.basedir)
> -        except errors.UpToDateFormat:
> -            # this is in the most current format already.
> -            return
> -        wt = WorkingTree.open(wt.basedir)
> -        new_signature = wt.branch.repository.revision_store.get('A',
> -            'sig').read()
> -        self.assertEqual(old_signature, new_signature)
>  
>      def test_nicks(self):
>          """Branch nicknames"""
> 
> === modified file 'bzrlib/tests/bzrdir_implementations/test_bzrdir.py'
> --- bzrlib/tests/bzrdir_implementations/test_bzrdir.py	
> +++ bzrlib/tests/bzrdir_implementations/test_bzrdir.py	
> @@ -1021,16 +1021,36 @@
>          # check that we can ask an instance if its upgradable
>          dir = self.make_bzrdir('.')
>          if dir.can_convert_format():
> -            # if its updatable there must be an updater
> -            self.assertTrue(isinstance(dir._format.get_converter(),
> -                                       bzrdir.Converter))
> +            # if its default updatable there must be an updater 
> +            # (we change the default to match the lastest known format
> +            # as downgrades may not be available
> +            old_format = bzrdir.BzrDirFormat.get_default_format()
> +            bzrdir.BzrDirFormat.set_default_format(dir._format)
> +            try:
> +                self.assertTrue(isinstance(dir._format.get_converter(),
> +                                           bzrdir.Converter))
> +            finally:
> +                bzrdir.BzrDirFormat.set_default_format(old_format)
>          dir.needs_format_conversion(None)
>  
>      def test_upgrade_new_instance(self):
>          """Does an available updater work ?."""
>          dir = self.make_bzrdir('.')
> +        # for now, check is not ready for partial bzrdirs.
> +        dir.create_repository()
> +        dir.create_branch()
> +        dir.create_workingtree()
>          if dir.can_convert_format():
> -            dir._format.get_converter(None).convert(dir, ui.ui_factory.progress_bar())
> +            # if its default updatable there must be an updater 
> +            # (we change the default to match the lastest known format
> +            # as downgrades may not be available
> +            old_format = bzrdir.BzrDirFormat.get_default_format()
> +            bzrdir.BzrDirFormat.set_default_format(dir._format)
> +            try:
> +                dir._format.get_converter(None).convert(dir,
> +                    ui.ui_factory.progress_bar())
> +            finally:
> +                bzrdir.BzrDirFormat.set_default_format(old_format)
>              # and it should pass 'check' now.
>              check(bzrdir.BzrDir.open(self.get_url('.')).open_branch(), False)
>  
> 
> === modified file 'bzrlib/tests/interrepository_implementations/test_interrepository.py'
> --- bzrlib/tests/interrepository_implementations/test_interrepository.py	


Otherwise, I think it looks good. And I assume this is prep work so that
you can actually change over to knits in the future. So +1 from me.

John
=:->

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 254 bytes
Desc: OpenPGP digital signature
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20060221/81f20b75/attachment.pgp 


More information about the bazaar mailing list