[MERGE] Refactor commit to prepare for population by tree walking

Fri Jun 29 21:42:37 BST 2007

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Ian Clatworthy wrote:
> === modified file bzrlib/commit.py
> --- bzrlib/commit.py
> +++ bzrlib/commit.py
> @@ -65,7 +65,8 @@
>  import bzrlib.config
>  from bzrlib.errors import (BzrError, PointlessCommit,
>                             ConflictsInTree,
> -                           StrictCommitFailed
> +                           StrictCommitFailed,
> +                           NoSuchId
>                             )

The preferred style is to simply import errors, and then use
errors.NoSuchId.  If you do import NoSuchId, try to maintain
alphabetical order.  And make sure every entry has a comma after it, so
that adding new entries doesn't entail editing the preceeding line.

> -            self.pb.show_bar = False
> +            self.pb.show_bar = True

Thanks :-)

> -        deleted_ids = []
> -        deleted_paths = set()
> -        for path, new_ie in entries:
> -            self._emit_progress_next_entry()
> -            file_id = new_ie.file_id

Hmm.  We really need a diff that doesn't get confused by indenting changes.

> +            root_added_already = True
> +
> +        # Build the new inventory. _populate_from_tree() is the preferred
> +        # direction here but it doesn't support multiple parents yet,
> +        # it triggers a unicode normalisation issue in the test suite and
> +        # it needs more testing.

> +        populator = self._populate_from_inventory
> +        #if len(self.parents) == 1:
> +        #    populator = self._populate_from_tree
> +        populator(specific_files, root_added_already)

^^ this is sort of ugly.  It would be much nicer to restore
populate_from_tree first.

> +            try:
> +                kind = self.work_tree.kind(file_id)
> +                # TODO: specific_files filtering before nested tree processing?
> +                if kind == 'tree-reference' and self.recursive == 'down':
> +                    self._commit_nested_tree(path)

^^^ Yeah.  No doubt a thinko on my part.
> +            self._record_entry(path, file_id, specific_files, kind, name,
> +                parent_id, definitely_changed, new_ie)
> +
> +        # Unversion IDs that were found to be deleted
> +        self.work_tree.unversion(deleted_ids)

^^^ Doing this last disturbs me.  You can't normally do unversioning
last, because you may need to put something else into the same position.
 This one is okay, I guess.

> +    def _populate_from_tree(self, specific_files, root_added_already):
> +        """Populate the CommitBuilder by walking the working tree."""
> +        # Until trees supports an "iter_commitable" iterator that
> +        # understand multiple parents, this assertion is required.
> +        assert len(self.parents) == 1
> +
> +        deleted_ids = []
> +        deleted_paths = set()
> +        entries = self.work_tree._iter_changes(self.basis_tree,
> +            include_unchanged=True, want_unversioned=True,
> +            require_versioned=False)

I don't see why you've got require_versioned=False.  That only affects
behavior when specific_files is specified.

> +            # Skip unknowns unless strict mode
> +            if versioned_flags == (False,False):
> +                if self.strict:
> +                    raise StrictCommitFailed()
> +                else:
> +                    continue

I don't think you know that the files are unknown at this point.  They
may be ignored.

> +            # TODO: specific_files filtering before nested tree processing?
> +            # TODO: keep track of nested trees and don't recurse into them?
> +            if kind == 'tree-reference' and self.recursive == 'down':
> +                self._commit_nested_tree(path)

^^^ This should include file_id at least.

> +        for id in deleted_ids:
> +            try:
> +                self.work_tree.unversion([id])
> +            except NoSuchId:
> +                pass

^^^ I think it would be better to handle this as another call to
record_entry.

> +    def _commit_nested_tree(self, path):
> +        "Commit a nested tree."
> +        sub_tree = WorkingTree.open(self.work_tree.abspath(path))

^^^ the recommended way of doing this is self.get_nested_tree(self,
file_id, path)

> +        # FIXME: be more comprehensive here:
> +        # this works when both trees are in --trees repository,
> +        # but when both are bound to a different repository,
> +        # it fails;

So the concern here is about the case where they are using the same
repository as bound branches?

> +    def _record_entry(self, path, file_id, specific_files, kind, name,
> +                      parent_id, definitely_changed, existing_ie=None):

^^^ I think this should be building up an inventory delta.  That's meant
to be the new way of manipulating tree contents.  Basically, we
shouldn't be touching Tree.inventory at all.

> +        "Record the new inventory entry for a path if any."
> +        # mutter('check %s {%s}', path, file_id)
> +        if (not specific_files or 
> +            is_inside_or_parent_of_any(specific_files, path)):
> +                # mutter('%s selected for commit', path)
> +                if definitely_changed or existing_ie is None:
> +                    ie = inventory.make_entry(kind, name, parent_id, file_id)
> +                else:
> +                    ie = existing_ie.copy()
> +                    ie.revision = None
> +        else:
> +            # mutter('%s not selected for commit', path)
> +            if self.basis_inv.has_id(file_id):

^^^ I think there's a tree method, but anyway, this is data you already
know from iter_changes.

> +    def _report_change(self, ie, path):
> +        """Report a change to the user.
> +
> +        The change that has occurred is described relative to the basis
> +        inventory.
> +        """
> +        if (self.basis_inv.has_id(ie.file_id)):
> +            basis_ie = self.basis_inv[ie.file_id]
> +        else:
> +            basis_ie = None
> +        change = ie.describe_change(basis_ie, ie)
> +        if change in (InventoryEntry.RENAMED, 
> +            InventoryEntry.MODIFIED_AND_RENAMED):
> +            old_path = self.basis_inv.id2path(ie.file_id)
> +            self.reporter.renamed(change, old_path, path)
> +        else:
> +            self.reporter.snapshot_change(change, path)

I think it would be nice to reuse delta._ChangeReporter here.
Consistency FTW.  Also not using inventories FTW.

> +    def _emit_progress_set_stage(self, name, entries_title=None):
>          """Set the progress stage and emit an update to the progress bar."""

^^^ does this need to be emitting progress *and* setting stage?

Aaron
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.1 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD4DBQFGhW690F+nu1YWqI0RAjo0AJ935IeK63Cj7Xw4VfcT/fMJWK4nZACYwcgL
Bzrscux/f7PrNDbzHTlFig==
=XCXE
-----END PGP SIGNATURE-----