Rev 2726: First edition, iter_commit_candidates. in http://people.ubuntu.com/~robertc/baz2.0/commit-candidates

Robert Collins robertc at robertcollins.net
Wed Aug 22 06:55:02 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/commit-candidates

------------------------------------------------------------
revno: 2726
revision-id: robertc at robertcollins.net-20070822055500-6co1vrzqhp8730c0
parent: pqm at pqm.ubuntu.com-20070817192843-0jaoxooskia72irk
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit-candidates
timestamp: Wed 2007-08-22 15:55:00 +1000
message:
  First edition, iter_commit_candidates.
added:
  bzrlib/tests/workingtree_implementations/test_iter_commit_candidates.py test_iter_commit_can-20070822055451-ygz697i0te0mcg8p-1
modified:
  bzrlib/mutabletree.py          mutabletree.py-20060906023413-4wlkalbdpsxi2r4y-2
  bzrlib/tests/workingtree_implementations/__init__.py __init__.py-20060203003124-b2aa5aca21a8bfad
=== added file 'bzrlib/tests/workingtree_implementations/test_iter_commit_candidates.py'
--- a/bzrlib/tests/workingtree_implementations/test_iter_commit_candidates.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/workingtree_implementations/test_iter_commit_candidates.py	2007-08-22 05:55:00 +0000
@@ -0,0 +1,105 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+from cStringIO import StringIO
+import os
+
+from bzrlib import (
+    branch,
+    bzrdir,
+    errors,
+    revision as _mod_revision,
+    ui,
+    uncommit,
+    workingtree,
+    )
+from bzrlib.errors import (NotBranchError, NotVersionedError,
+                           UnsupportedOperation)
+from bzrlib.osutils import has_symlinks
+from bzrlib.tests import TestSkipped, TestCase
+from bzrlib.tests.workingtree_implementations import TestCaseWithWorkingTree
+from bzrlib.trace import mutter
+from bzrlib.workingtree import (TreeEntry, TreeDirectory, TreeFile, TreeLink,
+                                WorkingTree)
+
+
+class TestCommitCandidates(TestCaseWithWorkingTree):
+    """These tests test the iter_commit_candidates method.
+
+    They are strikingly similar to the _iter_changes InterTree tests because
+    they have to deal with the same corner cases. However, we have not yet
+    found a good, high performance, layering that addresses the corner cases
+    and is reusable in both cases. So as this method cannot be defined in
+    terms of layering, it needs to be fully defined here.
+    """
+
+    def test_iter_commit_candidates_empty(self):
+        """With no file changes, iterating candidates yields nothing."""
+        tree = self.make_branch_and_tree('tree')
+        tree.commit('a commit')
+        tree.lock_write()
+        try:
+            self.assertEqual([],
+                list(tree.iter_commit_candidates({
+                     tree.last_revision():tree.basis_tree()})))
+        finally:
+            tree.unlock()
+
+    def test_iter_single_parents_gives_all_modified_content_items(self):
+        tree = self.make_branch_and_tree('tree')
+        # get a tree with one symlink/file/dir committed, so we can 
+        # delta them
+        paths = ['dir', 'file']
+        ids = ['dirid', 'fileid']
+        self.build_tree(['tree/dir/', 'tree/file'])
+        if has_symlinks():
+            paths.append('link')
+            ids.append('linkid')
+            os.symlink('target', 'tree/link')
+        tree.add(paths, ids)
+        rev_id1 = tree.commit('commit')
+        # now change content
+        self.build_tree_contents([('tree/file', 'new content')])
+        if has_symlinks():
+            os.unlink('tree/link')
+            os.symlink('target 2', 'tree/link')
+        tree.lock_write()
+        tree_root = tree.path2id('')
+        expected_result = [
+#            ((u'', tree_root, None, u'', 'directory', False, None),
+#             [(None, None, None, None, None, None)],
+#             False),
+#            ((u'dir', 'dirid', tree_root, 'dir', 'directory', False, None),
+#             [(tree_root, 'dir', 'directory', False, None, rev_id1)],
+#             False),
+            ((u'file', 'fileid', tree_root, 'file', 'file', False, None),
+             [(tree_root, 'file', 'file', False,
+              'fdd68fdc095181052aaa4f74fdb2db4a2ce6eb9f', rev_id1)],
+             True),
+            ((u'link', 'linkid', tree_root, 'link', 'symlink', False, None),
+             [(tree_root, 'link', 'symlink', False, None, rev_id1)],
+             True)]
+        try:
+            self.assertEqual(expected_result,
+                list(tree.iter_commit_candidates({tree.last_revision():tree.basis_tree()})))
+        finally:
+            tree.unlock()
+
+# tests to write:
+# strict
+# specific files
+# consider root
+# on directory callback

=== modified file 'bzrlib/mutabletree.py'
--- a/bzrlib/mutabletree.py	2007-08-07 20:45:21 +0000
+++ b/bzrlib/mutabletree.py	2007-08-22 05:55:00 +0000
@@ -211,6 +211,143 @@
             revprops=revprops, *args, **kwargs)
         return committed_id
 
+    def iter_commit_candidates(self, parent_trees):
+        """Iterate over the items which may need to be committed.
+
+        This iterator will examine each path currently versioned in the tree.
+        Eventually, the content of the path will never be read, but currently
+        we do always read it because of the layering involved. The path is
+        intended to be, and always is, statted.
+        If the path can be excluded from the commit without reading it, it is
+        not returned by the iterator, otherwise it will be returned.
+
+        This function exists to help commit produce:
+        Delta based:
+         - user output 
+         - flag paths that may require storage in the repository
+        Full text based (today):
+         - create a stored inventory in the repository
+         - update the dirstate:
+           - when autodeletion/add is on do so in the working inv
+           - make the inventory produced during commit become the
+             sole parent
+           - update stat information when we have calculated hashes of files.
+
+        The logic applied to each path is (in order):
+         - If the path is not one selected by the user, it is excluded.
+         - If there are no parents, it is included as it must be new.
+         - If the last-modified varies between any of the parents, it is 
+           included so the commit code can do last-modified calculations.
+         - If the parent id or name is different to the left most parent, it
+           is included.
+         - If the kind, size, or exec bit is different to the left most parent
+           it is included.
+         - If the hash of the file is known (by a hash cache hit) and it is 
+           the same as that of the left most parent, it is excluded.
+         - It is included.
+
+        The logic in commit for each item returned should be approximately:
+         - perform new-last-modified-required calculation based on the per file
+           graph.
+         - Open the path (using os.open with O_NOFOLLOW) - if this errors it is
+           a symlink, otherwise: perform an fstat to determine the actual kind
+           and current x flag.
+         - Insert a new text representing the path, with the always_insert flag
+           set to true if the new-last-modified-required calculation returned
+           True. OR. If there was a hash cache hit and new-last-modified is
+           True then insert a duplicate-text into the knit avoiding the hash
+           calculation of the open file.
+         - Insert a new item in the inventory being built with the key, hash
+           and size returned by the knit insertion.
+
+        :param parent_trees: A dict of revision_id:tree containing a key for 
+            all parent ids, and either a tree or None when a particular parent
+            is a ghost and unavailable. Note that the absence of a parent will
+            cause every path to be considered to have its last-modified
+            changed.
+        :return: An iterator over paths in the tree which are not excluded from
+            being recorded during commit. Each item that the iterator yields
+            contains a three-tuple. The first element is a tuple:
+            (path, id, parent_id, name, kind, exec bit, hash or None)
+            which contains the current tree details for the path. If a given
+            element is None that indicates that the data has not been acquired.
+            The second element is a vector each item of which is a tuple of the
+            historical data for this id in the matching parent from the tree's
+            parent list:
+            (parent_id, name, kind, exec bit, hash, last_modified)
+            The parent details vector always has as many elements as there are
+            parent trees.
+            Missing versioned files have the kind 'missing' in the current tree
+            status, and None for hash and executable bit.
+            The working tree hash is useful for fast-path insertions on changes
+            that do not alter the text, because that lets the storage layer
+            decide to avoid diffing against a given parent, and allows
+            arbitrary parent selection should that help in achieving this.
+            The third element in the tuple is a trinary flag, content_changed.
+            This is None if a content change against the left most parent has
+            not been evaluated; True if one has been evaluated and the content
+            was different, or False if one was evaluated and the content has
+            not changed. A changed kind or a changed symlink count towards this
+            flag.
+        """
+        # This is a generic inventory based implementation.
+        # XXX: When time permits, consolidate the new features of this generic
+        #      implementation as part of Tree.iter_changes ? they are somewhat
+        #      different. RBC 20070821
+        specific_files = None
+        include_unchanged = True
+        parent_ids = self.get_parent_ids()
+        trees = [parent_trees[rev_id] for rev_id in parent_ids]
+        if not trees:
+            # no parents, so use an empty revision tree
+            # If looking at this code and it seems easier to get
+            # an empty parent vector rather than one that is
+            # basically hardcoded - to make first commits faster,
+            # this can be done; but may involve more rework to get
+            # a good layering.
+            trees = [self.branch.repository.revision_tree(None)]
+        for tree in trees:
+            tree.lock_read()
+        parent_count = len(parent_ids)
+        try:
+            strict = False
+            auto_add_delete = True
+            input_iterator = self._iter_changes(trees[0],
+                include_unchanged=include_unchanged,
+                specific_files=specific_files,
+                extra_trees=trees[1:],
+                want_unversioned=auto_add_delete or strict,
+                require_versioned=True)
+            for (file_id, (basis_path, path), content_change,
+                (basis_versioned, versioned), (basis_parent, parent),
+                (basis_name, name), (basis_kind, kind), (basis_exec, executable)
+                ) in input_iterator:
+                parent_vector = []
+                # strip out unchanged entries which have the same last_modified.
+                if parent_count < 2:
+                    if not content_change:
+                        continue
+                # special case first parent as it's always present with the
+                # current structure; and is the common case
+                if basis_parent is not None:
+                    entry = trees[0].inventory[file_id]
+                    parent_vector.append(
+                        (basis_parent, basis_name, basis_kind, basis_exec,
+                         entry.text_sha1, entry.revision))
+                else:
+                    parent_vector.append((None, None, None, None, None, None))
+                if parent_count > 1:
+                    # check last_modified flags across parents here.
+                    pass
+                # TODO: get the file hash IFF it is available. This requires
+                #       layering changes to allow querying for this - typically
+                #       we need a stat fingerprint, and that's not available here.
+                yield ((path, file_id, parent, name, kind, executable, None),
+                    parent_vector, content_change)
+        finally:
+            for tree in trees:
+                tree.unlock()
+
     def _gather_kinds(self, files, kinds):
         """Helper function for add - sets the entries of kinds."""
         raise NotImplementedError(self._gather_kinds)

=== modified file 'bzrlib/tests/workingtree_implementations/__init__.py'
--- a/bzrlib/tests/workingtree_implementations/__init__.py	2007-07-12 07:22:52 +0000
+++ b/bzrlib/tests/workingtree_implementations/__init__.py	2007-08-22 05:55:00 +0000
@@ -95,6 +95,7 @@
         'bzrlib.tests.workingtree_implementations.test_inv',
         'bzrlib.tests.workingtree_implementations.test_is_control_filename',
         'bzrlib.tests.workingtree_implementations.test_is_ignored',
+        'bzrlib.tests.workingtree_implementations.test_iter_commit_candidates',
         'bzrlib.tests.workingtree_implementations.test_locking',
         'bzrlib.tests.workingtree_implementations.test_merge_from_branch',
         'bzrlib.tests.workingtree_implementations.test_mkdir',



More information about the bazaar-commits mailing list