Rev 2967: (robertc) Implement WorkingTree4.update_basis_by_delta giving a 30% performance improvement for commit. (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Mon Nov 5 21:12:00 GMT 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2967
revision-id: pqm at pqm.ubuntu.com-20071105211156-bwx6vf8f88m247cy
parent: pqm at pqm.ubuntu.com-20071105201604-gjsoryzr7dcx7r65
parent: robertc at robertcollins.net-20071105194028-5gc6rdajk96maaq1
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2007-11-05 21:11:56 +0000
message:
  (robertc) Implement WorkingTree4.update_basis_by_delta giving a 30% performance improvement for commit. (Robert Collins)
added:
  bzrlib/tests/blackbox/test_check.py test_check.py-20071024054728-mn44rt3z5hnqcbke-1
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/builtins.py             builtins.py-20050830033751-fc01482b9ca23183
  bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/mutabletree.py          mutabletree.py-20060906023413-4wlkalbdpsxi2r4y-2
  bzrlib/tests/blackbox/__init__.py __init__.py-20051128053524-eba30d8255e08dc3
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
  bzrlib/tests/workingtree_implementations/test_parents.py test_set_parents.py-20060807231740-yicmnlci1mj8smu1-1
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
    ------------------------------------------------------------
    revno: 2929.2.4
    merged: robertc at robertcollins.net-20071105194028-5gc6rdajk96maaq1
    parent: robertc at robertcollins.net-20071024201121-0h1xkei0p1pf4diy
    parent: pqm at pqm.ubuntu.com-20071104224734-8l8km2gqk9n1pdla
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: dirstate.update_basis
    timestamp: Tue 2007-11-06 06:40:28 +1100
    message:
      Merge bzr.dev.
    ------------------------------------------------------------
    revno: 2929.2.3
    merged: robertc at robertcollins.net-20071024201121-0h1xkei0p1pf4diy
    parent: robertc at robertcollins.net-20071024054739-r4gjnintzwhnvg82
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: dirstate.update_basis
    timestamp: Thu 2007-10-25 06:11:21 +1000
    message:
      Review feedback.
    ------------------------------------------------------------
    revno: 2929.2.2
    merged: robertc at robertcollins.net-20071024054739-r4gjnintzwhnvg82
    parent: robertc at robertcollins.net-20071023221432-j8zndh1oiegql3cu
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: dirstate.update_basis
    timestamp: Wed 2007-10-24 15:47:39 +1000
    message:
      Review feedback on dirstate update_basis_via_delta logic.
    ------------------------------------------------------------
    revno: 2929.2.1
    merged: robertc at robertcollins.net-20071023221432-j8zndh1oiegql3cu
    parent: pqm at pqm.ubuntu.com-20071023082111-h6u34i4gvlb2nwch
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: dirstate.update_basis
    timestamp: Wed 2007-10-24 08:14:32 +1000
    message:
      * Commit updates the state of the working tree via a delta rather than
        supplying entirely new basis trees. For commit of a single specified file
        this reduces the wall clock time for commit by roughly a 30%.
        (Robert Collins, Martin Pool)
=== added file 'bzrlib/tests/blackbox/test_check.py'
--- a/bzrlib/tests/blackbox/test_check.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/blackbox/test_check.py	2007-10-24 20:11:21 +0000
@@ -0,0 +1,35 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Tests for the 'check' CLI command."""
+
+from bzrlib.tests.blackbox import ExternalBase
+
+
+class TestCheck(ExternalBase):
+
+    def test_check_no_tree(self):
+        self.make_branch('.')
+        self.run_bzr('check')
+
+    def test_check_initial_tree(self):
+        self.make_branch_and_tree('.')
+        self.run_bzr('check')
+
+    def test_check_one_commit_tree(self):
+        tree = self.make_branch_and_tree('.')
+        tree.commit('hallelujah')
+        self.run_bzr('check')

=== modified file 'NEWS'
--- a/NEWS	2007-11-05 13:21:30 +0000
+++ b/NEWS	2007-11-05 21:11:56 +0000
@@ -68,6 +68,11 @@
    * Commit no longer checks for new text keys during insertion when the
      revision id was deterministically unique. (Robert Collins)
 
+   * Commit updates the state of the working tree via a delta rather than
+     supplying entirely new basis trees. For commit of a single specified file
+     this reduces the wall clock time for commit by roughly a 30%.
+     (Robert Collins, Martin Pool)
+
    * Committing a change which is not a merge and does not change the number of
      files in the tree is faster by utilising the data about whether files are
      changed to determine if the tree is unchanged rather than recalculating

=== modified file 'bzrlib/builtins.py'
--- a/bzrlib/builtins.py	2007-11-04 21:27:22 +0000
+++ b/bzrlib/builtins.py	2007-11-05 21:11:56 +0000
@@ -2368,10 +2368,36 @@
     def run(self, branch=None, verbose=False):
         from bzrlib.check import check
         if branch is None:
-            branch = Branch.open_containing('.')[0]
-        else:
-            branch = Branch.open(branch)
-        check(branch, verbose)
+            branch_obj = Branch.open_containing('.')[0]
+        else:
+            branch_obj = Branch.open(branch)
+        check(branch_obj, verbose)
+        # bit hacky, check the tree parent is accurate
+        try:
+            if branch is None:
+                tree = WorkingTree.open_containing('.')[0]
+            else:
+                tree = WorkingTree.open(branch)
+        except (errors.NoWorkingTree, errors.NotLocalUrl):
+            pass
+        else:
+            # This is a primitive 'check' for tree state. Currently this is not
+            # integrated into the main check logic as yet.
+            tree.lock_read()
+            try:
+                tree_basis = tree.basis_tree()
+                tree_basis.lock_read()
+                try:
+                    repo_basis = tree.branch.repository.revision_tree(
+                        tree.last_revision())
+                    if len(list(repo_basis._iter_changes(tree_basis))):
+                        raise errors.BzrCheckError(
+                            "Mismatched basis inventory content.")
+                    tree._validate()
+                finally:
+                    tree_basis.unlock()
+            finally:
+                tree.unlock()
 
 
 class cmd_upgrade(Command):

=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py	2007-10-24 06:48:13 +0000
+++ b/bzrlib/commit.py	2007-11-05 19:40:28 +0000
@@ -384,17 +384,8 @@
 
             # Make the working tree up to date with the branch
             self._set_progress_stage("Updating the working tree")
-            rev_tree = self.builder.revision_tree()
-            # XXX: This will need to be changed if we support doing a
-            # selective commit while a merge is still pending - then we'd
-            # still have multiple parents after the commit.
-            #
-            # XXX: update_basis_by_delta is slower at present because it works
-            # on inventories, so this is not active until there's a native
-            # dirstate implementation.
-            ## self.work_tree.update_basis_by_delta(self.rev_id,
-            ##      self._basis_delta)
-            self.work_tree.set_parent_trees([(self.rev_id, rev_tree)])
+            self.work_tree.update_basis_by_delta(self.rev_id,
+                 self._basis_delta)
             self.reporter.completed(new_revno, self.rev_id)
             self._process_post_hooks(old_revno, new_revno)
         finally:

=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-10-24 07:28:00 +0000
+++ b/bzrlib/dirstate.py	2007-11-05 19:40:28 +0000
@@ -212,6 +212,7 @@
 import zlib
 
 from bzrlib import (
+    cache_utf8,
     debug,
     errors,
     inventory,
@@ -305,15 +306,6 @@
     def __init__(self, path):
         """Create a  DirState object.
 
-        Attributes of note:
-
-        :attr _root_entrie: The root row of the directory/file information,
-            - contains the path to / - '', ''
-            - kind of 'directory',
-            - the file id of the root in utf8
-            - size of 0
-            - a packed state
-            - and no sha information.
         :param path: The path at which the dirstate file on disk should live.
         """
         # _header_state and _dirblock_state represent the current state
@@ -899,6 +891,48 @@
             processed_dirs.update(pending_dirs)
         return found
 
+    def _discard_merge_parents(self):
+        """Discard any parents trees beyond the first.
+        
+        Note that if this fails the dirstate is corrupted.
+
+        After this function returns the dirstate contains 2 trees, neither of
+        which are ghosted.
+        """
+        self._read_header_if_needed()
+        parents = self.get_parent_ids()
+        if len(parents) < 1:
+            return
+        # only require all dirblocks if we are doing a full-pass removal.
+        self._read_dirblocks_if_needed()
+        dead_patterns = set([('a', 'r'), ('a', 'a'), ('r', 'r'), ('r', 'a')])
+        def iter_entries_removable():
+            for block in self._dirblocks:
+                deleted_positions = []
+                for pos, entry in enumerate(block[1]):
+                    yield entry
+                    if (entry[1][0][0], entry[1][1][0]) in dead_patterns:
+                        deleted_positions.append(pos)
+                if deleted_positions:
+                    if len(deleted_positions) == len(block):
+                        del block[1][:]
+                    else:
+                        for pos in reversed(deleted_positions):
+                            del block[1][pos]
+        # if the first parent is a ghost:
+        if parents[0] in self.get_ghosts():
+            empty_parent = [DirState.NULL_PARENT_DETAILS]
+            for entry in iter_entries_removable():
+                entry[1][1:] = empty_parent
+        else:
+            for entry in iter_entries_removable():
+                del entry[1][2:]
+
+        self._ghosts = []
+        self._parents = [parents[0]]
+        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        self._header_state = DirState.IN_MEMORY_MODIFIED
+
     def _empty_parent_info(self):
         return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
                                                     len(self._ghosts))
@@ -1129,6 +1163,214 @@
             raise
         return result
 
+    def update_basis_by_delta(self, delta, new_revid):
+        """Update the parents of this tree after a commit.
+
+        This gives the tree one parent, with revision id new_revid. The
+        inventory delta is applied to the current basis tree to generate the
+        inventory for the parent new_revid, and all other parent trees are
+        discarded.
+
+        Note that an exception during the operation of this method will leave
+        the dirstate in a corrupt state where it should not be saved.
+
+        Finally, we expect all changes to be synchronising the basis tree with
+        the working tree.
+
+        :param new_revid: The new revision id for the trees parent.
+        :param delta: An inventory delta (see apply_inventory_delta) describing
+            the changes from the current left most parent revision to new_revid.
+        """
+        self._read_dirblocks_if_needed()
+        self._discard_merge_parents()
+        if self._ghosts != []:
+            raise NotImplementedError(self.update_basis_by_delta)
+        if len(self._parents) == 0:
+            # setup a blank tree, the most simple way.
+            empty_parent = DirState.NULL_PARENT_DETAILS
+            for entry in self._iter_entries():
+                entry[1].append(empty_parent)
+            self._parents.append(new_revid)
+
+        self._parents[0] = new_revid
+
+        delta = sorted(delta, reverse=True)
+        adds = []
+        changes = []
+        deletes = []
+        # The paths this function accepts are unicode and must be encoded as we
+        # go.
+        encode = cache_utf8.encode
+        inv_to_entry = self._inv_entry_to_details
+        # delta is now (deletes, changes), (adds) in reverse lexographical
+        # order.
+        # deletes in reverse lexographic order are safe to process in situ.
+        # renames are not, as a rename from any path could go to a path
+        # lexographically lower, so we transform renames into delete, add pairs,
+        # expanding them recursively as needed.
+        # At the same time, to reduce interface friction we convert the input
+        # inventory entries to dirstate.
+        root_only = ('', '')
+        for old_path, new_path, file_id, inv_entry in delta:
+            if old_path is None:
+                adds.append((None, encode(new_path), file_id,
+                    inv_to_entry(inv_entry), True))
+            elif new_path is None:
+                deletes.append((encode(old_path), None, file_id, None, True))
+            elif (old_path, new_path) != root_only:
+                # Renames:
+                # Because renames must preserve their children we must have
+                # processed all relocations and removes before hand. The sort
+                # order ensures we've examined the child paths, but we also
+                # have to execute the removals, or the split to an add/delete
+                # pair will result in the deleted item being reinserted, or
+                # renamed items being reinserted twice - and possibly at the
+                # wrong place. Splitting into a delete/add pair also simplifies
+                # the handling of entries with ('f', ...), ('r' ...) because
+                # the target of the 'r' is old_path here, and we add that to
+                # deletes, meaning that the add handler does not need to check
+                # for 'r' items on every pass.
+                self._update_basis_apply_deletes(deletes)
+                deletes = []
+                new_path_utf8 = encode(new_path)
+                # Split into an add/delete pair recursively.
+                adds.append((None, new_path_utf8, file_id,
+                    inv_to_entry(inv_entry), False))
+                # Expunge deletes that we've seen so that deleted/renamed
+                # children of a rename directory are handled correctly.
+                new_deletes = reversed(list(self._iter_child_entries(1,
+                    encode(old_path))))
+                # Remove the current contents of the tree at orig_path, and
+                # reinsert at the correct new path.
+                for entry in new_deletes:
+                    if entry[0][0]:
+                        source_path = entry[0][0] + '/' + entry[0][1]
+                    else:
+                        source_path = entry[0][1]
+                    target_path = new_path_utf8 + source_path[len(old_path):]
+                    adds.append((None, target_path, entry[0][2], entry[1][1], False))
+                    deletes.append(
+                        (source_path, target_path, entry[0][2], None, False))
+                deletes.append(
+                    (encode(old_path), new_path, file_id, None, False))
+            else:
+                # changes to just the root should not require remove/insertion
+                # of everything.
+                changes.append((encode(old_path), encode(new_path), file_id,
+                    inv_to_entry(inv_entry)))
+
+        # Finish expunging deletes/first half of renames.
+        self._update_basis_apply_deletes(deletes)
+        # Reinstate second half of renames and new paths.
+        self._update_basis_apply_adds(adds)
+        # Apply in-situ changes.
+        self._update_basis_apply_changes(changes)
+
+        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        self._header_state = DirState.IN_MEMORY_MODIFIED
+        self._id_index = None
+        return
+
+    def _update_basis_apply_adds(self, adds):
+        """Apply a sequence of adds to tree 1 during update_basis_by_delta.
+
+        They may be adds, or renames that have been split into add/delete
+        pairs.
+
+        :param adds: A sequence of adds. Each add is a tuple:
+            (None, new_path_utf8, file_id, (entry_details), real_add). real_add
+            is False when the add is the second half of a remove-and-reinsert
+            pair created to handle renames and deletes.
+        """
+        # Adds are accumulated partly from renames, so can be in any input
+        # order - sort it.
+        adds.sort()
+        # adds is now in lexographic order, which places all parents before
+        # their children, so we can process it linearly.
+        absent = 'ar'
+        for old_path, new_path, file_id, new_details, real_add in adds:
+            assert old_path is None
+            # the entry for this file_id must be in tree 0.
+            entry = self._get_entry(0, file_id, new_path)
+            if entry[0][2] != file_id:
+                raise errors.BzrError('dirstate: cannot apply delta, working'
+                    ' tree does not contain new entry %r %r' %
+                    (new_path, file_id))
+            if real_add and entry[1][1][0] not in absent:
+                raise errors.BzrError('dirstate: inconsistent delta, with '
+                    'tree 0. %r %r' % (new_path, file_id))
+            # We don't need to update the target of an 'r' because the handling
+            # of renames turns all 'r' situations into a delete at the original
+            # location.
+            entry[1][1] = new_details
+
+    def _update_basis_apply_changes(self, changes):
+        """Apply a sequence of changes to tree 1 during update_basis_by_delta.
+
+        :param adds: A sequence of changes. Each change is a tuple:
+            (path_utf8, path_utf8, file_id, (entry_details))
+        """
+        absent = 'ar'
+        for old_path, new_path, file_id, new_details in changes:
+            assert old_path == new_path
+            # the entry for this file_id must be in tree 0.
+            entry = self._get_entry(0, file_id, new_path)
+            if entry[0][2] != file_id:
+                raise errors.BzrError('dirstate: cannot apply delta, working'
+                    ' tree does not contain new entry %r %r' %
+                    (new_path, file_id))
+            if (entry[1][0][0] in absent or
+                entry[1][1][0] in absent):
+                raise errors.BzrError('dirstate: inconsistent delta, with '
+                    'tree 0. %r %r' % (new_path, file_id))
+            entry[1][1] = new_details
+
+    def _update_basis_apply_deletes(self, deletes):
+        """Apply a sequence of deletes to tree 1 during update_basis_by_delta.
+
+        They may be deletes, or renames that have been split into add/delete
+        pairs.
+
+        :param deletes: A sequence of deletes. Each delete is a tuple:
+            (old_path_utf8, new_path_utf8, file_id, None, real_delete).
+            real_delete is True when the desired outcome is an actual deletion
+            rather than the rename handling logic temporarily deleting a path
+            during the replacement of a parent.
+        """
+        null = DirState.NULL_PARENT_DETAILS
+        for old_path, new_path, file_id, _, real_delete in deletes:
+            if real_delete:
+                assert new_path is None
+            else:
+                assert new_path is not None
+            # the entry for this file_id must be in tree 1.
+            dirname, basename = osutils.split(old_path)
+            block_index, entry_index, dir_present, file_present = \
+                self._get_block_entry_index(dirname, basename, 1)
+            if not file_present:
+                raise errors.BzrError('dirstate: cannot apply delta, basis'
+                    ' tree does not contain new entry %r %r' %
+                    (old_path, file_id))
+            entry = self._dirblocks[block_index][1][entry_index]
+            if entry[0][2] != file_id:
+                raise errors.BzrError('mismatched file_id in tree 1 %r %r' %
+                    (old_path, file_id))
+            if real_delete:
+                if entry[1][0][0] != 'a':
+                    raise errors.BzrError('dirstate: inconsistent delta, with '
+                        'tree 0. %r %r' % (old_path, file_id))
+                del self._dirblocks[block_index][1][entry_index]
+            else:
+                if entry[1][0][0] == 'a':
+                    raise errors.BzrError('dirstate: inconsistent delta, with '
+                        'tree 0. %r %r' % (old_path, file_id))
+                elif entry[1][0][0] == 'r':
+                    # implement the rename
+                    del self._dirblocks[block_index][1][entry_index]
+                else:
+                    # it is being resurrected here, so blank it out temporarily.
+                    self._dirblocks[block_index][1][entry_index][1][1] = null
+
     def update_entry(self, entry, abspath, stat_value,
                      _stat_to_minikind=_stat_to_minikind,
                      _pack_stat=pack_stat):
@@ -1388,8 +1630,8 @@
         # linear search through entries at this path to find the one
         # requested.
         while entry_index < len(block) and block[entry_index][0][1] == basename:
-            if block[entry_index][1][tree_index][0] not in \
-                       ('a', 'r'): # absent, relocated
+            if block[entry_index][1][tree_index][0] not in 'ar':
+                # neither absent or relocated
                 return block_index, entry_index, True, True
             entry_index += 1
         return block_index, entry_index, True, False
@@ -1528,6 +1770,47 @@
             raise Exception("can't pack %s" % inv_entry)
         return (minikind, fingerprint, size, executable, tree_data)
 
+    def _iter_child_entries(self, tree_index, path_utf8):
+        """Iterate over all the entries that are children of path_utf.
+
+        This only returns entries that are present (not in 'a', 'r') in 
+        tree_index. tree_index data is not refreshed, so if tree 0 is used,
+        results may differ from that obtained if paths were statted to
+        determine what ones were directories.
+
+        Asking for the children of a non-directory will return an empty
+        iterator.
+        """
+        pending_dirs = []
+        next_pending_dirs = [path_utf8]
+        absent = 'ar'
+        while next_pending_dirs:
+            pending_dirs = next_pending_dirs
+            next_pending_dirs = []
+            for path in pending_dirs:
+                block_index, present = self._find_block_index_from_key(
+                    (path, '', ''))
+                if block_index == 0:
+                    block_index = 1
+                    if len(self._dirblocks) == 1:
+                        # asked for the children of the root with no other
+                        # contents.
+                        return
+                if not present:
+                    # children of a non-directory asked for.
+                    continue
+                block = self._dirblocks[block_index]
+                for entry in block[1]:
+                    kind = entry[1][tree_index][0]
+                    if kind not in absent:
+                        yield entry
+                    if kind == 'd':
+                        if entry[0][0]:
+                            path = entry[0][0] + '/' + entry[0][1]
+                        else:
+                            path = entry[0][1]
+                        next_pending_dirs.append(path)
+    
     def _iter_entries(self):
         """Iterate over all the entries in the dirstate.
 
@@ -1800,7 +2083,7 @@
         # one: the current tree
         for entry in self._iter_entries():
             # skip entries not in the current tree
-            if entry[1][0][0] in ('a', 'r'): # absent, relocated
+            if entry[1][0][0] in 'ar': # absent, relocated
                 continue
             by_path[entry[0]] = [entry[1][0]] + \
                 [DirState.NULL_PARENT_DETAILS] * parent_count
@@ -1936,7 +2219,7 @@
                 return None
         while current_new or current_old:
             # skip entries in old that are not really there
-            if current_old and current_old[1][0][0] in ('r', 'a'):
+            if current_old and current_old[1][0][0] in 'ar':
                 # relocated or absent
                 current_old = advance(old_iterator)
                 continue
@@ -2018,7 +2301,7 @@
         all_remaining_keys = set()
         # Dont check the working tree, because it's going.
         for details in current_old[1][1:]:
-            if details[0] not in ('a', 'r'): # absent, relocated
+            if details[0] not in 'ar': # absent, relocated
                 all_remaining_keys.add(current_old[0])
             elif details[0] == 'r': # relocated
                 # record the key for the real path.
@@ -2124,7 +2407,7 @@
                         self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
                     assert present, 'could not find entry for %s' % (other_key,)
                     update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
-                    if update_details[0] in ('r', 'a'): # relocated, absent
+                    if update_details[0] in 'ar': # relocated, absent
                         # its a pointer or absent in lookup_index's tree, use
                         # it as is.
                         new_entry[1].append(update_details)
@@ -2225,7 +2508,6 @@
                     "dirblock for %r is not sorted:\n%s" % \
                     (dirblock[0], pformat(dirblock)))
 
-
         def check_valid_parent():
             """Check that the current entry has a valid parent.
 
@@ -2266,12 +2548,15 @@
                 "wrong number of entry details for row\n%s" \
                 ",\nexpected %d" % \
                 (pformat(entry), tree_count))
+            absent_positions = 0
             for tree_index, tree_state in enumerate(entry[1]):
                 this_tree_map = id_path_maps[tree_index]
                 minikind = tree_state[0]
+                if minikind in 'ar':
+                    absent_positions += 1
                 # have we seen this id before in this column?
                 if file_id in this_tree_map:
-                    previous_path = this_tree_map[file_id]
+                    previous_path, previous_loc = this_tree_map[file_id]
                     # any later mention of this file must be consistent with
                     # what was said before
                     if minikind == 'a':
@@ -2291,19 +2576,23 @@
                         # pointed to by a relocation, which must point here
                         if previous_path != this_path:
                             raise AssertionError(
-                            "entry %r inconsistent with previous path %r" % \
-                            (entry, previous_path))
+                                "entry %r inconsistent with previous path %r "
+                                "seen at %r" %
+                                (entry, previous_path, previous_loc))
                         check_valid_parent()
                 else:
                     if minikind == 'a':
                         # absent; should not occur anywhere else
-                        this_tree_map[file_id] = None
+                        this_tree_map[file_id] = None, this_path
                     elif minikind == 'r':
                         # relocation, must occur at expected location 
-                        this_tree_map[file_id] = tree_state[1]
+                        this_tree_map[file_id] = tree_state[1], this_path
                     else:
-                        this_tree_map[file_id] = this_path
+                        this_tree_map[file_id] = this_path, this_path
                         check_valid_parent()
+            if absent_positions == tree_count:
+                raise AssertionError(
+                    "entry %r has no data for any tree." % (entry,))
 
     def _wipe_state(self):
         """Forget all state information about the dirstate."""

=== modified file 'bzrlib/mutabletree.py'
--- a/bzrlib/mutabletree.py	2007-10-12 08:00:07 +0000
+++ b/bzrlib/mutabletree.py	2007-10-23 22:14:32 +0000
@@ -437,6 +437,13 @@
         inventory for the parent new_revid, and all other parent trees are
         discarded.
 
+        All the changes in the delta should be changes synchronising the basis
+        tree with some or all of the working tree, with a change to a directory
+        requiring that its contents have been recursively included. That is,
+        this is not a general purpose tree modification routine, but a helper
+        for commit which is not required to handle situations that do not arise
+        outside of commit.
+
         :param new_revid: The new revision id for the trees parent.
         :param delta: An inventory delta (see apply_inventory_delta) describing
             the changes from the current left most parent revision to new_revid.

=== modified file 'bzrlib/tests/blackbox/__init__.py'
--- a/bzrlib/tests/blackbox/__init__.py	2007-09-17 12:46:56 +0000
+++ b/bzrlib/tests/blackbox/__init__.py	2007-10-24 05:47:39 +0000
@@ -53,6 +53,7 @@
                      'bzrlib.tests.blackbox.test_bundle_info',
                      'bzrlib.tests.blackbox.test_cat',
                      'bzrlib.tests.blackbox.test_cat_revision',
+                     'bzrlib.tests.blackbox.test_check',
                      'bzrlib.tests.blackbox.test_checkout',
                      'bzrlib.tests.blackbox.test_command_encoding',
                      'bzrlib.tests.blackbox.test_commit',

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2007-11-01 09:52:45 +0000
+++ b/bzrlib/tests/test_dirstate.py	2007-11-05 19:40:28 +0000
@@ -1392,6 +1392,122 @@
             state.unlock()
 
 
+class TestIterChildEntries(TestCaseWithDirState):
+
+    def create_dirstate_with_two_trees(self):
+        """This dirstate contains multiple files and directories.
+
+         /        a-root-value
+         a/       a-dir
+         b/       b-dir
+         c        c-file
+         d        d-file
+         a/e/     e-dir
+         a/f      f-file
+         b/g      g-file
+         b/h\xc3\xa5  h-\xc3\xa5-file  #This is u'\xe5' encoded into utf-8
+
+        Notice that a/e is an empty directory.
+
+        There is one parent tree, which has the same shape with the following variations:
+        b/g in the parent is gone.
+        b/h in the parent has a different id
+        b/i is new in the parent 
+        c is renamed to b/j in the parent
+
+        :return: The dirstate, still write-locked.
+        """
+        packed_stat = 'AAAAREUHaIpFB2iKAAADAQAtkqUAAIGk'
+        null_sha = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
+        NULL_PARENT_DETAILS = dirstate.DirState.NULL_PARENT_DETAILS
+        root_entry = ('', '', 'a-root-value'), [
+            ('d', '', 0, False, packed_stat),
+            ('d', '', 0, False, 'parent-revid'),
+            ]
+        a_entry = ('', 'a', 'a-dir'), [
+            ('d', '', 0, False, packed_stat),
+            ('d', '', 0, False, 'parent-revid'),
+            ]
+        b_entry = ('', 'b', 'b-dir'), [
+            ('d', '', 0, False, packed_stat),
+            ('d', '', 0, False, 'parent-revid'),
+            ]
+        c_entry = ('', 'c', 'c-file'), [
+            ('f', null_sha, 10, False, packed_stat),
+            ('r', 'b/j', 0, False, ''),
+            ]
+        d_entry = ('', 'd', 'd-file'), [
+            ('f', null_sha, 20, False, packed_stat),
+            ('f', 'd', 20, False, 'parent-revid'),
+            ]
+        e_entry = ('a', 'e', 'e-dir'), [
+            ('d', '', 0, False, packed_stat),
+            ('d', '', 0, False, 'parent-revid'),
+            ]
+        f_entry = ('a', 'f', 'f-file'), [
+            ('f', null_sha, 30, False, packed_stat),
+            ('f', 'f', 20, False, 'parent-revid'),
+            ]
+        g_entry = ('b', 'g', 'g-file'), [
+            ('f', null_sha, 30, False, packed_stat),
+            NULL_PARENT_DETAILS,
+            ]
+        h_entry1 = ('b', 'h\xc3\xa5', 'h-\xc3\xa5-file1'), [
+            ('f', null_sha, 40, False, packed_stat),
+            NULL_PARENT_DETAILS,
+            ]
+        h_entry2 = ('b', 'h\xc3\xa5', 'h-\xc3\xa5-file2'), [
+            NULL_PARENT_DETAILS,
+            ('f', 'h', 20, False, 'parent-revid'),
+            ]
+        i_entry = ('b', 'i', 'i-file'), [
+            NULL_PARENT_DETAILS,
+            ('f', 'h', 20, False, 'parent-revid'),
+            ]
+        j_entry = ('b', 'j', 'c-file'), [
+            ('r', 'c', 0, False, ''),
+            ('f', 'j', 20, False, 'parent-revid'),
+            ]
+        dirblocks = []
+        dirblocks.append(('', [root_entry]))
+        dirblocks.append(('', [a_entry, b_entry, c_entry, d_entry]))
+        dirblocks.append(('a', [e_entry, f_entry]))
+        dirblocks.append(('b', [g_entry, h_entry1, h_entry2, i_entry, j_entry]))
+        state = dirstate.DirState.initialize('dirstate')
+        state._validate()
+        try:
+            state._set_data(['parent'], dirblocks)
+        except:
+            state.unlock()
+            raise
+        return state, dirblocks
+
+    def test_iter_children_b(self):
+        state, dirblocks = self.create_dirstate_with_two_trees()
+        self.addCleanup(state.unlock)
+        expected_result = []
+        expected_result.append(dirblocks[3][1][2]) # h2
+        expected_result.append(dirblocks[3][1][3]) # i
+        expected_result.append(dirblocks[3][1][4]) # j
+        self.assertEqual(expected_result,
+            list(state._iter_child_entries(1, 'b')))
+
+    def test_iter_child_root(self):
+        state, dirblocks = self.create_dirstate_with_two_trees()
+        self.addCleanup(state.unlock)
+        expected_result = []
+        expected_result.append(dirblocks[1][1][0]) # a
+        expected_result.append(dirblocks[1][1][1]) # b
+        expected_result.append(dirblocks[1][1][3]) # d
+        expected_result.append(dirblocks[2][1][0]) # e
+        expected_result.append(dirblocks[2][1][1]) # f
+        expected_result.append(dirblocks[3][1][2]) # h2
+        expected_result.append(dirblocks[3][1][3]) # i
+        expected_result.append(dirblocks[3][1][4]) # j
+        self.assertEqual(expected_result,
+            list(state._iter_child_entries(1, '')))
+
+
 class TestDirstateSortOrder(TestCaseWithTransport):
     """Test that DirState adds entries in the right order."""
 

=== modified file 'bzrlib/tests/workingtree_implementations/test_parents.py'
--- a/bzrlib/tests/workingtree_implementations/test_parents.py	2007-10-12 08:00:07 +0000
+++ b/bzrlib/tests/workingtree_implementations/test_parents.py	2007-10-24 05:47:39 +0000
@@ -260,7 +260,11 @@
 
     def assertDeltaApplicationResultsInExpectedBasis(self, tree, revid, delta,
         expected_inventory):
-        tree.update_basis_by_delta(revid, delta)
+        tree.lock_write()
+        try:
+            tree.update_basis_by_delta(revid, delta)
+        finally:
+            tree.unlock()
         # check the last revision was adjusted to rev_id
         self.assertEqual(revid, tree.last_revision())
         # check the parents are what we expect
@@ -354,8 +358,14 @@
                 parents.append(extra_parent)
             tree.set_parent_ids(parents)
         self.fake_up_revision(tree, new_revid, new_shape)
+        # give tree an inventory of new_shape
+        tree._write_inventory(new_shape)
         self.assertDeltaApplicationResultsInExpectedBasis(tree, new_revid,
             delta, new_shape)
+        # The tree should be internally consistent; while this is a moderately
+        # large hammer, this is a particularly sensitive area of code, so the
+        # extra assurance is well worth it.
+        tree._validate()
         osutils.rmtree('tree')
 
     def test_no_parents_just_root(self):
@@ -487,6 +497,23 @@
         self.assertTransitionFromBasisToShape(basis_shape, old_revid,
             new_shape, new_revid)
 
+    def test_parent_child_swap(self):
+        # test a A->A/B and A/B->A path swap.
+        old_revid = 'old-parent'
+        basis_shape = Inventory(root_id=None)
+        self.add_dir(basis_shape, old_revid, 'root-id', None, '')
+        self.add_dir(basis_shape, old_revid, 'dir-id-A', 'root-id', 'A')
+        self.add_dir(basis_shape, old_revid, 'dir-id-B', 'dir-id-A', 'B')
+        self.add_link(basis_shape, old_revid, 'link-id-C', 'dir-id-B', 'C', 'C')
+        new_revid = 'new-parent'
+        new_shape = Inventory(root_id=None)
+        self.add_new_root(new_shape, old_revid, new_revid)
+        self.add_dir(new_shape, new_revid, 'dir-id-B', 'root-id', 'A')
+        self.add_dir(new_shape, new_revid, 'dir-id-A', 'dir-id-B', 'B')
+        self.add_link(new_shape, new_revid, 'link-id-C', 'dir-id-A', 'C', 'C')
+        self.assertTransitionFromBasisToShape(basis_shape, old_revid,
+            new_shape, new_revid)
+
     def test_path_swap(self):
         # test a A->B and B->A path swap.
         old_revid = 'old-parent'
@@ -575,3 +602,21 @@
         self.add_link(new_shape, new_revid, 'link-id-B', 'root-id', 'B', 'C')
         self.assertTransitionFromBasisToShape(basis_shape, old_revid,
             new_shape, new_revid)
+
+    def test_move_moves_children_recursively(self):
+        old_revid = 'old-parent'
+        basis_shape = Inventory(root_id=None)
+        self.add_dir(basis_shape, old_revid, 'root-id', None, '')
+        self.add_dir(basis_shape, old_revid, 'dir-id-A', 'root-id', 'A')
+        self.add_dir(basis_shape, old_revid, 'dir-id-B', 'dir-id-A', 'B')
+        self.add_link(basis_shape, old_revid, 'link-id-C', 'dir-id-B', 'C', 'D')
+        new_revid = 'new-parent'
+        new_shape = Inventory(root_id=None)
+        self.add_new_root(new_shape, old_revid, new_revid)
+        # the moved path:
+        self.add_dir(new_shape, new_revid, 'dir-id-A', 'root-id', 'B')
+        # unmoved children.
+        self.add_dir(new_shape, old_revid, 'dir-id-B', 'dir-id-A', 'B')
+        self.add_link(new_shape, old_revid, 'link-id-C', 'dir-id-B', 'C', 'D')
+        self.assertTransitionFromBasisToShape(basis_shape, old_revid,
+            new_shape, new_revid)

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-10-24 07:28:00 +0000
+++ b/bzrlib/workingtree_4.py	2007-11-05 19:40:28 +0000
@@ -1213,6 +1213,11 @@
             for file_id in file_ids:
                 self._inventory.remove_recursive_id(file_id)
 
+    def update_basis_by_delta(self, new_revid, delta):
+        """See MutableTree.update_basis_by_delta."""
+        assert self.last_revision() != new_revid
+        self.current_dirstate().update_basis_by_delta(delta, new_revid)
+
     @needs_read_lock
     def _validate(self):
         self._dirstate._validate()
@@ -1220,7 +1225,8 @@
     @needs_tree_write_lock
     def _write_inventory(self, inv):
         """Write inventory as the current inventory."""
-        assert not self._dirty, "attempting to write an inventory when the dirstate is dirty will cause data loss"
+        assert not self._dirty, ("attempting to write an inventory when the "
+            "dirstate is dirty will cause data loss")
         self.current_dirstate().set_state_from_inventory(inv)
         self._make_dirty(reset_inventory=False)
         if self._inventory is not None:




More information about the bazaar-commits mailing list