Rev 2939: Commit with many automatically found deleted paths no longer performs linear scanning for the children of those paths, in http://people.ubuntu.com/~robertc/baz2.0/commit

Robert Collins robertc at robertcollins.net
Wed Oct 24 23:37:59 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/commit

------------------------------------------------------------
revno: 2939
revision-id:robertc at robertcollins.net-20071024223742-fhjlj7l6lu77s9zq
parent: pqm at pqm.ubuntu.com-20071024181951-qqo4r5mqrhr032pf
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit
timestamp: Thu 2007-10-25 08:37:42 +1000
message:
  * Commit with many automatically found deleted paths no longer performs
    linear scanning for the children of those paths during inventory
    iteration. This should fix commit performance blowing out when many such
    paths occur during commit. (Robert Collins, #156491)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
=== modified file 'NEWS'
--- a/NEWS	2007-10-24 17:19:20 +0000
+++ b/NEWS	2007-10-24 22:37:42 +0000
@@ -51,6 +51,11 @@
    * Commit no longer checks for new text keys during insertion when the
      revision id was deterministically unique. (Robert Collins)
 
+   * Commit with many automatically found deleted paths no longer performs
+     linear scanning for the children of those paths during inventory
+     iteration. This should fix commit performance blowing out when many such
+     paths occur during commit. (Robert Collins, #156491)
+
    * Committing a change which is not a merge and does not change the number of
      files in the tree is faster by utilising the data about whether files are
      changed to determine if the tree is unchanged rather than recalculating

=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py	2007-10-24 06:48:13 +0000
+++ b/bzrlib/commit.py	2007-10-24 22:37:42 +0000
@@ -71,7 +71,9 @@
 from bzrlib.osutils import (kind_marker, isdir,isfile, is_inside_any,
                             is_inside_or_parent_of_any,
                             minimum_path_selection,
-                            quotefn, sha_file, split_lines)
+                            quotefn, sha_file, split_lines,
+                            splitpath,
+                            )
 from bzrlib.testament import Testament
 from bzrlib.trace import mutter, note, warning, is_quiet
 from bzrlib.xml5 import serializer_v5
@@ -703,7 +705,7 @@
                
         report_changes = self.reporter.is_verbose()
         deleted_ids = []
-        deleted_paths = set()
+        deleted_paths = {}
         # XXX: Note that entries may have the wrong kind because the entry does
         # not reflect the status on disk.
         work_inv = self.work_tree.inventory
@@ -717,16 +719,36 @@
             if kind == 'directory':
                 self._next_progress_entry()
             # Skip files that have been deleted from the working tree.
-            # The deleted files/directories are also recorded so they
-            # can be explicitly unversioned later. Note that when a
-            # filter of specific files is given, we must only skip/record
-            # deleted files matching that filter.
-            if is_inside_any(deleted_paths, path):
-                continue
+            # The deleted path ids are also recorded so they can be explicitly
+            # unversioned later.
+            if deleted_paths:
+                path_segments = splitpath(path)
+                deleted_dict = deleted_paths
+                for segment in path_segments:
+                    deleted_dict = deleted_dict.get(segment, None)
+                    if deleted_dict is None:
+                        # We took a path not present in the dict.
+                        break
+                    if not deleted_dict:
+                        # An empty child dict marks a deleted path, so this
+                        # path is that path or lies beneath it.
+                        break
+                else:
+                    deleted_dict = None
+                if deleted_dict is not None:
+                    # the path has a deleted parent, do not add it.
+                    continue
             content_summary = self.work_tree.path_content_summary(path)
+            # Note that when a filter of specific files is given, we must only
+            # skip/record deleted files matching that filter.
             if not specific_files or is_inside_any(specific_files, path):
                 if content_summary[0] == 'missing':
-                    deleted_paths.add(path)
+                    if not deleted_paths:
+                        # path won't have been split yet.
+                        path_segments = splitpath(path)
+                    deleted_dict = deleted_paths
+                    for segment in path_segments:
+                        deleted_dict = deleted_dict.setdefault(segment, {})
                     self.reporter.missing(path)
                     deleted_ids.append(file_id)
                     continue



More information about the bazaar-commits mailing list