Rev 2939: * Commit with many automatically found deleted paths no longer performs in http://people.ubuntu.com/~robertc/baz2.0/commit
Robert Collins
robertc at robertcollins.net
Wed Oct 24 23:37:59 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/commit
------------------------------------------------------------
revno: 2939
revision-id:robertc at robertcollins.net-20071024223742-fhjlj7l6lu77s9zq
parent: pqm at pqm.ubuntu.com-20071024181951-qqo4r5mqrhr032pf
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit
timestamp: Thu 2007-10-25 08:37:42 +1000
message:
* Commit with many automatically found deleted paths no longer performs
linear scanning for the children of those paths during inventory
iteration. This should fix commit performance blowing out when many such
paths occur during commit. (Robert Collins, #156491)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
=== modified file 'NEWS'
--- a/NEWS 2007-10-24 17:19:20 +0000
+++ b/NEWS 2007-10-24 22:37:42 +0000
@@ -51,6 +51,11 @@
* Commit no longer checks for new text keys during insertion when the
revision id was deterministically unique. (Robert Collins)
+ * Commit with many automatically found deleted paths no longer performs
+ linear scanning for the children of those paths during inventory
+ iteration. This should fix commit performance blowing out when many such
+ paths occur during commit. (Robert Collins, #156491)
+
* Committing a change which is not a merge and does not change the number of
files in the tree is faster by utilising the data about whether files are
changed to determine if the tree is unchanged rather than recalculating
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py 2007-10-24 06:48:13 +0000
+++ b/bzrlib/commit.py 2007-10-24 22:37:42 +0000
@@ -71,7 +71,9 @@
from bzrlib.osutils import (kind_marker, isdir,isfile, is_inside_any,
is_inside_or_parent_of_any,
minimum_path_selection,
- quotefn, sha_file, split_lines)
+ quotefn, sha_file, split_lines,
+ splitpath,
+ )
from bzrlib.testament import Testament
from bzrlib.trace import mutter, note, warning, is_quiet
from bzrlib.xml5 import serializer_v5
@@ -703,7 +705,7 @@
report_changes = self.reporter.is_verbose()
deleted_ids = []
- deleted_paths = set()
+ deleted_paths = {}
# XXX: Note that entries may have the wrong kind because the entry does
# not reflect the status on disk.
work_inv = self.work_tree.inventory
@@ -717,16 +719,36 @@
if kind == 'directory':
self._next_progress_entry()
# Skip files that have been deleted from the working tree.
- # The deleted files/directories are also recorded so they
- # can be explicitly unversioned later. Note that when a
- # filter of specific files is given, we must only skip/record
- # deleted files matching that filter.
- if is_inside_any(deleted_paths, path):
- continue
+ # The deleted path ids are also recorded so they can be explicitly
+ # unversioned later.
+ if deleted_paths:
+ path_segments = splitpath(path)
+ deleted_dict = deleted_paths
+ for segment in path_segments:
+ deleted_dict = deleted_dict.get(segment, None)
+ if deleted_dict is None:
+ # We took a path not present in the dict.
+ break
+ if not deleted_dict:
+ # We've reached an empty dict, which marks a deleted path, so
+ # this path is that deleted path or lies beneath it.
+ break
+ else:
+ deleted_dict = None
+ if deleted_dict is not None:
+ # the path has a deleted parent, do not add it.
+ continue
content_summary = self.work_tree.path_content_summary(path)
+ # Note that when a filter of specific files is given, we must only
+ # skip/record deleted files matching that filter.
if not specific_files or is_inside_any(specific_files, path):
if content_summary[0] == 'missing':
- deleted_paths.add(path)
+ if not deleted_paths:
+ # deleted_paths was empty, so the check above never split path.
+ path_segments = splitpath(path)
+ deleted_dict = deleted_paths
+ for segment in path_segments:
+ deleted_dict = deleted_dict.setdefault(segment, {})
self.reporter.missing(path)
deleted_ids.append(file_id)
continue
More information about the bazaar-commits
mailing list