Rev 3034: (robertc) Fix one cause of poor commit performance with many deleted paths in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Tue Nov 27 02:37:52 GMT 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 3034
revision-id:pqm at pqm.ubuntu.com-20071127023739-a1ajr28wi7so2up6
parent: pqm at pqm.ubuntu.com-20071127020038-4i8r0718qpokyky9
parent: robertc at robertcollins.net-20071127015529-4inebdo4aa2bfatv
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2007-11-27 02:37:39 +0000
message:
(robertc) Fix one cause of poor commit performance with many deleted
paths. (Robert Collins, #156491)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
------------------------------------------------------------
revno: 2938.3.2
revision-id:robertc at robertcollins.net-20071127015529-4inebdo4aa2bfatv
parent: robertc at robertcollins.net-20071024223742-fhjlj7l6lu77s9zq
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit.deleted-paths-speed
timestamp: Tue 2007-11-27 12:55:29 +1100
message:
Review feedback.
modified:
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
------------------------------------------------------------
revno: 2938.3.1
revision-id:robertc at robertcollins.net-20071024223742-fhjlj7l6lu77s9zq
parent: pqm at pqm.ubuntu.com-20071024181951-qqo4r5mqrhr032pf
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit
timestamp: Thu 2007-10-25 08:37:42 +1000
message:
* Commit with many automatically found deleted paths no longer performs
linear scanning for the children of those paths during inventory
iteration. This should fix commit performance blowing out when many such
paths occur during commit. (Robert Collins, #156491)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
=== modified file 'NEWS'
--- a/NEWS 2007-11-27 01:20:01 +0000
+++ b/NEWS 2007-11-27 02:37:39 +0000
@@ -245,6 +245,11 @@
* Commit no longer checks for new text keys during insertion when the
revision id was deterministically unique. (Robert Collins)
+ * Commit with many automatically found deleted paths no longer performs
+ linear scanning for the children of those paths during inventory
+ iteration. This should fix commit performance blowing out when many such
+ paths occur during commit. (Robert Collins, #156491)
+
* Committing a change which is not a merge and does not change the number of
files in the tree is faster by utilising the data about whether files are
changed to determine if the tree is unchanged rather than recalculating
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py 2007-11-05 19:40:28 +0000
+++ b/bzrlib/commit.py 2007-11-27 02:37:39 +0000
@@ -71,7 +71,9 @@
from bzrlib.osutils import (kind_marker, isdir,isfile, is_inside_any,
is_inside_or_parent_of_any,
minimum_path_selection,
- quotefn, sha_file, split_lines)
+ quotefn, sha_file, split_lines,
+ splitpath,
+ )
from bzrlib.testament import Testament
from bzrlib.trace import mutter, note, warning, is_quiet
from bzrlib.xml5 import serializer_v5
@@ -694,7 +696,9 @@
report_changes = self.reporter.is_verbose()
deleted_ids = []
- deleted_paths = set()
+ # A tree of paths that have been deleted. E.g. if foo/bar has been
+ # deleted, then we have {'foo':{'bar':{}}}
+ deleted_paths = {}
# XXX: Note that entries may have the wrong kind because the entry does
# not reflect the status on disk.
work_inv = self.work_tree.inventory
@@ -708,16 +712,34 @@
if kind == 'directory':
self._next_progress_entry()
# Skip files that have been deleted from the working tree.
- # The deleted files/directories are also recorded so they
- # can be explicitly unversioned later. Note that when a
- # filter of specific files is given, we must only skip/record
- # deleted files matching that filter.
- if is_inside_any(deleted_paths, path):
- continue
+ # The deleted path ids are also recorded so they can be explicitly
+ # unversioned later.
+ if deleted_paths:
+ path_segments = splitpath(path)
+ deleted_dict = deleted_paths
+ for segment in path_segments:
+ deleted_dict = deleted_dict.get(segment, None)
+ if not deleted_dict:
+ # We either took a path not present in the dict
+ # (deleted_dict was None), or we've reached an empty
+ # child dir in the dict, so are now a sub-path.
+ break
+ else:
+ deleted_dict = None
+ if deleted_dict is not None:
+ # the path has a deleted parent, do not add it.
+ continue
content_summary = self.work_tree.path_content_summary(path)
+ # Note that when a filter of specific files is given, we must only
+ # skip/record deleted files matching that filter.
if not specific_files or is_inside_any(specific_files, path):
if content_summary[0] == 'missing':
- deleted_paths.add(path)
+ if not deleted_paths:
+ # path won't have been split yet.
+ path_segments = splitpath(path)
+ deleted_dict = deleted_paths
+ for segment in path_segments:
+ deleted_dict = deleted_dict.setdefault(segment, {})
self.reporter.missing(path)
deleted_ids.append(file_id)
continue
More information about the bazaar-commits mailing list