Rev 2844: Faster partial commits by walking less data (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Fri Sep 21 06:13:18 BST 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 2844
revision-id: pqm at pqm.ubuntu.com-20070921051316-85muv96iv0duh31j
parent: pqm at pqm.ubuntu.com-20070921031326-72zmv08871klgb61
parent: ian.clatworthy at internode.on.net-20070921042253-eguu8ycfjbnb96yj
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Fri 2007-09-21 06:13:16 +0100
message:
Faster partial commits by walking less data (Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
bzrlib/osutils.py osutils.py-20050309040759-eeaff12fbf77ac86
bzrlib/tests/inventory_implementations/basics.py basics.py-20070903044446-kdjwbiu1p1zi9phs-1
bzrlib/tests/test_osutils.py test_osutils.py-20051201224856-e48ee24c12182989
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
------------------------------------------------------------
revno: 2843.1.1
merged: ian.clatworthy at internode.on.net-20070921042253-eguu8ycfjbnb96yj
parent: pqm at pqm.ubuntu.com-20070921031326-72zmv08871klgb61
parent: robertc at robertcollins.net-20070920070333-eedxrxidkignx4i1
committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
branch nick: ianc-integration2
timestamp: Fri 2007-09-21 14:22:53 +1000
message:
Faster partial commits by walking less data (Robert Collins)
------------------------------------------------------------
revno: 2825.7.1
merged: robertc at robertcollins.net-20070920070333-eedxrxidkignx4i1
parent: pqm at pqm.ubuntu.com-20070917005035-cshdkpzbj63id1uw
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit-specific-file-handling
timestamp: Thu 2007-09-20 17:03:33 +1000
message:
* Partial commits are now approximately 40% faster by walking over the
unselected current tree more efficiently. (Robert Collins)
* New method ``bzrlib.osutils.minimum_path_selection`` useful for removing
duplication from user input, when a user mentions both a path and an item
contained within that path. (Robert Collins)
* New parameter yield_parents on ``Inventory.iter_entries_by_dir`` which
causes the parents of a selected id to be returned recursively, so all the
paths from the root down to each element of selected_file_ids are
returned. (Robert Collins)
=== modified file 'NEWS'
--- a/NEWS 2007-09-21 03:13:26 +0000
+++ b/NEWS 2007-09-21 04:22:53 +0000
@@ -40,6 +40,9 @@
* Inventory serialisation no longer double-sha's the content.
(Robert Collins)
+ * Partial commits are now approximately 40% faster by walking over the
+ unselected current tree more efficiently. (Robert Collins)
+
* XML inventory serialisation takes 20% less time while being stricter about
the contents. (Robert Collins)
@@ -96,6 +99,10 @@
INTERNALS:
+ * New method ``bzrlib.osutils.minimum_path_selection`` useful for removing
+ duplication from user input, when a user mentions both a path and an item
+ contained within that path. (Robert Collins)
+
* New method on xml serialisers, write_inventory_to_lines, which matches the
API used by knits for adding content. (Robert Collins)
@@ -103,6 +110,11 @@
and returns a gzipped version of the same. This is used to avoid a bunch
of api friction during adding of knit hunks. (Robert Collins)
+ * New parameter yield_parents on ``Inventory.iter_entries_by_dir`` which
+ causes the parents of a selected id to be returned recursively, so all the
+ paths from the root down to each element of selected_file_ids are
+ returned. (Robert Collins)
+
TESTING:
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py 2007-09-21 00:22:35 +0000
+++ b/bzrlib/commit.py 2007-09-21 04:22:53 +0000
@@ -68,8 +68,9 @@
ConflictsInTree,
StrictCommitFailed
)
-from bzrlib.osutils import (kind_marker, isdir,isfile, is_inside_any,
+from bzrlib.osutils import (kind_marker, isdir,isfile, is_inside_any,
is_inside_or_parent_of_any,
+ minimum_path_selection,
quotefn, sha_file, split_lines)
from bzrlib.testament import Testament
from bzrlib.trace import mutter, note, warning, is_quiet
@@ -247,7 +248,12 @@
self.master_branch = None
self.master_locked = False
self.rev_id = None
- self.specific_files = specific_files
+ if specific_files is not None:
+ self.specific_files = sorted(
+ minimum_path_selection(specific_files))
+ else:
+ self.specific_files = None
+ self.specific_file_ids = None
self.allow_pointless = allow_pointless
self.recursive = recursive
self.revprops = revprops
@@ -282,14 +288,17 @@
self.config = self.branch.get_config()
# If provided, ensure the specified files are versioned
- if specific_files is not None:
- # Note: We don't actually need the IDs here. This routine
+ if self.specific_files is not None:
+ # Note: This routine
# is being called because it raises PathNotVerisonedError
- # as a side effect of finding the IDs.
+ # as a side effect of finding the IDs. We later use the ids we
+ # found as input to the working tree inventory iterator, so we
+ # only consider those ids rather than examining the whole tree
+ # again.
# XXX: Dont we have filter_unversioned to do this more
# cheaply?
- tree.find_ids_across_trees(specific_files,
- [self.basis_tree, self.work_tree])
+ self.specific_file_ids = tree.find_ids_across_trees(
+ specific_files, [self.basis_tree, self.work_tree])
# Setup the progress bar. As the number of files that need to be
# committed in unknown, progress is reported as stages.
@@ -639,13 +648,19 @@
# recorded in their previous state. For more details, see
# https://lists.ubuntu.com/archives/bazaar/2007q3/028476.html.
if specific_files:
- for path, new_ie in self.basis_inv.iter_entries():
- if new_ie.file_id in self.builder.new_inventory:
+ for path, old_ie in self.basis_inv.iter_entries():
+ if old_ie.file_id in self.builder.new_inventory:
continue
if is_inside_any(specific_files, path):
continue
- ie = new_ie.copy()
- ie.revision = None
+ if old_ie.kind == 'directory':
+ self._next_progress_entry()
+ ie = old_ie.copy()
+ # Note: specific file commits after a merge are currently
+ # prohibited. This test is for sanity/safety in case it's
+ # required after that changes.
+ if len(self.parents) > 1:
+ ie.revision = None
if self.builder.record_entry_contents(ie, self.parent_invs, path,
self.basis_tree):
self.any_entries_changed = True
@@ -671,7 +686,8 @@
deleted_paths = set()
work_inv = self.work_tree.inventory
assert work_inv.root is not None
- entries = work_inv.iter_entries_by_dir()
+ entries = work_inv.iter_entries_by_dir(
+ specific_file_ids=self.specific_file_ids, yield_parents=True)
if not self.builder.record_root_entry:
entries.next()
for path, existing_ie in entries:
@@ -681,7 +697,6 @@
kind = existing_ie.kind
if kind == 'directory':
self._next_progress_entry()
-
# Skip files that have been deleted from the working tree.
# The deleted files/directories are also recorded so they
# can be explicitly unversioned later. Note that when a
@@ -689,12 +704,11 @@
# deleted files matching that filter.
if is_inside_any(deleted_paths, path):
continue
- if not specific_files or is_inside_any(specific_files, path):
- if not self.work_tree.has_filename(path):
- deleted_paths.add(path)
- self.reporter.missing(path)
- deleted_ids.append(file_id)
- continue
+ if not self.work_tree.has_filename(path):
+ deleted_paths.add(path)
+ self.reporter.missing(path)
+ deleted_ids.append(file_id)
+ continue
try:
kind = self.work_tree.kind(file_id)
# TODO: specific_files filtering before nested tree processing
@@ -744,27 +758,17 @@
report_changes=True):
"Record the new inventory entry for a path if any."
# mutter('check %s {%s}', path, file_id)
- if (not specific_files or
- is_inside_or_parent_of_any(specific_files, path)):
- # mutter('%s selected for commit', path)
- if definitely_changed or existing_ie is None:
- ie = inventory.make_entry(kind, name, parent_id, file_id)
- else:
- ie = existing_ie.copy()
- ie.revision = None
+ # mutter('%s selected for commit', path)
+ if definitely_changed or existing_ie is None:
+ ie = inventory.make_entry(kind, name, parent_id, file_id)
else:
- # mutter('%s not selected for commit', path)
- if self.basis_inv.has_id(file_id):
- ie = self.basis_inv[file_id].copy()
- else:
- # this entry is new and not being committed
- ie = None
- if ie is not None:
- if self.builder.record_entry_contents(ie, self.parent_invs,
- path, self.work_tree):
- self.any_entries_changed = True
- if report_changes:
- self._report_change(ie, path)
+ ie = existing_ie.copy()
+ ie.revision = None
+ if self.builder.record_entry_contents(ie, self.parent_invs,
+ path, self.work_tree):
+ self.any_entries_changed = True
+ if report_changes:
+ self._report_change(ie, path)
return ie
def _report_change(self, ie, path):
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2007-09-17 05:33:56 +0000
+++ b/bzrlib/dirstate.py 2007-09-21 04:22:53 +0000
@@ -1470,7 +1470,8 @@
kind = inv_entry.kind
minikind = DirState._kind_to_minikind[kind]
tree_data = inv_entry.revision
- assert len(tree_data) > 0, 'empty revision for the inv_entry.'
+ assert tree_data, 'empty revision for the inv_entry %s.' % \
+ inv_entry.file_id
if kind == 'directory':
fingerprint = ''
size = 0
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2007-09-21 03:13:26 +0000
+++ b/bzrlib/inventory.py 2007-09-21 04:22:53 +0000
@@ -1011,7 +1011,8 @@
# if we finished all children, pop it off the stack
stack.pop()
- def iter_entries_by_dir(self, from_dir=None, specific_file_ids=None):
+ def iter_entries_by_dir(self, from_dir=None, specific_file_ids=None,
+ yield_parents=False):
"""Iterate over the entries in a directory first order.
This returns all entries for a directory before returning
@@ -1019,6 +1020,9 @@
lexicographically sorted order, and is a hybrid between
depth-first and breadth-first.
+ :param yield_parents: If True, yield the parents from the root leading
+ down to specific_file_ids that have been requested. This has no
+ impact if specific_file_ids is None.
:return: This yields (path, entry) pairs
"""
if specific_file_ids:
@@ -1030,13 +1034,14 @@
if self.root is None:
return
# Optimize a common case
- if specific_file_ids is not None and len(specific_file_ids) == 1:
+ if (not yield_parents and specific_file_ids is not None and
+ len(specific_file_ids) == 1):
file_id = list(specific_file_ids)[0]
if file_id in self:
yield self.id2path(file_id), self[file_id]
return
from_dir = self.root
- if (specific_file_ids is None or
+ if (specific_file_ids is None or yield_parents or
self.root.file_id in specific_file_ids):
yield u'', self.root
elif isinstance(from_dir, basestring):
@@ -1071,7 +1076,8 @@
child_relpath = cur_relpath + child_name
if (specific_file_ids is None or
- child_ie.file_id in specific_file_ids):
+ child_ie.file_id in specific_file_ids or
+ (yield_parents and child_ie.file_id in parents)):
yield child_relpath, child_ie
if child_ie.kind == 'directory':
=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py 2007-09-20 09:30:39 +0000
+++ b/bzrlib/osutils.py 2007-09-21 04:22:53 +0000
@@ -87,6 +87,23 @@
os.chmod(filename, mod)
+def minimum_path_selection(paths):
+ """Return the smallset subset of paths which are outside paths.
+
+ :param paths: A container (and hence not None) of paths.
+ :return: A set of paths sufficient to include everything in paths via
+ is_inside_any, drawn from the paths parameter.
+ """
+ search_paths = set()
+ paths = set(paths)
+ for path in paths:
+ other_paths = paths.difference([path])
+ if not is_inside_any(other_paths, path):
+ # this is a top level path, we must check it.
+ search_paths.add(path)
+ return search_paths
+
+
_QUOTE_RE = None
=== modified file 'bzrlib/tests/inventory_implementations/basics.py'
--- a/bzrlib/tests/inventory_implementations/basics.py 2007-09-03 04:45:38 +0000
+++ b/bzrlib/tests/inventory_implementations/basics.py 2007-09-20 07:03:33 +0000
@@ -202,6 +202,13 @@
], [(path, ie.file_id) for path, ie in inv.iter_entries_by_dir(
specific_file_ids=('bye-id',))])
+ self.assertEqual([
+ ('', 'tree-root'),
+ ('src', 'src-id'),
+ ('src/bye.c', 'bye-id'),
+ ], [(path, ie.file_id) for path, ie in inv.iter_entries_by_dir(
+ specific_file_ids=('bye-id',), yield_parents=True)])
+
def test_add_recursive(self):
parent = InventoryDirectory('src-id', 'src', 'tree-root')
child = InventoryFile('hello-id', 'hello.c', 'src-id')
=== modified file 'bzrlib/tests/test_osutils.py'
--- a/bzrlib/tests/test_osutils.py 2007-09-04 11:00:30 +0000
+++ b/bzrlib/tests/test_osutils.py 2007-09-20 07:03:33 +0000
@@ -477,6 +477,16 @@
# osutils.getcwd() renormalize the path.
self.assertEndsWith(osutils._win32_getcwd(), u'mu-\xb5')
+ def test_minimum_path_selection(self):
+ self.assertEqual(set(),
+ osutils.minimum_path_selection([]))
+ self.assertEqual(set(['a', 'b']),
+ osutils.minimum_path_selection(['a', 'b']))
+ self.assertEqual(set(['a/', 'b']),
+ osutils.minimum_path_selection(['a/', 'b']))
+ self.assertEqual(set(['a/', 'b']),
+ osutils.minimum_path_selection(['a/c', 'a/', 'b']))
+
def test_mkdtemp(self):
tmpdir = osutils._win32_mkdtemp(dir='.')
self.assertFalse('\\' in tmpdir)
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2007-09-14 02:25:32 +0000
+++ b/bzrlib/workingtree_4.py 2007-09-20 07:03:33 +0000
@@ -946,12 +946,7 @@
if not all_versioned:
raise errors.PathsNotVersionedError(paths)
# -- remove redundancy in supplied paths to prevent over-scanning --
- search_paths = set()
- for path in paths:
- other_paths = paths.difference(set([path]))
- if not osutils.is_inside_any(other_paths, path):
- # this is a top level path, we must check it.
- search_paths.add(path)
+ search_paths = osutils.minimum_path_selection(paths)
# sketch:
# for all search_indexs in each path at or under each element of
# search_paths, if the detail is relocated: add the id, and add the
More information about the bazaar-commits
mailing list