Rev 4802: Support state on per-file merging to permit more efficient use of configuration data. in http://bazaar.launchpad.net/~lifeless/bzr/bug-513822
Robert Collins
robertc at robertcollins.net
Thu Jan 28 17:27:33 GMT 2010
At http://bazaar.launchpad.net/~lifeless/bzr/bug-513822
------------------------------------------------------------
revno: 4802
revision-id: robertc at robertcollins.net-20100128172716-i6s7itz6flgxl66i
parent: pqm at pqm.ubuntu.com-20100121221710-2vc21zjy3lnxbg6y
fixes bug(s): https://launchpad.net/bugs/513822
committer: Robert Collins <robertc at robertcollins.net>
branch nick: bug-513822
timestamp: Fri 2010-01-29 04:27:16 +1100
message:
Support state on per-file merging to permit more efficient use of configuration data.
=== modified file 'NEWS'
--- a/NEWS 2010-01-21 21:13:09 +0000
+++ b/NEWS 2010-01-28 17:27:16 +0000
@@ -5,6 +5,18 @@
.. contents:: List of Releases
:depth: 1
+bzr 2.1.0rc2
+############
+
+API Changes
+***********
+
+* The new ``merge_file_content`` hook point has been altered to provide a
+ better API where state for extensions can be stored rather than the
+ too-simple function based approach. This fixes a performance regression
+ where branch configuration would be parsed per-file during merge.
+ (Robert Collins, John Arbash Meinel, #513822)
+
bzr 2.1.0rc1
############
=== modified file 'bzrlib/merge.py'
--- a/bzrlib/merge.py 2010-01-20 16:05:28 +0000
+++ b/bzrlib/merge.py 2010-01-28 17:27:16 +0000
@@ -57,23 +57,47 @@
def __init__(self):
hooks.Hooks.__init__(self)
self.create_hook(hooks.HookPoint('merge_file_content',
- "Called when file content needs to be merged (including when one "
- "side has deleted the file and the other has changed it)."
- "merge_file_content is called with a "
- "bzrlib.merge.MergeHookParams. The function should return a tuple "
- "of (status, lines), where status is one of 'not_applicable', "
- "'success', 'conflicted', or 'delete'. If status is success or "
- "conflicted, then lines should be an iterable of strings of the "
- "new file contents.",
+ "Called with a bzrlib.merge.Merger object to create a per file "
+ "merge object when starting a merge. "
+ "Should return either None or a subclass of "
+ "``bzrlib.merge.AbstractPerFileMerger``. "
+ "Such objects will then be called per file "
+ "that needs to be merged (including when one "
+ "side has deleted the file and the other has changed it). "
+ "See the AbstractPerFileMerger API docs for details on how it is "
+ "used by merge.",
(2, 1), None))
+class AbstractPerFileMerger(object):
+ """PerFileMerger objects are used by plugins extending merge for bzrlib.
+
+ See ``bzrlib.plugins.news_merge.news_merge`` for an example concrete class.
+
+ :ivar merger: The Merge3Merger performing the merge.
+ """
+
+ def __init__(self, merger):
+ """Create a PerFileMerger for use with merger."""
+ self.merger = merger
+
+ def merge_contents(self, merge_params):
+ """Attempt to merge the contents of a single file.
+
+ :param merge_params: A bzrlib.merge.MergeHookParams
+ :return : A tuple of (status, chunks), where status is one of
+ 'not_applicable', 'success', 'conflicted', or 'delete'. If status
+ is 'success' or 'conflicted', then chunks should be an iterable of
+ strings for the new file contents.
+ """
+ return ('not applicable', None)
+
+
class MergeHookParams(object):
"""Object holding parameters passed to merge_file_content hooks.
- There are 3 fields hooks can access:
+ There are some fields hooks can access:
- :ivar merger: the Merger object
:ivar file_id: the file ID of the file being merged
:ivar trans_id: the transform ID for the merge of this file
:ivar this_kind: kind of file_id in 'this' tree
@@ -83,7 +107,7 @@
def __init__(self, merger, file_id, trans_id, this_kind, other_kind,
winner):
- self.merger = merger
+ self._merger = merger
self.file_id = file_id
self.trans_id = trans_id
self.this_kind = this_kind
@@ -97,17 +121,17 @@
@decorators.cachedproperty
def base_lines(self):
"""The lines of the 'base' version of the file."""
- return self.merger.get_lines(self.merger.base_tree, self.file_id)
+ return self._merger.get_lines(self._merger.base_tree, self.file_id)
@decorators.cachedproperty
def this_lines(self):
"""The lines of the 'this' version of the file."""
- return self.merger.get_lines(self.merger.this_tree, self.file_id)
+ return self._merger.get_lines(self._merger.this_tree, self.file_id)
@decorators.cachedproperty
def other_lines(self):
"""The lines of the 'other' version of the file."""
- return self.merger.get_lines(self.merger.other_tree, self.file_id)
+ return self._merger.get_lines(self._merger.other_tree, self.file_id)
class Merger(object):
@@ -708,12 +732,15 @@
resolver = self._lca_multi_way
child_pb = ui.ui_factory.nested_progress_bar()
try:
+ factories = Merger.hooks['merge_file_content']
+ hooks = [factory(self) for factory in factories] + [self]
+ self.active_hooks = [hook for hook in hooks if hook is not None]
for num, (file_id, changed, parents3, names3,
executable3) in enumerate(entries):
child_pb.update('Preparing file merge', num, len(entries))
self._merge_names(file_id, parents3, names3, resolver=resolver)
if changed:
- file_status = self.merge_contents(file_id)
+ file_status = self._do_merge_contents(file_id)
else:
file_status = 'unmodified'
self._merge_executable(file_id,
@@ -1158,7 +1185,7 @@
self.tt.adjust_path(names[self.winner_idx[name_winner]],
parent_trans_id, trans_id)
- def merge_contents(self, file_id):
+ def _do_merge_contents(self, file_id):
"""Performs a merge on file_id contents."""
def contents_pair(tree):
if file_id not in tree:
@@ -1198,11 +1225,10 @@
trans_id = self.tt.trans_id_file_id(file_id)
params = MergeHookParams(self, file_id, trans_id, this_pair[0],
other_pair[0], winner)
- hooks = Merger.hooks['merge_file_content']
- hooks = list(hooks) + [self.default_text_merge]
+ hooks = self.active_hooks
hook_status = 'not_applicable'
for hook in hooks:
- hook_status, lines = hook(params)
+ hook_status, lines = hook.merge_contents(params)
if hook_status != 'not_applicable':
# Don't try any more hooks, this one applies.
break
@@ -1279,7 +1305,11 @@
'winner is OTHER, but file_id %r not in THIS or OTHER tree'
% (file_id,))
- def default_text_merge(self, merge_hook_params):
+ def merge_contents(self, merge_hook_params):
+ """Fallback merge logic after user installed hooks."""
+ # This function is used in merge hooks as the fallback instance.
+ # Perhaps making this function and the functions it calls be a
+ # a separate class would be better.
if merge_hook_params.winner == 'other':
# OTHER is a straight winner, so replace this contents with other
return self._default_other_winner_merge(merge_hook_params)
=== modified file 'bzrlib/plugins/news_merge/__init__.py'
--- a/bzrlib/plugins/news_merge/__init__.py 2010-01-20 16:05:28 +0000
+++ b/bzrlib/plugins/news_merge/__init__.py 2010-01-28 17:27:16 +0000
@@ -39,46 +39,16 @@
# overhead of this plugin as minimal as possible.
from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
-from bzrlib.plugins.news_merge.news_merge import news_merger
+from bzrlib.plugins.news_merge import news_merge as _mod_news_merge
""")
from bzrlib.merge import Merger
-def news_merge_hook(params):
- """Merger.merge_file_content hook function for bzr-format NEWS files."""
- # First, check whether this custom merge logic should be used. We expect
- # most files should not be merged by this file.
- if params.winner == 'other':
- # OTHER is a straight winner, rely on default merge.
- return 'not_applicable', None
- elif not params.is_file_merge():
- # THIS and OTHER aren't both files.
- return 'not_applicable', None
- elif not filename_matches_config(params):
- # The filename isn't listed in the 'news_merge_files' config option.
- return 'not_applicable', None
- return news_merger(params)
-
-
-def filename_matches_config(params):
- affected_files = getattr(params, '_news_merge_affected_files', None)
- if affected_files is None:
- config = params.merger.this_tree.branch.get_config()
- # Until bzr provides a better policy for caching the config, we just
- # add the part we're interested in to the params to avoid reading the
- # config files repeatedly (bazaar.conf, location.conf, branch.conf).
- affected_files = config.get_user_option_as_list('news_merge_files')
- if affected_files is None:
- # If nothing was specified in the config, we have nothing to do,
- # but we use None in the params to start the caching.
- affected_files = []
- params._news_merge_affected_files = affected_files
- if affected_files:
- filename = params.merger.this_tree.id2path(params.file_id)
- if filename in affected_files:
- return True
- return False
+def news_merge_hook(merger):
+ """Merger.merge_file_content hook for bzr-format NEWS files."""
+ return _mod_news_merge.NewsMerger(merger)
+
def install_hook():
Merger.hooks.install_named_hook(
=== modified file 'bzrlib/plugins/news_merge/news_merge.py'
--- a/bzrlib/plugins/news_merge/news_merge.py 2010-01-20 16:05:28 +0000
+++ b/bzrlib/plugins/news_merge/news_merge.py 2010-01-28 17:27:16 +0000
@@ -18,54 +18,98 @@
from bzrlib.plugins.news_merge.parser import simple_parse
-from bzrlib import merge3
+from bzrlib import merge, merge3
magic_marker = '|NEWS-MERGE-MAGIC-MARKER|'
-def news_merger(params):
- """Perform a simple 3-way merge of a bzr NEWS file.
-
- Each section of a bzr NEWS file is essentially an ordered set of bullet
- points, so we can simply take a set of bullet points, determine which
- bullets to add and which to remove, sort, and reserialize.
+class NewsMerger(merge.AbstractPerFileMerger):
+ """Merge bzr NEWS files.
+
+ :ivar: affected_files.
"""
- # Transform the different versions of the NEWS file into a bunch of text
- # lines where each line matches one part of the overall structure, e.g. a
- # heading or bullet.
- def munge(lines):
- return list(blocks_to_fakelines(simple_parse(''.join(lines))))
- this_lines = munge(params.this_lines)
- other_lines = munge(params.other_lines)
- base_lines = munge(params.base_lines)
- m3 = merge3.Merge3(base_lines, this_lines, other_lines)
- result_lines = []
- for group in m3.merge_groups():
- if group[0] == 'conflict':
- _, base, a, b = group
- # Are all the conflicting lines bullets? If so, we can merge this.
- for line_set in [base, a, b]:
- for line in line_set:
- if not line.startswith('bullet'):
- # Something else :(
- # Maybe the default merge can cope.
- return 'not_applicable', None
- # Calculate additions and deletions.
- new_in_a = set(a).difference(base)
- new_in_b = set(b).difference(base)
- all_new = new_in_a.union(new_in_b)
- deleted_in_a = set(base).difference(a)
- deleted_in_b = set(base).difference(b)
- # Combine into the final set of bullet points.
- final = all_new.difference(deleted_in_a).difference(deleted_in_b)
- # Sort, and emit.
- final = sorted(final, key=sort_key)
- result_lines.extend(final)
- else:
- result_lines.extend(group[1])
- # Transform the merged elements back into real blocks of lines.
- return 'success', list(fakelines_to_blocks(result_lines))
+
+ def __init__(self, merger):
+ super(NewsMerger, self).__init__(merger)
+ self.affected_files = None
+
+ def merge_contents(self, params):
+ """Perform a simple 3-way merge of a bzr NEWS file.
+
+ Each section of a bzr NEWS file is essentially an ordered set of bullet
+ points, so we can simply take a set of bullet points, determine which
+ bullets to add and which to remove, sort, and reserialize.
+ """
+ # First, check whether this custom merge logic should be used. We
+ # expect most files should not be merged by this file.
+ if params.winner == 'other':
+ # OTHER is a straight winner, rely on default merge.
+ return 'not_applicable', None
+ elif not params.is_file_merge():
+ # THIS and OTHER aren't both files.
+ return 'not_applicable', None
+ elif not self.filename_matches_config(params):
+ # The filename isn't listed in the 'news_merge_files' config
+ # option.
+ return 'not_applicable', None
+ # Transform the different versions of the NEWS file into a bunch of
+ # text lines where each line matches one part of the overall
+ # structure, e.g. a heading or bullet.
+ def munge(lines):
+ return list(blocks_to_fakelines(simple_parse(''.join(lines))))
+ this_lines = munge(params.this_lines)
+ other_lines = munge(params.other_lines)
+ base_lines = munge(params.base_lines)
+ m3 = merge3.Merge3(base_lines, this_lines, other_lines)
+ result_lines = []
+ for group in m3.merge_groups():
+ if group[0] == 'conflict':
+ _, base, a, b = group
+ # Are all the conflicting lines bullets? If so, we can merge
+ # this.
+ for line_set in [base, a, b]:
+ for line in line_set:
+ if not line.startswith('bullet'):
+ # Something else :(
+ # Maybe the default merge can cope.
+ return 'not_applicable', None
+ # Calculate additions and deletions.
+ new_in_a = set(a).difference(base)
+ new_in_b = set(b).difference(base)
+ all_new = new_in_a.union(new_in_b)
+ deleted_in_a = set(base).difference(a)
+ deleted_in_b = set(base).difference(b)
+ # Combine into the final set of bullet points.
+ final = all_new.difference(deleted_in_a).difference(
+ deleted_in_b)
+ # Sort, and emit.
+ final = sorted(final, key=sort_key)
+ result_lines.extend(final)
+ else:
+ result_lines.extend(group[1])
+ # Transform the merged elements back into real blocks of lines.
+ return 'success', list(fakelines_to_blocks(result_lines))
+
+ def filename_matches_config(self, params):
+ affected_files = self.affected_files
+ if affected_files is None:
+ config = self.merger.this_tree.branch.get_config()
+ # Until bzr provides a better policy for caching the config, we
+ # just add the part we're interested in to the params to avoid
+ # reading the config files repeatedly (bazaar.conf, location.conf,
+ # branch.conf).
+ affected_files = config.get_user_option_as_list('news_merge_files')
+ if affected_files is None:
+ # If nothing was specified in the config, we have nothing to do,
+ # but we use None in the params to start the caching.
+ affected_files = []
+ self.affected_files = affected_files
+ if affected_files:
+ filename = params.merger.this_tree.id2path(params.file_id)
+ if filename in affected_files:
+ return True
+ return False
def blocks_to_fakelines(blocks):
=== modified file 'bzrlib/plugins/news_merge/tests/test_news_merge.py'
--- a/bzrlib/plugins/news_merge/tests/test_news_merge.py 2010-01-20 16:05:28 +0000
+++ b/bzrlib/plugins/news_merge/tests/test_news_merge.py 2010-01-28 17:27:16 +0000
@@ -21,7 +21,9 @@
option,
tests,
)
+from bzrlib.merge import Merger
from bzrlib.plugins import news_merge
+import bzrlib.plugins.news_merge.news_merge
from bzrlib.tests import test_merge_core
@@ -29,23 +31,16 @@
def test_affected_files_cached(self):
"""Ensures that the config variable is cached"""
- news_merge.install_hook()
- self.affected_files = None
- orig = news_merge.filename_matches_config
- def wrapper(params):
- ret = orig(params)
- # Capture the value set by the hook
- self.affected_files = params._news_merge_affected_files
- return ret
- def restore():
- news_merge.filename_matches_config = orig
- self.addCleanup(restore)
- news_merge.filename_matches_config = wrapper
-
+ def make_news_hook(merger):
+ result = news_merge.news_merge.NewsMerger(merger)
+ self.merger = result
+ return result
+ Merger.hooks.install_named_hook( 'merge_file_content', make_news_hook,
+ 'test NEWS file merge')
builder = test_merge_core.MergeBuilder(self.test_base_dir)
self.addCleanup(builder.cleanup)
builder.add_file('NEWS', builder.tree_root, 'name1', 'text1', True)
builder.change_contents('NEWS', other='text4', this='text3')
conflicts = builder.merge()
# The hook should set the variable
- self.assertIsNot(None, self.affected_files)
+ self.assertIsNot(None, self.merger.affected_files)
=== modified file 'bzrlib/tests/per_merger.py'
--- a/bzrlib/tests/per_merger.py 2010-01-20 16:05:28 +0000
+++ b/bzrlib/tests/per_merger.py 2010-01-28 17:27:16 +0000
@@ -198,55 +198,85 @@
TestCaseWithTransport.setUp(self)
self.hook_log = []
+ def install_hook_inactive(self):
+ def inactive_factory(merger):
+ # This hook is never active
+ self.hook_log.append(('inactive',))
+ return None
+ _mod_merge.Merger.hooks.install_named_hook(
+ 'merge_file_content', inactive_factory, 'test hook (inactive)')
+
def install_hook_noop(self):
- def hook_na(merge_params):
- # This hook unconditionally does nothing.
- self.hook_log.append(('no-op',))
- return 'not_applicable', None
+ test = self
+ class HookNA(_mod_merge.AbstractPerFileMerger):
+ def merge_contents(self, merge_params):
+ # This hook unconditionally does nothing.
+ test.hook_log.append(('no-op',))
+ return 'not_applicable', None
+ def hook_na_factory(merger):
+ return HookNA(merger)
_mod_merge.Merger.hooks.install_named_hook(
- 'merge_file_content', hook_na, 'test hook (no-op)')
+ 'merge_file_content', hook_na_factory, 'test hook (no-op)')
def install_hook_success(self):
- def hook_success(merge_params):
- self.hook_log.append(('success',))
- if merge_params.file_id == '1':
- return 'success', ['text-merged-by-hook']
- return 'not_applicable', None
+ test = self
+ class HookSuccess(_mod_merge.AbstractPerFileMerger):
+ def merge_contents(self, merge_params):
+ test.hook_log.append(('success',))
+ if merge_params.file_id == '1':
+ return 'success', ['text-merged-by-hook']
+ return 'not_applicable', None
+ def hook_success_factory(merger):
+ return HookSuccess(merger)
_mod_merge.Merger.hooks.install_named_hook(
- 'merge_file_content', hook_success, 'test hook (success)')
+ 'merge_file_content', hook_success_factory, 'test hook (success)')
def install_hook_conflict(self):
- def hook_conflict(merge_params):
- self.hook_log.append(('conflict',))
- if merge_params.file_id == '1':
- return 'conflicted', ['text-with-conflict-markers-from-hook']
- return 'not_applicable', None
+ test = self
+ class HookConflict(_mod_merge.AbstractPerFileMerger):
+ def merge_contents(self, merge_params):
+ test.hook_log.append(('conflict',))
+ if merge_params.file_id == '1':
+ return ('conflicted',
+ ['text-with-conflict-markers-from-hook'])
+ return 'not_applicable', None
+ def hook_conflict_factory(merger):
+ return HookConflict(merger)
_mod_merge.Merger.hooks.install_named_hook(
- 'merge_file_content', hook_conflict, 'test hook (delete)')
+ 'merge_file_content', hook_conflict_factory, 'test hook (delete)')
def install_hook_delete(self):
- def hook_delete(merge_params):
- self.hook_log.append(('delete',))
- if merge_params.file_id == '1':
- return 'delete', None
- return 'not_applicable', None
+ test = self
+ class HookDelete(_mod_merge.AbstractPerFileMerger):
+ def merge_contents(self, merge_params):
+ test.hook_log.append(('delete',))
+ if merge_params.file_id == '1':
+ return 'delete', None
+ return 'not_applicable', None
+ def hook_delete_factory(merger):
+ return HookDelete(merger)
_mod_merge.Merger.hooks.install_named_hook(
- 'merge_file_content', hook_delete, 'test hook (delete)')
+ 'merge_file_content', hook_delete_factory, 'test hook (delete)')
def install_hook_log_lines(self):
"""Install a hook that saves the get_lines for the this, base and other
versions of the file.
"""
- def hook_log_lines(merge_params):
- self.hook_log.append((
- 'log_lines',
- merge_params.this_lines,
- merge_params.other_lines,
- merge_params.base_lines,
- ))
- return 'not_applicable', None
+ test = self
+ class HookLogLines(_mod_merge.AbstractPerFileMerger):
+ def merge_contents(self, merge_params):
+ test.hook_log.append((
+ 'log_lines',
+ merge_params.this_lines,
+ merge_params.other_lines,
+ merge_params.base_lines,
+ ))
+ return 'not_applicable', None
+ def hook_log_lines_factory(merger):
+ return HookLogLines(merger)
_mod_merge.Merger.hooks.install_named_hook(
- 'merge_file_content', hook_log_lines, 'test hook (log_lines)')
+ 'merge_file_content', hook_log_lines_factory,
+ 'test hook (log_lines)')
def make_merge_builder(self):
builder = MergeBuilder(self.test_base_dir)
@@ -315,6 +345,18 @@
self.assertEqual(
[('log_lines', ['text2'], ['text3'], ['text1'])], self.hook_log)
+ def test_chain_when_not_active(self):
+ """When a hook function returns None, merging still works."""
+ self.install_hook_inactive()
+ self.install_hook_success()
+ builder = self.make_merge_builder()
+ self.create_file_needing_contents_merge(builder, "1")
+ conflicts = builder.merge(self.merge_type)
+ self.assertEqual(conflicts, [])
+ self.assertEqual(
+ builder.this.get_file('1').read(), 'text-merged-by-hook')
+ self.assertEqual([('inactive',), ('success',)], self.hook_log)
+
def test_chain_when_not_applicable(self):
"""When a hook function returns not_applicable, the next function is
tried (when one exists).
More information about the bazaar-commits
mailing list