Rev 4802: Support state on per-file merging to permit more efficient use of configuration data. in http://bazaar.launchpad.net/~lifeless/bzr/bug-513822

Robert Collins robertc at robertcollins.net
Thu Jan 28 17:27:33 GMT 2010


At http://bazaar.launchpad.net/~lifeless/bzr/bug-513822

------------------------------------------------------------
revno: 4802
revision-id: robertc at robertcollins.net-20100128172716-i6s7itz6flgxl66i
parent: pqm at pqm.ubuntu.com-20100121221710-2vc21zjy3lnxbg6y
fixes bug(s): https://launchpad.net/bugs/513822
committer: Robert Collins <robertc at robertcollins.net>
branch nick: bug-513822
timestamp: Fri 2010-01-29 04:27:16 +1100
message:
  Support state on per-file merging to permit more efficient use of configuration data.
=== modified file 'NEWS'
--- a/NEWS	2010-01-21 21:13:09 +0000
+++ b/NEWS	2010-01-28 17:27:16 +0000
@@ -5,6 +5,18 @@
 .. contents:: List of Releases
    :depth: 1
 
+bzr 2.1.0rc2
+############
+
+API Changes
+***********
+
+* The new ``merge_file_content`` hook point has been altered to provide a
+  better API where state for extensions can be stored rather than the
+  too-simple function based approach. This fixes a performance regression
+  where branch configuration would be parsed per-file during merge.
+  (Robert Collins, John Arbash Meinel, #513822)
+
 bzr 2.1.0rc1
 ############
 

=== modified file 'bzrlib/merge.py'
--- a/bzrlib/merge.py	2010-01-20 16:05:28 +0000
+++ b/bzrlib/merge.py	2010-01-28 17:27:16 +0000
@@ -57,23 +57,47 @@
     def __init__(self):
         hooks.Hooks.__init__(self)
         self.create_hook(hooks.HookPoint('merge_file_content',
-            "Called when file content needs to be merged (including when one "
-            "side has deleted the file and the other has changed it)."
-            "merge_file_content is called with a "
-            "bzrlib.merge.MergeHookParams. The function should return a tuple "
-            "of (status, lines), where status is one of 'not_applicable', "
-            "'success', 'conflicted', or 'delete'.  If status is success or "
-            "conflicted, then lines should be an iterable of strings of the "
-            "new file contents.",
+            "Called with a bzrlib.merge.Merger object to create a per file "
+            "merge object when starting a merge. "
+            "Should return either None or a subclass of "
+            "``bzrlib.merge.AbstractPerFileMerger``. "
+            "Such objects will then be called per file "
+            "that needs to be merged (including when one "
+            "side has deleted the file and the other has changed it). "
+            "See the AbstractPerFileMerger API docs for details on how it is "
+            "used by merge.",
             (2, 1), None))
 
 
+class AbstractPerFileMerger(object):
+    """PerFileMerger objects are used by plugins extending merge for bzrlib.
+
+    See ``bzrlib.plugins.news_merge.news_merge`` for an example concrete class.
+    
+    :ivar merger: The Merge3Merger performing the merge.
+    """
+
+    def __init__(self, merger):
+        """Create a PerFileMerger for use with merger."""
+        self.merger = merger
+
+    def merge_contents(self, merge_params):
+        """Attempt to merge the contents of a single file.
+        
+        :param merge_params: A bzrlib.merge.MergeHookParams
+        :return : A tuple of (status, chunks), where status is one of
+            'not_applicable', 'success', 'conflicted', or 'delete'.  If status
+            is 'success' or 'conflicted', then chunks should be an iterable of
+            strings for the new file contents.
+        """
+        return ('not applicable', None)
+
+
 class MergeHookParams(object):
     """Object holding parameters passed to merge_file_content hooks.
 
-    There are 3 fields hooks can access:
+    There are some fields hooks can access:
 
-    :ivar merger: the Merger object
     :ivar file_id: the file ID of the file being merged
     :ivar trans_id: the transform ID for the merge of this file
     :ivar this_kind: kind of file_id in 'this' tree
@@ -83,7 +107,7 @@
 
     def __init__(self, merger, file_id, trans_id, this_kind, other_kind,
             winner):
-        self.merger = merger
+        self._merger = merger
         self.file_id = file_id
         self.trans_id = trans_id
         self.this_kind = this_kind
@@ -97,17 +121,17 @@
     @decorators.cachedproperty
     def base_lines(self):
         """The lines of the 'base' version of the file."""
-        return self.merger.get_lines(self.merger.base_tree, self.file_id)
+        return self._merger.get_lines(self._merger.base_tree, self.file_id)
 
     @decorators.cachedproperty
     def this_lines(self):
         """The lines of the 'this' version of the file."""
-        return self.merger.get_lines(self.merger.this_tree, self.file_id)
+        return self._merger.get_lines(self._merger.this_tree, self.file_id)
 
     @decorators.cachedproperty
     def other_lines(self):
         """The lines of the 'other' version of the file."""
-        return self.merger.get_lines(self.merger.other_tree, self.file_id)
+        return self._merger.get_lines(self._merger.other_tree, self.file_id)
 
 
 class Merger(object):
@@ -708,12 +732,15 @@
             resolver = self._lca_multi_way
         child_pb = ui.ui_factory.nested_progress_bar()
         try:
+            factories = Merger.hooks['merge_file_content']
+            hooks = [factory(self) for factory in factories] + [self]
+            self.active_hooks = [hook for hook in hooks if hook is not None]
             for num, (file_id, changed, parents3, names3,
                       executable3) in enumerate(entries):
                 child_pb.update('Preparing file merge', num, len(entries))
                 self._merge_names(file_id, parents3, names3, resolver=resolver)
                 if changed:
-                    file_status = self.merge_contents(file_id)
+                    file_status = self._do_merge_contents(file_id)
                 else:
                     file_status = 'unmodified'
                 self._merge_executable(file_id,
@@ -1158,7 +1185,7 @@
             self.tt.adjust_path(names[self.winner_idx[name_winner]],
                                 parent_trans_id, trans_id)
 
-    def merge_contents(self, file_id):
+    def _do_merge_contents(self, file_id):
         """Performs a merge on file_id contents."""
         def contents_pair(tree):
             if file_id not in tree:
@@ -1198,11 +1225,10 @@
         trans_id = self.tt.trans_id_file_id(file_id)
         params = MergeHookParams(self, file_id, trans_id, this_pair[0],
             other_pair[0], winner)
-        hooks = Merger.hooks['merge_file_content']
-        hooks = list(hooks) + [self.default_text_merge]
+        hooks = self.active_hooks
         hook_status = 'not_applicable'
         for hook in hooks:
-            hook_status, lines = hook(params)
+            hook_status, lines = hook.merge_contents(params)
             if hook_status != 'not_applicable':
                 # Don't try any more hooks, this one applies.
                 break
@@ -1279,7 +1305,11 @@
                 'winner is OTHER, but file_id %r not in THIS or OTHER tree'
                 % (file_id,))
 
-    def default_text_merge(self, merge_hook_params):
+    def merge_contents(self, merge_hook_params):
+        """Fallback merge logic after user installed hooks."""
+        # This function is used in merge hooks as the fallback instance.
+        # Perhaps making this function and the functions it calls be a 
+        # a separate class would be better.
         if merge_hook_params.winner == 'other':
             # OTHER is a straight winner, so replace this contents with other
             return self._default_other_winner_merge(merge_hook_params)

=== modified file 'bzrlib/plugins/news_merge/__init__.py'
--- a/bzrlib/plugins/news_merge/__init__.py	2010-01-20 16:05:28 +0000
+++ b/bzrlib/plugins/news_merge/__init__.py	2010-01-28 17:27:16 +0000
@@ -39,46 +39,16 @@
 # overhead of this plugin as minimal as possible.
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
-from bzrlib.plugins.news_merge.news_merge import news_merger
+from bzrlib.plugins.news_merge import news_merge as _mod_news_merge
 """)
 
 from bzrlib.merge import Merger
 
 
-def news_merge_hook(params):
-    """Merger.merge_file_content hook function for bzr-format NEWS files."""
-    # First, check whether this custom merge logic should be used.  We expect
-    # most files should not be merged by this file.
-    if params.winner == 'other':
-        # OTHER is a straight winner, rely on default merge.
-        return 'not_applicable', None
-    elif not params.is_file_merge():
-        # THIS and OTHER aren't both files.
-        return 'not_applicable', None
-    elif not filename_matches_config(params):
-        # The filename isn't listed in the 'news_merge_files' config option.
-        return 'not_applicable', None
-    return news_merger(params)
-
-
-def filename_matches_config(params):
-    affected_files = getattr(params, '_news_merge_affected_files', None)
-    if affected_files is None:
-        config = params.merger.this_tree.branch.get_config()
-        # Until bzr provides a better policy for caching the config, we just
-        # add the part we're interested in to the params to avoid reading the
-        # config files repeatedly (bazaar.conf, location.conf, branch.conf).
-        affected_files = config.get_user_option_as_list('news_merge_files')
-        if affected_files is None:
-            # If nothing was specified in the config, we have nothing to do,
-            # but we use None in the params to start the caching.
-            affected_files = []
-        params._news_merge_affected_files = affected_files
-    if affected_files:
-        filename = params.merger.this_tree.id2path(params.file_id)
-        if filename in affected_files:
-            return True
-    return False
+def news_merge_hook(merger):
+    """Merger.merge_file_content hook for bzr-format NEWS files."""
+    return _mod_news_merge.NewsMerger(merger)
+
 
 def install_hook():
     Merger.hooks.install_named_hook(

=== modified file 'bzrlib/plugins/news_merge/news_merge.py'
--- a/bzrlib/plugins/news_merge/news_merge.py	2010-01-20 16:05:28 +0000
+++ b/bzrlib/plugins/news_merge/news_merge.py	2010-01-28 17:27:16 +0000
@@ -18,54 +18,98 @@
 
 
 from bzrlib.plugins.news_merge.parser import simple_parse
-from bzrlib import merge3
+from bzrlib import merge, merge3
 
 
 magic_marker = '|NEWS-MERGE-MAGIC-MARKER|'
 
 
-def news_merger(params):
-    """Perform a simple 3-way merge of a bzr NEWS file.
-    
-    Each section of a bzr NEWS file is essentially an ordered set of bullet
-    points, so we can simply take a set of bullet points, determine which
-    bullets to add and which to remove, sort, and reserialize.
+class NewsMerger(merge.AbstractPerFileMerger):
+    """Merge bzr NEWS files.
+
+    :ivar: affected_files.
     """
-    # Transform the different versions of the NEWS file into a bunch of text
-    # lines where each line matches one part of the overall structure, e.g. a
-    # heading or bullet.
-    def munge(lines):
-        return list(blocks_to_fakelines(simple_parse(''.join(lines))))
-    this_lines = munge(params.this_lines)
-    other_lines = munge(params.other_lines)
-    base_lines = munge(params.base_lines)
-    m3 = merge3.Merge3(base_lines, this_lines, other_lines)
-    result_lines = []
-    for group in m3.merge_groups():
-        if group[0] == 'conflict':
-            _, base, a, b = group
-            # Are all the conflicting lines bullets?  If so, we can merge this.
-            for line_set in [base, a, b]:
-                for line in line_set:
-                    if not line.startswith('bullet'):
-                        # Something else :(
-                        # Maybe the default merge can cope.
-                        return 'not_applicable', None
-            # Calculate additions and deletions.
-            new_in_a = set(a).difference(base)
-            new_in_b = set(b).difference(base)
-            all_new = new_in_a.union(new_in_b)
-            deleted_in_a = set(base).difference(a)
-            deleted_in_b = set(base).difference(b)
-            # Combine into the final set of bullet points.
-            final = all_new.difference(deleted_in_a).difference(deleted_in_b)
-            # Sort, and emit.
-            final = sorted(final, key=sort_key)
-            result_lines.extend(final)
-        else:
-            result_lines.extend(group[1])
-    # Transform the merged elements back into real blocks of lines.
-    return 'success', list(fakelines_to_blocks(result_lines))
+
+    def __init__(self, merger):
+        super(NewsMerger, self).__init__(merger)
+        self.affected_files = None
+
+    def merge_contents(self, params):
+        """Perform a simple 3-way merge of a bzr NEWS file.
+        
+        Each section of a bzr NEWS file is essentially an ordered set of bullet
+        points, so we can simply take a set of bullet points, determine which
+        bullets to add and which to remove, sort, and reserialize.
+        """
+        # First, check whether this custom merge logic should be used.  We
+        # expect most files should not be merged by this file.
+        if params.winner == 'other':
+            # OTHER is a straight winner, rely on default merge.
+            return 'not_applicable', None
+        elif not params.is_file_merge():
+            # THIS and OTHER aren't both files.
+            return 'not_applicable', None
+        elif not self.filename_matches_config(params):
+            # The filename isn't listed in the 'news_merge_files' config
+            # option.
+            return 'not_applicable', None
+        # Transform the different versions of the NEWS file into a bunch of
+        # text lines where each line matches one part of the overall
+        # structure, e.g. a heading or bullet.
+        def munge(lines):
+            return list(blocks_to_fakelines(simple_parse(''.join(lines))))
+        this_lines = munge(params.this_lines)
+        other_lines = munge(params.other_lines)
+        base_lines = munge(params.base_lines)
+        m3 = merge3.Merge3(base_lines, this_lines, other_lines)
+        result_lines = []
+        for group in m3.merge_groups():
+            if group[0] == 'conflict':
+                _, base, a, b = group
+                # Are all the conflicting lines bullets?  If so, we can merge
+                # this.
+                for line_set in [base, a, b]:
+                    for line in line_set:
+                        if not line.startswith('bullet'):
+                            # Something else :(
+                            # Maybe the default merge can cope.
+                            return 'not_applicable', None
+                # Calculate additions and deletions.
+                new_in_a = set(a).difference(base)
+                new_in_b = set(b).difference(base)
+                all_new = new_in_a.union(new_in_b)
+                deleted_in_a = set(base).difference(a)
+                deleted_in_b = set(base).difference(b)
+                # Combine into the final set of bullet points.
+                final = all_new.difference(deleted_in_a).difference(
+                    deleted_in_b)
+                # Sort, and emit.
+                final = sorted(final, key=sort_key)
+                result_lines.extend(final)
+            else:
+                result_lines.extend(group[1])
+        # Transform the merged elements back into real blocks of lines.
+        return 'success', list(fakelines_to_blocks(result_lines))
+
+    def filename_matches_config(self, params):
+        affected_files = self.affected_files
+        if affected_files is None:
+            config = self.merger.this_tree.branch.get_config()
+            # Until bzr provides a better policy for caching the config, we
+            # just add the part we're interested in to the params to avoid
+            # reading the config files repeatedly (bazaar.conf, location.conf,
+            # branch.conf).
+            affected_files = config.get_user_option_as_list('news_merge_files')
+            if affected_files is None:
+                # If nothing was specified in the config, we have nothing to do,
+                # but we use None in the params to start the caching.
+                affected_files = []
+            self.affected_files = affected_files
+        if affected_files:
+            filename = params.merger.this_tree.id2path(params.file_id)
+            if filename in affected_files:
+                return True
+        return False
 
 
 def blocks_to_fakelines(blocks):

=== modified file 'bzrlib/plugins/news_merge/tests/test_news_merge.py'
--- a/bzrlib/plugins/news_merge/tests/test_news_merge.py	2010-01-20 16:05:28 +0000
+++ b/bzrlib/plugins/news_merge/tests/test_news_merge.py	2010-01-28 17:27:16 +0000
@@ -21,7 +21,9 @@
     option,
     tests,
     )
+from bzrlib.merge import Merger
 from bzrlib.plugins import news_merge
+import bzrlib.plugins.news_merge.news_merge
 from bzrlib.tests import test_merge_core
 
 
@@ -29,23 +31,16 @@
 
     def test_affected_files_cached(self):
         """Ensures that the config variable is cached"""
-        news_merge.install_hook()
-        self.affected_files = None
-        orig = news_merge.filename_matches_config
-        def wrapper(params):
-            ret = orig(params)
-            # Capture the value set by the hook
-            self.affected_files = params._news_merge_affected_files
-            return ret
-        def restore():
-            news_merge.filename_matches_config = orig
-        self.addCleanup(restore)
-        news_merge.filename_matches_config = wrapper
-
+        def make_news_hook(merger):
+            result = news_merge.news_merge.NewsMerger(merger)
+            self.merger = result
+            return result
+        Merger.hooks.install_named_hook( 'merge_file_content', make_news_hook,
+            'test NEWS file merge')
         builder = test_merge_core.MergeBuilder(self.test_base_dir)
         self.addCleanup(builder.cleanup)
         builder.add_file('NEWS', builder.tree_root, 'name1', 'text1', True)
         builder.change_contents('NEWS', other='text4', this='text3')
         conflicts = builder.merge()
         # The hook should set the variable
-        self.assertIsNot(None, self.affected_files)
+        self.assertIsNot(None, self.merger.affected_files)

=== modified file 'bzrlib/tests/per_merger.py'
--- a/bzrlib/tests/per_merger.py	2010-01-20 16:05:28 +0000
+++ b/bzrlib/tests/per_merger.py	2010-01-28 17:27:16 +0000
@@ -198,55 +198,85 @@
         TestCaseWithTransport.setUp(self)
         self.hook_log = []
 
+    def install_hook_inactive(self):
+        def inactive_factory(merger):
+            # This hook is never active
+            self.hook_log.append(('inactive',))
+            return None
+        _mod_merge.Merger.hooks.install_named_hook(
+            'merge_file_content', inactive_factory, 'test hook (inactive)')
+
     def install_hook_noop(self):
-        def hook_na(merge_params):
-            # This hook unconditionally does nothing.
-            self.hook_log.append(('no-op',))
-            return 'not_applicable', None
+        test = self
+        class HookNA(_mod_merge.AbstractPerFileMerger):
+            def merge_contents(self, merge_params):
+                # This hook unconditionally does nothing.
+                test.hook_log.append(('no-op',))
+                return 'not_applicable', None
+        def hook_na_factory(merger):
+            return HookNA(merger)
         _mod_merge.Merger.hooks.install_named_hook(
-            'merge_file_content', hook_na, 'test hook (no-op)')
+            'merge_file_content', hook_na_factory, 'test hook (no-op)')
 
     def install_hook_success(self):
-        def hook_success(merge_params):
-            self.hook_log.append(('success',))
-            if merge_params.file_id == '1':
-                return 'success', ['text-merged-by-hook']
-            return 'not_applicable', None
+        test = self
+        class HookSuccess(_mod_merge.AbstractPerFileMerger):
+            def merge_contents(self, merge_params):
+                test.hook_log.append(('success',))
+                if merge_params.file_id == '1':
+                    return 'success', ['text-merged-by-hook']
+                return 'not_applicable', None
+        def hook_success_factory(merger):
+            return HookSuccess(merger)
         _mod_merge.Merger.hooks.install_named_hook(
-            'merge_file_content', hook_success, 'test hook (success)')
+            'merge_file_content', hook_success_factory, 'test hook (success)')
 
     def install_hook_conflict(self):
-        def hook_conflict(merge_params):
-            self.hook_log.append(('conflict',))
-            if merge_params.file_id == '1':
-                return 'conflicted', ['text-with-conflict-markers-from-hook']
-            return 'not_applicable', None
+        test = self
+        class HookConflict(_mod_merge.AbstractPerFileMerger):
+            def merge_contents(self, merge_params):
+                test.hook_log.append(('conflict',))
+                if merge_params.file_id == '1':
+                    return ('conflicted',
+                        ['text-with-conflict-markers-from-hook'])
+                return 'not_applicable', None
+        def hook_conflict_factory(merger):
+            return HookConflict(merger)
         _mod_merge.Merger.hooks.install_named_hook(
-            'merge_file_content', hook_conflict, 'test hook (delete)')
+            'merge_file_content', hook_conflict_factory, 'test hook (delete)')
 
     def install_hook_delete(self):
-        def hook_delete(merge_params):
-            self.hook_log.append(('delete',))
-            if merge_params.file_id == '1':
-                return 'delete', None
-            return 'not_applicable', None
+        test = self
+        class HookDelete(_mod_merge.AbstractPerFileMerger):
+            def merge_contents(self, merge_params):
+                test.hook_log.append(('delete',))
+                if merge_params.file_id == '1':
+                    return 'delete', None
+                return 'not_applicable', None
+        def hook_delete_factory(merger):
+            return HookDelete(merger)
         _mod_merge.Merger.hooks.install_named_hook(
-            'merge_file_content', hook_delete, 'test hook (delete)')
+            'merge_file_content', hook_delete_factory, 'test hook (delete)')
 
     def install_hook_log_lines(self):
         """Install a hook that saves the get_lines for the this, base and other
         versions of the file.
         """
-        def hook_log_lines(merge_params):
-            self.hook_log.append((
-                'log_lines',
-                merge_params.this_lines,
-                merge_params.other_lines,
-                merge_params.base_lines,
-                ))
-            return 'not_applicable', None
+        test = self
+        class HookLogLines(_mod_merge.AbstractPerFileMerger):
+            def merge_contents(self, merge_params):
+                test.hook_log.append((
+                    'log_lines',
+                    merge_params.this_lines,
+                    merge_params.other_lines,
+                    merge_params.base_lines,
+                    ))
+                return 'not_applicable', None
+        def hook_log_lines_factory(merger):
+            return HookLogLines(merger)
         _mod_merge.Merger.hooks.install_named_hook(
-            'merge_file_content', hook_log_lines, 'test hook (log_lines)')
+            'merge_file_content', hook_log_lines_factory,
+            'test hook (log_lines)')
 
     def make_merge_builder(self):
         builder = MergeBuilder(self.test_base_dir)
@@ -315,6 +345,18 @@
         self.assertEqual(
             [('log_lines', ['text2'], ['text3'], ['text1'])], self.hook_log)
 
+    def test_chain_when_not_active(self):
+        """When a hook function returns None, merging still works."""
+        self.install_hook_inactive()
+        self.install_hook_success()
+        builder = self.make_merge_builder()
+        self.create_file_needing_contents_merge(builder, "1")
+        conflicts = builder.merge(self.merge_type)
+        self.assertEqual(conflicts, [])
+        self.assertEqual(
+            builder.this.get_file('1').read(), 'text-merged-by-hook')
+        self.assertEqual([('inactive',), ('success',)], self.hook_log)
+
     def test_chain_when_not_applicable(self):
         """When a hook function returns not_applicable, the next function is
         tried (when one exists).




More information about the bazaar-commits mailing list