[MERGE REVIEW] --reprocess support for weaves

Mon Apr 10 03:03:34 BST 2006

On 09/04/2006, at 11:21 AM, Aaron Bentley wrote:

> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> Hi all,
>
> This patch does a number of things:
> 1. Refactor some of the weave merging functionality into TextMerge
> 2. Change the output of a weave merge into a more structured format,
> leaving text output to TextMerge
> 3. Implement a two-way merge (Merge2) that can operate on the  
> structured
> merge output
> 4. Refactor more weave code into PlanWeaveMerge and WeaveMerge
> 5. Implement weave reprocessing in terms of Merge2, for both merge and
> remerge.
> 6. Implement conflict detection in terms of the structured merge  
> output
>
> You may find the branch itself more comprehensible than the patch.   
> It's at
> http://code.aaronbentley.com/bzr/bzrrepo/bzr.ab/
>
> A future possibility would be to turn Merge3 into a child of  
> TextMerge.
> ~ Or heck, even Diff3Merge, if we want.

Looks good; comments below.

>
> Aaron
> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v1.4.1 (GNU/Linux)
> Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org
>
> iD8DBQFEOGGk0F+nu1YWqI0RAlQDAJ4v2mII/oMRRYa00Ljikx4lhZ/E6wCbBJG3
> Jt1zgI7yQHFG+piLW5G7JFk=
> =MYbs
> -----END PGP SIGNATURE-----
> === added file 'b/bzrlib/tests/blackbox/test_merge.py'
> --- /dev/null	
> +++ b/bzrlib/tests/blackbox/test_merge.py	
> @@ -0,0 +1,111 @@
> +# Copyright (C) 2006 Canonical Ltd
> +#
> +# This program is free software; you can redistribute it and/or  
> modify
> +# it under the terms of the GNU General Public License as  
> published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write to the Free Software
> +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA   
> 02111-1307  USA
> +#
> +# Author: Aaron Bentley <aaron.bentley at utoronto.ca>
> +
> +import os
> +
> +from bzrlib.bzrdir import BzrDir
> +from bzrlib.tests.blackbox import ExternalBase
> +from bzrlib.workingtree import WorkingTree
> +
> +class TestMerge(ExternalBase):
> +
> +    def example_branch(test):
> +        test.runbzr('init')
> +        file('hello', 'wt').write('foo')
> +        test.runbzr('add hello')
> +        test.runbzr('commit -m setup hello')
> +        file('goodbye', 'wt').write('baz')
> +        test.runbzr('add goodbye')
> +        test.runbzr('commit -m setup goodbye')
> +
> +    def test_merge_reprocess(self):
> +        d = BzrDir.create_standalone_workingtree('.')
> +        d.commit('h')
> +        self.run_bzr('merge', '.', '--reprocess', '--merge-type',  
> 'weave')
> +
> +    def test_merge(self):
> +        from bzrlib.branch import Branch
> +
> +        os.mkdir('a')
> +        os.chdir('a')
> +        self.example_branch()
> +        os.chdir('..')
> +        self.runbzr('branch a b')
> +        os.chdir('b')
> +        file('goodbye', 'wt').write('quux')
> +        self.runbzr(['commit',  '-m',  "more u's are always good"])
> +
> +        os.chdir('../a')
> +        file('hello', 'wt').write('quuux')
> +        # We can't merge when there are in-tree changes
> +        self.runbzr('merge ../b', retcode=3)
> +        self.runbzr(['commit', '-m', "Like an epidemic of u's"])
> +        self.runbzr('merge ../b -r last:1..last:1 --merge-type  
> blooof',
> +                    retcode=3)
> +        self.runbzr('merge ../b -r last:1..last:1 --merge-type  
> merge3')
> +        self.runbzr('revert --no-backup')
> +        self.runbzr('merge ../b -r last:1..last:1 --merge-type  
> weave')
> +        self.runbzr('revert --no-backup')
> +        self.runbzr('merge ../b -r last:1..last:1 --reprocess')
> +        self.runbzr('revert --no-backup')
> +        self.runbzr('merge ../b -r last:1')
> +        self.check_file_contents('goodbye', 'quux')
> +        # Merging a branch pulls its revision into the tree
> +        a = WorkingTree.open('.')
> +        b = Branch.open('../b')
> +        a.branch.repository.get_revision_xml(b.last_revision())
> +        self.log('pending merges: %s', a.pending_merges())
> +        self.assertEquals(a.pending_merges(),
> +                          [b.last_revision()])
> +        self.runbzr('commit -m merged')
> +        self.runbzr('merge ../b -r last:1')
> +        self.assertEqual(a.pending_merges(), [])
> +
> +    def test_merge_with_missing_file(self):
> +        """Merge handles missing file conflicts"""
> +        os.mkdir('a')
> +        os.chdir('a')
> +        os.mkdir('sub')
> +        print >> file('sub/a.txt', 'wb'), "hello"
> +        print >> file('b.txt', 'wb'), "hello"
> +        print >> file('sub/c.txt', 'wb'), "hello"
> +        self.runbzr('init')
> +        self.runbzr('add')
> +        self.runbzr(('commit', '-m', 'added a'))
> +        self.runbzr('branch . ../b')
> +        print >> file('sub/a.txt', 'ab'), "there"
> +        print >> file('b.txt', 'ab'), "there"
> +        print >> file('sub/c.txt', 'ab'), "there"
> +        self.runbzr(('commit', '-m', 'Added there'))
> +        os.unlink('sub/a.txt')
> +        os.unlink('sub/c.txt')
> +        os.rmdir('sub')
> +        os.unlink('b.txt')
> +        self.runbzr(('commit', '-m', 'Removed a.txt'))
> +        os.chdir('../b')
> +        print >> file('sub/a.txt', 'ab'), "something"
> +        print >> file('b.txt', 'ab'), "something"
> +        print >> file('sub/c.txt', 'ab'), "something"
> +        self.runbzr(('commit', '-m', 'Modified a.txt'))
> +        self.runbzr('merge ../a/', retcode=1)
> +        self.assert_(os.path.exists('sub/a.txt.THIS'))
> +        self.assert_(os.path.exists('sub/a.txt.BASE'))
> +        os.chdir('../a')
> +        self.runbzr('merge ../b/', retcode=1)
> +        self.assert_(os.path.exists('sub/a.txt.OTHER'))
> +        self.assert_(os.path.exists('sub/a.txt.BASE'))
>
> === added file 'b/bzrlib/tests/test_textmerge.py'
> --- /dev/null	
> +++ b/bzrlib/tests/test_textmerge.py	
> @@ -0,0 +1,41 @@
> +# Copyright (C) 2006 Canonical Ltd
> +#
> +# This program is free software; you can redistribute it and/or  
> modify
> +# it under the terms of the GNU General Public License as  
> published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write to the Free Software
> +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA   
> 02111-1307  USA
> +#
> +# Author: Aaron Bentley <aaron.bentley at utoronto.ca>
> +from bzrlib.textmerge import Merge2
> +from bzrlib.tests import TestCase
> +class TestMerge2(TestCase):
> +    def test_agreed(self):
> +        lines = "a\nb\nc\nd\ne\nf\n".splitlines(True)
> +        mlines = list(Merge2(lines, lines).merge_lines()[0])
> +        self.assertEqualDiff(mlines, lines)
> +
> +    def test_conflict(self):
> +        lines_a = "a\nb\nc\nd\ne\nf\ng\nh\n".splitlines(True)
> +        lines_b = "z\nb\nx\nd\ne\ne\nf\ng\ny\n".splitlines(True)
> +        expected = "<\na\n=\nz\n>\nb\n<\nc\n=\nx\n>\nd\n<\n=\ne\n> 
> \ne\nf\n"\
> +                   "g\n<\nh\n=\ny\n>\n"
> +        m2 = Merge2(lines_a, lines_b, '<\n', '>\n', '=\n')
> +        mlines= m2.merge_lines()[0]
> +        self.assertEqualDiff(''.join(mlines), expected)
> +        mlines= m2.merge_lines(reprocess=True)[0]
> +        self.assertEqualDiff(''.join(mlines), expected)
> +
> +    def test_reprocess(self):
> +        struct = [('a', 'b'), ('c',), ('def','geh'), ('i',)]
> +        expect = [('a', 'b'), ('c',), ('d', 'g'), ('e',), ('f',  
> 'h'), ('i',)]
> +        result = Merge2.reprocess_struct(struct)
> +        self.assertEqual(list(result), expect)
>
> === added file 'b/bzrlib/textmerge.py'
> --- /dev/null	
> +++ b/bzrlib/textmerge.py	
> @@ -0,0 +1,140 @@
> +# Copyright (C) 2005, 2006 Canonical Ltd
> +#
> +# This program is free software; you can redistribute it and/or  
> modify
> +# it under the terms of the GNU General Public License as  
> published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write to the Free Software
> +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA   
> 02111-1307  USA
> +#
> +# Author: Martin Pool <mbp at canonical.com>
> +#         Aaron Bentley <aaron.bentley at utoronto.ca>
> +
> +
> +from difflib import SequenceMatcher
> +
> +
> +class TextMerge(object):
> +    """Base class for text-mergers
> +    Subclasses must implement _merge_struct.

OK, but what is _merge_struct?  Perhaps add a base implementation  
that raises NotImplementedError and has a docstring describing what  
it should return.  (A sequence of tuples for conflicted/non- 
conflicted regions?)

> +
> +    Many methods produce or consume structured merge information.
> +    This is an iterable of tuples of lists of lines.
> +    Each tuple may have a length of 1 - 3, depending on whether  
> the region it
> +    represents is conflicted.
> +
> +    Unconflicted region tuples have length 1.
> +    Conflicted region tuples have length 2 or 3.  Index 1 is  
> text_a, e.g. THIS.
> +    Index 1 is text_b, e.g. OTHER.  Index 2 is optional.  If  
> present, it
> +    represents BASE.
> +    """

This could do with more documentation: how do you get one of these  
objects, and what do you do with it when you have it?

> +    # TODO: Show some version information (e.g. author, date) on  
> conflicted
> +    # regions.
> +
> +    def __init__(self, a_marker='<<<<<<< \n', b_marker='>>>>>>> \n',
> +                 split_marker='=======\n'):
> +        self.a_marker = a_marker
> +        self.b_marker = b_marker
> +        self.split_marker = split_marker
> +
> +    def struct_to_lines(self, struct_iter):
> +        """Convert merge result tuples to lines"""
> +        for lines in struct_iter:
> +            if len(lines) == 1:
> +                for line in lines[0]:
> +                    yield line
> +            else:
> +                yield self.a_marker
> +                for line in lines[0]:
> +                    yield line
> +                yield self.split_marker
> +                for line in lines[1]:
> +                    yield line
> +                yield self.b_marker
> +
> +    def iter_useful(self, struct_iter):
> +        """Iterate through input tuples, skipping empty ones."""
> +        for group in struct_iter:
> +            if len(group[0]) > 0:
> +                yield group
> +            elif len(group) > 1 and len(group[1]) > 0:
> +                yield group
> +
> +    def merge_lines(self, reprocess=False):
> +        """
> +        Produce an iterable of lines, suitable for writing to a file
> +        Returns a tuple of (line iterable, conflict indicator)
> +        If reprocess is True, a two-way merge will be performed on  
> the
> +        intermediate structure, to reduce conflict regions.
> +        """
> +        struct = []
> +        conflicts = False
> +        for group in self.merge_struct(reprocess):
> +            struct.append(group)
> +            if len(group) > 1:
> +                conflicts = True
> +        return self.struct_to_lines(struct), conflicts
> +
> +    def merge_struct(self, reprocess=False):
> +        """Produce structured merge info"""
> +        struct_iter = self.iter_useful(self._merge_struct())
> +        if reprocess is True:
> +            return self.reprocess_struct(struct_iter)
> +        else:
> +            return struct_iter
> +
> +    @staticmethod
> +    def reprocess_struct(struct_iter):
> +        """
> +        Perform a two-way merge on a two-way merge on structural  
> merge info.
> +        This reduces the size of conflict regions, but breaks the  
> connection
> +        between the BASE and the conflict region.
> +
> +        This process may split a single conflict region into  
> several smaller
> +        ones, but will not introduce new conflicts.
> +        """
> +        for group in struct_iter:
> +            if len(group) == 1:
> +                yield group
> +            else:
> +                for newgroup in Merge2(group[0], group 
> [1]).merge_struct():
> +                    yield newgroup
> +
> +
> +class Merge2(TextMerge):
> +    """
> +    Two-way merge.
> +    In a two way merge, common regions are shown as unconflicting,  
> and uncommon
> +    regions produce conflicts.
> +    """

The summary should be on the same line as the opening quote.

> +
> +    def __init__(self, lines_a, lines_b, a_marker='<<<<<<< \n',
> +                 b_marker='>>>>>>> \n', split_marker='=======\n'):
> +        TextMerge.__init__(self, a_marker, b_marker, split_marker)
> +        self.lines_a = lines_a
> +        self.lines_b = lines_b
> +

To avoid repetition perhaps the default markers should be constants  
in the base class?

> +    def _merge_struct(self):
> +        """
> +        Return structured merge info.
> +        See TextMerge docstring.
> +        """
> +        sm = SequenceMatcher(None, self.lines_a, self.lines_b)
> +        pos_a = 0
> +        pos_b = 0
> +        for ai, bi, l in sm.get_matching_blocks():
> +            # non-matching lines
> +            yield(self.lines_a[pos_a:ai], self.lines_b[pos_b:bi])
> +            # matching lines
> +            yield(self.lines_a[ai:ai+l],)
> +            pos_a = ai + l
> +            pos_b = bi + l
> +        # final non-matching lines
> +        yield(self.lines_a[pos_a:-1], self.lines_b[pos_b:-1])
>
> === modified file 'a/bzrlib/builtins.py'
> --- a/bzrlib/builtins.py	
> +++ b/bzrlib/builtins.py	
> @@ -2405,7 +2405,7 @@
>      if show_base and not merge_type is Merge3Merger:
>          raise BzrCommandError("Show-base is not supported for this  
> merge"
>                                " type. %s" % merge_type)
> -    if reprocess and not merge_type is Merge3Merger:
> +    if reprocess and not merge_type.supports_reprocess:
>          raise BzrCommandError("Reprocess is not supported for this  
> merge"
>                                " type. %s" % merge_type)
>      if reprocess and show_base:
>
> === modified file 'a/bzrlib/merge.py'
> --- a/bzrlib/merge.py	
> +++ b/bzrlib/merge.py	
> @@ -44,6 +44,7 @@
>  from bzrlib.transform import (TreeTransform, resolve_conflicts,  
> cook_conflicts,
>                                conflicts_strings, FinalPaths,  
> create_by_entry,
>                                unique_add)
> +from bzrlib.versionedfile import WeaveMerge
>  import bzrlib.ui
>
>  # TODO: Report back as changes are merged in
> @@ -244,7 +245,7 @@
>              kwargs['reprocess'] = self.reprocess
>          elif self.reprocess:
>              raise BzrError("Reprocess is not supported for this  
> merge"
> -                                  " type. %s" % merge_type)
> +                                  " type. %s" % self.merge_type)

May be better as

   "Reprocess is not supported for merge type %s"

Perhaps we should switch here to a more concrete name than  
"reprocess", e.g. "conflict reduction".

>          if self.merge_type.supports_show_base:
>              kwargs['show_base'] = self.show_base
>          elif self.show_base:
> @@ -747,17 +748,18 @@
>
>  class WeaveMerger(Merge3Merger):
>      """Three-way tree merger, text weave merger."""
> -    supports_reprocess = False
> +    supports_reprocess = True
>      supports_show_base = False
>
>      def __init__(self, working_tree, this_tree, base_tree,  
> other_tree,
> -                 interesting_ids=None, pb=DummyProgress(), pp=None):
> +                 interesting_ids=None, pb=DummyProgress(), pp=None,
> +                 reprocess=False):
>          self.this_revision_tree = self._get_revision_tree(this_tree)
>          self.other_revision_tree = self._get_revision_tree 
> (other_tree)
>          super(WeaveMerger, self).__init__(working_tree, this_tree,
>                                            base_tree, other_tree,
>                                             
> interesting_ids=interesting_ids,
> -                                          pb=pb, pp=pp)
> +                                          pb=pb, pp=pp,  
> reprocess=reprocess)
>
>      def _get_revision_tree(self, tree):
>          """Return a revision tree releated to this tree.
> @@ -787,9 +789,9 @@
>          this_revision_id = self.this_revision_tree.inventory 
> [file_id].revision
>          other_revision_id = \
>              self.other_revision_tree.inventory[file_id].revision
> -        plan =  weave.plan_merge(this_revision_id, other_revision_id)
> -        return weave.weave_merge(plan, '<<<<<<< TREE\n',
> -                                       '>>>>>>> MERGE-SOURCE\n')
> +        wm = WeaveMerge(weave, this_revision_id, other_revision_id,
> +                        '<<<<<<< TREE\n', '>>>>>>> MERGE-SOURCE\n')
> +        return wm.merge_lines(self.reprocess)
>
>      def text_merge(self, file_id, trans_id):
>          """Perform a (weave) text merge for a given file and file-id.
> @@ -797,8 +799,7 @@
>          and a conflict will be noted.
>          """
>          self._check_file(file_id)
> -        lines = list(self._merged_lines(file_id))
> -        conflicts = '<<<<<<< TREE\n' in lines
> +        lines, conflicts = self._merged_lines(file_id)
>          self.tt.create_file(lines, trans_id)
>          if conflicts:
>              self._raw_conflicts.append(('text conflict', trans_id))
>
> === modified file 'a/bzrlib/tests/__init__.py'
> --- a/bzrlib/tests/__init__.py	
> +++ b/bzrlib/tests/__init__.py	
> @@ -972,6 +972,7 @@
>                     'bzrlib.tests.test_store',
>                     'bzrlib.tests.test_symbol_versioning',
>                     'bzrlib.tests.test_testament',
> +                   'bzrlib.tests.test_textmerge',
>                     'bzrlib.tests.test_trace',
>                     'bzrlib.tests.test_transactions',
>                     'bzrlib.tests.test_transform',
>
> === modified file 'a/bzrlib/tests/blackbox/__init__.py'
> --- a/bzrlib/tests/blackbox/__init__.py	
> +++ b/bzrlib/tests/blackbox/__init__.py	
> @@ -51,6 +51,7 @@
>                       'bzrlib.tests.blackbox.test_init',
>                       'bzrlib.tests.blackbox.test_log',
>                       'bzrlib.tests.blackbox.test_logformats',
> +                     'bzrlib.tests.blackbox.test_merge',
>                       'bzrlib.tests.blackbox.test_missing',
>                       'bzrlib.tests.blackbox.test_outside_wt',
>                       'bzrlib.tests.blackbox.test_pull',
>
> === modified file 'a/bzrlib/tests/blackbox/test_too_much.py'
> --- a/bzrlib/tests/blackbox/test_too_much.py	
> +++ b/bzrlib/tests/blackbox/test_too_much.py	
> @@ -321,78 +321,6 @@
>          self.assertEqual('2', target.open_branch().last_revision())
>          self.assertEqual('2', target.open_workingtree 
> ().last_revision())
>          self.assertTrue(target.open_branch 
> ().repository.has_revision('2'))
> -
> -    def test_merge(self):
> -        from bzrlib.branch import Branch

Nice to see this shrink.

> -
> -        os.mkdir('a')
> -        os.chdir('a')
> -        self.example_branch()
> -        os.chdir('..')
> -        self.runbzr('branch a b')
> -        os.chdir('b')
> -        file('goodbye', 'wt').write('quux')
> -        self.runbzr(['commit',  '-m',  "more u's are always good"])
> -
> -        os.chdir('../a')
> -        file('hello', 'wt').write('quuux')
> -        # We can't merge when there are in-tree changes
> -        self.runbzr('merge ../b', retcode=3)
> -        self.runbzr(['commit', '-m', "Like an epidemic of u's"])
> -        self.runbzr('merge ../b -r last:1..last:1 --merge-type  
> blooof',
> -                    retcode=3)
> -        self.runbzr('merge ../b -r last:1..last:1 --merge-type  
> merge3')
> -        self.runbzr('revert --no-backup')
> -        self.runbzr('merge ../b -r last:1..last:1 --merge-type  
> weave')
> -        self.runbzr('revert --no-backup')
> -        self.runbzr('merge ../b -r last:1..last:1 --reprocess')
> -        self.runbzr('revert --no-backup')
> -        self.runbzr('merge ../b -r last:1')
> -        self.check_file_contents('goodbye', 'quux')
> -        # Merging a branch pulls its revision into the tree
> -        a = WorkingTree.open('.')
> -        b = Branch.open('../b')
> -        a.branch.repository.get_revision_xml(b.last_revision())
> -        self.log('pending merges: %s', a.pending_merges())
> -        self.assertEquals(a.pending_merges(),
> -                          [b.last_revision()])
> -        self.runbzr('commit -m merged')
> -        self.runbzr('merge ../b -r last:1')
> -        self.assertEqual(a.pending_merges(), [])
> -
> -    def test_merge_with_missing_file(self):
> -        """Merge handles missing file conflicts"""
> -        os.mkdir('a')
> -        os.chdir('a')
> -        os.mkdir('sub')
> -        print >> file('sub/a.txt', 'wb'), "hello"
> -        print >> file('b.txt', 'wb'), "hello"
> -        print >> file('sub/c.txt', 'wb'), "hello"
> -        self.runbzr('init')
> -        self.runbzr('add')
> -        self.runbzr(('commit', '-m', 'added a'))
> -        self.runbzr('branch . ../b')
> -        print >> file('sub/a.txt', 'ab'), "there"
> -        print >> file('b.txt', 'ab'), "there"
> -        print >> file('sub/c.txt', 'ab'), "there"
> -        self.runbzr(('commit', '-m', 'Added there'))
> -        os.unlink('sub/a.txt')
> -        os.unlink('sub/c.txt')
> -        os.rmdir('sub')
> -        os.unlink('b.txt')
> -        self.runbzr(('commit', '-m', 'Removed a.txt'))
> -        os.chdir('../b')
> -        print >> file('sub/a.txt', 'ab'), "something"
> -        print >> file('b.txt', 'ab'), "something"
> -        print >> file('sub/c.txt', 'ab'), "something"
> -        self.runbzr(('commit', '-m', 'Modified a.txt'))
> -        self.runbzr('merge ../a/', retcode=1)
> -        self.assert_(os.path.exists('sub/a.txt.THIS'))
> -        self.assert_(os.path.exists('sub/a.txt.BASE'))
> -        os.chdir('../a')
> -        self.runbzr('merge ../b/', retcode=1)
> -        self.assert_(os.path.exists('sub/a.txt.OTHER'))
> -        self.assert_(os.path.exists('sub/a.txt.BASE'))
>
>      def test_inventory(self):
>          bzr = self.runbzr
> @@ -738,8 +666,8 @@
>          assert '|||||||' not in conflict_text
>          assert 'hi world' not in conflict_text
>          self.runbzr('remerge . --merge-type weave --show-base',  
> retcode=3)
> -        self.runbzr('remerge . --merge-type weave --reprocess',  
> retcode=3)
>          self.runbzr('remerge . --show-base --reprocess', retcode=3)
> +        self.runbzr('remerge . --merge-type weave --reprocess',  
> retcode=1)
>          self.runbzr('remerge hello --show-base', retcode=1)
>          self.runbzr('remerge hello --reprocess', retcode=1)
>          self.runbzr('resolve --all')
>
> === modified file 'a/bzrlib/tests/test_merge_core.py'
> --- a/bzrlib/tests/test_merge_core.py	
> +++ b/bzrlib/tests/test_merge_core.py	
> @@ -43,7 +43,7 @@
>          for tt in (self.this_tt, self.base_tt, self.other_tt):
>              new_file(tt)
>
> -    def merge(self, merge_type=Merge3Merger, interesting_ids=None):
> +    def merge(self, merge_type=Merge3Merger, interesting_ids=None,  
> **kwargs):
>          self.base_tt.apply()
>          self.base.commit('base commit')
>          for tt, wt in ((self.this_tt, self.this), (self.other_tt,  
> self.other)):
> @@ -54,7 +54,7 @@
>          self.this.branch.fetch(self.other.branch)
>          other_basis = self.other.branch.basis_tree()
>          merger = merge_type(self.this, self.this, self.base,  
> other_basis,
> -                            interesting_ids=interesting_ids)
> +                            interesting_ids=interesting_ids,  
> **kwargs)
>          return merger.cooked_conflicts
>
>      def list_transforms(self):
> @@ -244,6 +244,29 @@
>      def test_contents_merge3(self):
>          """Test diff3 merging"""
>          self.do_contents_test(WeaveMerger)
> +
> +    def test_reprocess_weave(self):
> +        # Reprocess works on weaves, and behaves as expected
> +        builder = MergeBuilder()
> +        builder.add_file('a', 'TREE_ROOT', 'blah', 'a', False)
> +        builder.change_contents('a', this='b\nc\nd\ne\n', other='z 
> \nc\nd\ny\n')
> +        builder.merge(WeaveMerger, reprocess=True)
> +        expected = """<<<<<<< TREE
> +b
> +=======
> +z
> +>>>>>>> MERGE-SOURCE
> +c
> +d
> +<<<<<<< TREE
> +e
> +=======
> +y
> +>>>>>>> MERGE-SOURCE
> +"""
> +        self.assertEqualDiff(builder.this.get_file("a").read(),  
> expected)
> +
> +
>
>      def do_contents_test(self, merge_factory):
>          """Test merging with specified ContentsChange factory"""
>
> === modified file 'a/bzrlib/versionedfile.py'
> --- a/bzrlib/versionedfile.py	
> +++ b/bzrlib/versionedfile.py	
> @@ -31,6 +31,7 @@
>  import bzrlib.errors as errors
>  from bzrlib.inter import InterObject
>  from bzrlib.symbol_versioning import *
> +from bzrlib.textmerge import TextMerge
>  from bzrlib.transport.memory import MemoryTransport
>  from bzrlib.tsort import topo_sort
>  from bzrlib import ui
> @@ -393,7 +394,25 @@
>          """Walk the names list."""
>          return iter(self.versions())
>
> -    def plan_merge(self, ver_a, ver_b):
> +    def plan_merge(versionedfile, ver_a, ver_b):
> +        return PlanWeaveMerge.plan_merge(versionedfile, ver_a, ver_b)
> +
> +    def weave_merge(self, plan, a_marker='<<<<<<< \n',  
> b_marker='>>>>>>> \n'):
> +        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines 
> ()[0]
> +
> +class PlanWeaveMerge(TextMerge):
> +    """Weave merge that takes a plan as its input.
> +
> +    This exists so that VersionedFile.plan_merge is  
> implementable.  Otherwise,
> +    we'd just use WeaveMerge everywhere.
> +    """

I can understand having this class split out but I don't understand  
this comment.  Would it be OK to say

   Most callers will want to use WeaveMerge instead.

Does this need to be a different class?  Why not just have a  
WeaveMerge.from_plan static method, and then the regular constructor  
will get the plan and call that.

> +
> +    def __init__(self, plan, a_marker='<<<<<<< \n',  
> b_marker='>>>>>>> \n'):
> +        TextMerge.__init__(self, a_marker, b_marker)
> +        self.plan = plan
> +
> +    @staticmethod
> +    def plan_merge(versionedfile, ver_a, ver_b):
>          """Return pseudo-annotation indicating how the two  
> versions merge.
>
>          This is computed between versions a and b and their common
> @@ -401,11 +420,12 @@
>
>          Weave lines present in none of them are skipped entirely.
>          """
> -        inc_a = set(self.get_ancestry([ver_a]))
> -        inc_b = set(self.get_ancestry([ver_b]))
> +        inc_a = set(versionedfile.get_ancestry([ver_a]))
> +        inc_b = set(versionedfile.get_ancestry([ver_b]))
>          inc_c = inc_a & inc_b
>
> -        for lineno, insert, deleteset, line in self.walk([ver_a,  
> ver_b]):
> +        for lineno, insert, deleteset, line in\
> +            versionedfile.walk([ver_a, ver_b]):
>              if deleteset & inc_c:
>                  # killed in parent; can't be in either a or b
>                  # not relevant to our work
> @@ -439,45 +459,36 @@
>
>          yield 'unchanged', ''           # terminator
>
> -    def weave_merge(self, plan, a_marker='<<<<<<< \n',  
> b_marker='>>>>>>> \n'):
> +    def _merge_struct(self):
>          lines_a = []
>          lines_b = []
>          ch_a = ch_b = False
> -        # TODO: Return a structured form of the conflicts (e.g. 2- 
> tuples for
> -        # conflicted regions), rather than just inserting the  
> markers.
> -        #
> -        # TODO: Show some version information (e.g. author, date) on
> -        # conflicted regions.
> -
> +
>          # We previously considered either 'unchanged' or 'killed- 
> both' lines
>          # to be possible places to resynchronize.  However,  
> assuming agreement
>          # on killed-both lines may be too agressive. -- mbp 20060324
> -        for state, line in plan:
> +        for state, line in self.plan:
>              if state == 'unchanged':
>                  # resync and flush queued conflicts changes if any
>                  if not lines_a and not lines_b:
>                      pass
>                  elif ch_a and not ch_b:
> -                    # one-sided change:
> -                    for l in lines_a: yield l
> +                    # one-sided change:
> +                    yield(lines_a,)
>                  elif ch_b and not ch_a:
> -                    for l in lines_b: yield l
> +                    yield (lines_b,)
>                  elif lines_a == lines_b:
> -                    for l in lines_a: yield l
> +                    yield(lines_a,)
>                  else:
> -                    yield a_marker
> -                    for l in lines_a: yield l
> -                    yield '=======\n'
> -                    for l in lines_b: yield l
> -                    yield b_marker
> -
> -                del lines_a[:]
> -                del lines_b[:]
> +                    yield (lines_a, lines_b)
> +
> +                lines_a = []
> +                lines_b = []
>                  ch_a = ch_b = False
>
>              if state == 'unchanged':
>                  if line:
> -                    yield line
> +                    yield ([line],)
>              elif state == 'killed-a':
>                  ch_a = True
>                  lines_b.append(line)
> @@ -491,9 +502,17 @@
>                  ch_b = True
>                  lines_b.append(line)
>              else:
> -                assert state in ('irrelevant', 'ghost-a', 'ghost- 
> b', 'killed-base',
> -                                 'killed-both'), \
> -                       state
> +                assert state in ('irrelevant', 'ghost-a', 'ghost-b',
> +                                 'killed-base', 'killed-both'), state
> +
> +
> +class WeaveMerge(PlanWeaveMerge):
> +    """Weave merge that takes a VersionedFile and two versions as  
> its input"""
> +
> +    def __init__(self, versionedfile, ver_a, ver_b,
> +        a_marker='<<<<<<< \n', b_marker='>>>>>>> \n'):
> +        plan = self.plan_merge(versionedfile, ver_a, ver_b)
> +        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
>
>
>  class InterVersionedFile(InterObject):
>

-- 
Martin