[MERGE REVIEW] plan-merge for knits
Martin Pool
mbp at sourcefrog.net
Wed Apr 19 02:25:39 BST 2006
On 18 Apr 2006, Aaron Bentley <aaron.bentley at utoronto.ca> wrote:
> Hi all,
>
> This patch implements plan_merge for knits, which means knits have
> native weave merge support. This means it doesn't use
> VersionedFile.walk, which cannot be implemented efficiently on knits.
+1 with comments
> === modified file 'a/bzrlib/knit.py'
> --- a/bzrlib/knit.py
> +++ b/bzrlib/knit.py
> @@ -78,7 +78,7 @@
> from bzrlib.tuned_gzip import *
> from bzrlib.trace import mutter
> from bzrlib.osutils import contains_whitespace, contains_linebreaks, \
> - sha_strings
> + sha_strings, xenumerate
> from bzrlib.versionedfile import VersionedFile, InterVersionedFile
> from bzrlib.tsort import topo_sort
>
> @@ -815,6 +815,47 @@
>
> for lineno, insert_id, dset, line in w.walk(version_ids):
> yield lineno, insert_id, dset, line
> +
> + def plan_merge(self, ver_a, ver_b):
> + ancestors_b = set(self.get_ancestry(ver_b))
> + def status_a(revision, text):
> + if revision in ancestors_b:
> + return 'killed-b', text
> + else:
> + return 'new-a', text
> +
> + ancestors_a = set(self.get_ancestry(ver_a))
> + def status_b(revision, text):
> + if revision in ancestors_a:
> + return 'killed-a', text
> + else:
> + return 'new-b', text
> +
> + annotated_a = self.annotate(ver_a)
> + annotated_b = self.annotate(ver_b)
> + plain_a = [t for a, t in annotated_a]
> + plain_b = [t for a, t in annotated_b]
I'd write
[t for (a, t) in annotated_a]
just to make the precedence obvious.
> + blocks = SequenceMatcher(None, plain_a, plain_b).get_matching_blocks()
> + a_cur = 0
> + b_cur = 0
> + a_iter = iter(annotated_a)
> + b_iter = iter(annotated_b)
> + for ai, bi, l in blocks:
# process mismatched sections before this block
> + for a_num, (revision, text) in xenumerate(a_iter, ai, a_cur):
> + yield status_a(revision, text)
> + for b_num, (revision, text) in xenumerate(b_iter, bi, b_cur):
> + yield status_b(revision, text)
# and now the matched section
> + for num, ((revision_a, text_a), (revision_b, text_b)) in \
> + xenumerate(izip(a_iter, b_iter), l):
> + assert text_a == text_b
> + yield "unchanged", text_a
> + a_cur = ai + l
> + b_cur = bi + l
> + # handle a final conflicting region
> + for a_num, (revision, text) in xenumerate(a_iter, start=a_cur):
> + yield status_a(revision, text)
> + for b_num, (revision, text) in xenumerate(b_iter, start=b_cur):
> + yield status_b(revision, text)
This looks good, very clear and concise.
>
>
> class _KnitComponentFile(object):
>
> === modified file 'a/bzrlib/osutils.py'
> --- a/bzrlib/osutils.py
> +++ b/bzrlib/osutils.py
> @@ -653,3 +653,14 @@
>
> def supports_executable():
> return sys.platform != "win32"
> +
> +
> +def xenumerate(iter, stop=None, start=0, step=1):
> + count = start
> + if start == stop:
> + return
> + for result in iter:
> + yield count, result
> + count += step
> + if count == stop:
> + return
Needs a docstring, perhaps including a doctest-style example of its use.
It seems a bit strange to me that the step parameter just changes the
way count walks up - i would naively have expected step=2 to return
every second item.
Is this really needed at all? From the way it's used above it might be
simpler and perhaps faster to simply put the annotated lines into lists
and then slice the lists.
It might be nice (not necessarily now) to make this accessible to the
user to help debug/understand changes, e.g.
bzr explain-merge foo.c
will give you on stdout something similar to what weave.py can now, e.g.
unchanged |Banana cup cake recipe
new-a |(serves 6)
unchanged |
>
> === modified file 'a/bzrlib/tests/test_knit.py'
> --- a/bzrlib/tests/test_knit.py
> +++ b/bzrlib/tests/test_knit.py
> @@ -368,6 +368,14 @@
> self.assertEqual(['revid', 'revid2', 'revid3'], knit.versions())
> self.assertEqual(['revid2'], knit.get_parents('revid3'))
>
> + def test_plan_merge(self):
> + my_knit = self.make_test_knit(annotate=True)
> + my_knit.add_lines('text1', [], split_lines(TEXT_1))
> + my_knit.add_lines('text1a', ['text1'], split_lines(TEXT_1A))
> + my_knit.add_lines('text1b', ['text1'], split_lines(TEXT_1B))
> + plan = list(my_knit.plan_merge('text1a', 'text1b'))
> + for plan_line, expected_line in zip(plan, AB_MERGE):
> + self.assertEqual(plan_line, expected_line)
>
>
> TEXT_1 = """\
> @@ -386,6 +394,14 @@
> - eggs
> - broken tea cups
> - self-raising flour
> +"""
> +
> +TEXT_1B = """\
> +Banana cup cake recipe
> +
> +- bananas (do not use plantains!!!)
> +- broken tea cups
> +- flour
> """
>
> delta_1_1a = """\
> @@ -405,6 +421,19 @@
> - carrot
> - mushrooms
> """
> +
> +AB_MERGE_TEXT="""unchanged|Banana cup cake recipe
> +new-a|(serves 6)
> +unchanged|
> +killed-b|- bananas
> +killed-b|- eggs
> +new-b|- bananas (do not use plantains!!!)
> +unchanged|- broken tea cups
> +new-a|- self-raising flour
> +new-b|- flour
> +"""
> +AB_MERGE=[tuple(l.split('|')) for l in AB_MERGE_TEXT.splitlines(True)]
> +
>
> def line_delta(from_lines, to_lines):
> """Generate line-based delta from one text to another"""
>
> === modified file 'a/bzrlib/tests/test_versionedfile.py'
> --- a/bzrlib/tests/test_versionedfile.py
> +++ b/bzrlib/tests/test_versionedfile.py
> @@ -17,6 +17,8 @@
> # along with this program; if not, write to the Free Software
> # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>
> +
> +from StringIO import StringIO
>
> import bzrlib
> import bzrlib.errors as errors
> @@ -34,7 +36,7 @@
> from bzrlib.transport.memory import MemoryTransport
> import bzrlib.versionedfile as versionedfile
> from bzrlib.weave import WeaveFile
> -from bzrlib.weavefile import read_weave
> +from bzrlib.weavefile import read_weave, write_weave
>
>
> class VersionedFileTestMixIn(object):
> @@ -815,3 +817,265 @@
>
> def get_factory(self):
> return KnitVersionedFile
> +
> +
> +class MergeCasesMixin(object):
> +
> + def doMerge(self, base, a, b, mp):
> + from cStringIO import StringIO
> + from textwrap import dedent
> +
> + def addcrlf(x):
> + return x + '\n'
> +
> + w = self.get_file()
> + w.add_lines('text0', [], map(addcrlf, base))
> + w.add_lines('text1', ['text0'], map(addcrlf, a))
> + w.add_lines('text2', ['text0'], map(addcrlf, b))
> +
> + self.log_contents(w)
> +
> + self.log('merge plan:')
> + p = list(w.plan_merge('text1', 'text2'))
> + for state, line in p:
> + if line:
> + self.log('%12s | %s' % (state, line[:-1]))
> +
> + self.log('merge:')
> + mt = StringIO()
> + mt.writelines(w.weave_merge(p))
> + mt.seek(0)
> + self.log(mt.getvalue())
> +
> + mp = map(addcrlf, mp)
> + self.assertEqual(mt.readlines(), mp)
> +
> +
> + def testOneInsert(self):
> + self.doMerge([],
> + ['aa'],
> + [],
> + ['aa'])
> +
> + def testSeparateInserts(self):
> + self.doMerge(['aaa', 'bbb', 'ccc'],
> + ['aaa', 'xxx', 'bbb', 'ccc'],
> + ['aaa', 'bbb', 'yyy', 'ccc'],
> + ['aaa', 'xxx', 'bbb', 'yyy', 'ccc'])
> +
> + def testSameInsert(self):
> + self.doMerge(['aaa', 'bbb', 'ccc'],
> + ['aaa', 'xxx', 'bbb', 'ccc'],
> + ['aaa', 'xxx', 'bbb', 'yyy', 'ccc'],
> + ['aaa', 'xxx', 'bbb', 'yyy', 'ccc'])
> + overlappedInsertExpected = ['aaa', 'xxx', 'yyy', 'bbb']
> + def testOverlappedInsert(self):
> + self.doMerge(['aaa', 'bbb'],
> + ['aaa', 'xxx', 'yyy', 'bbb'],
> + ['aaa', 'xxx', 'bbb'], self.overlappedInsertExpected)
> +
> + # really it ought to reduce this to
> + # ['aaa', 'xxx', 'yyy', 'bbb']
> +
> +
> + def testClashReplace(self):
> + self.doMerge(['aaa'],
> + ['xxx'],
> + ['yyy', 'zzz'],
> + ['<<<<<<< ', 'xxx', '=======', 'yyy', 'zzz',
> + '>>>>>>> '])
> +
> + def testNonClashInsert1(self):
> + self.doMerge(['aaa'],
> + ['xxx', 'aaa'],
> + ['yyy', 'zzz'],
> + ['<<<<<<< ', 'xxx', 'aaa', '=======', 'yyy', 'zzz',
> + '>>>>>>> '])
> +
> + def testNonClashInsert2(self):
> + self.doMerge(['aaa'],
> + ['aaa'],
> + ['yyy', 'zzz'],
> + ['yyy', 'zzz'])
> +
> +
> + def testDeleteAndModify(self):
> + """Clashing delete and modification.
> +
> + If one side modifies a region and the other deletes it then
> + there should be a conflict with one side blank.
> + """
> +
> + #######################################
> + # skippd, not working yet
> + return
> +
> + self.doMerge(['aaa', 'bbb', 'ccc'],
> + ['aaa', 'ddd', 'ccc'],
> + ['aaa', 'ccc'],
> + ['<<<<<<<< ', 'aaa', '=======', '>>>>>>> ', 'ccc'])
> +
> + def _test_merge_from_strings(self, base, a, b, expected):
> + w = self.get_file()
> + w.add_lines('text0', [], base.splitlines(True))
> + w.add_lines('text1', ['text0'], a.splitlines(True))
> + w.add_lines('text2', ['text0'], b.splitlines(True))
> + self.log('merge plan:')
> + p = list(w.plan_merge('text1', 'text2'))
> + for state, line in p:
> + if line:
> + self.log('%12s | %s' % (state, line[:-1]))
> + self.log('merge result:')
> + result_text = ''.join(w.weave_merge(p))
> + self.log(result_text)
> + self.assertEqualDiff(result_text, expected)
> +
> + def test_weave_merge_conflicts(self):
> + # does weave merge properly handle plans that end with unchanged?
> + result = ''.join(self.get_file().weave_merge([('new-a', 'hello\n')]))
> + self.assertEqual(result, 'hello\n')
> +
> + def test_deletion_extended(self):
> + """One side deletes, the other deletes more.
> + """
> + base = """\
> + line 1
> + line 2
> + line 3
> + """
> + a = """\
> + line 1
> + line 2
> + """
> + b = """\
> + line 1
> + """
> + result = """\
> + line 1
> + """
> + self._test_merge_from_strings(base, a, b, result)
> +
> + def test_deletion_overlap(self):
> + """Delete overlapping regions with no other conflict.
> +
> + Arguably it'd be better to treat these as agreement, rather than
> + conflict, but for now conflict is safer.
> + """
> + base = """\
> + start context
> + int a() {}
> + int b() {}
> + int c() {}
> + end context
> + """
> + a = """\
> + start context
> + int a() {}
> + end context
> + """
> + b = """\
> + start context
> + int c() {}
> + end context
> + """
> + result = """\
> + start context
> +<<<<<<<
> + int a() {}
> +=======
> + int c() {}
> +>>>>>>>
> + end context
> + """
> + self._test_merge_from_strings(base, a, b, result)
> +
> + def test_agreement_deletion(self):
> + """Agree to delete some lines, without conflicts."""
> + base = """\
> + start context
> + base line 1
> + base line 2
> + end context
> + """
> + a = """\
> + start context
> + base line 1
> + end context
> + """
> + b = """\
> + start context
> + base line 1
> + end context
> + """
> + result = """\
> + start context
> + base line 1
> + end context
> + """
> + self._test_merge_from_strings(base, a, b, result)
> +
> + def test_sync_on_deletion(self):
> + """Specific case of merge where we can synchronize incorrectly.
> +
> + A previous version of the weave merge concluded that the two versions
> + agreed on deleting line 2, and this could be a synchronization point.
> + Line 1 was then considered in isolation, and thought to be deleted on
> + both sides.
> +
> + It's better to consider the whole thing as a disagreement region.
> + """
> + base = """\
> + start context
> + base line 1
> + base line 2
> + end context
> + """
> + a = """\
> + start context
> + base line 1
> + a's replacement line 2
> + end context
> + """
> + b = """\
> + start context
> + b replaces
> + both lines
> + end context
> + """
> + result = """\
> + start context
> +<<<<<<<
> + base line 1
> + a's replacement line 2
> +=======
> + b replaces
> + both lines
> +>>>>>>>
> + end context
> + """
> + self._test_merge_from_strings(base, a, b, result)
> +
> +
> +class TestKnitMerge(TestCaseWithTransport, MergeCasesMixin):
> +
> + def get_file(self, name='foo'):
> + return KnitVersionedFile(name, get_transport(self.get_url('.')),
> + delta=True, create=True)
> +
> + def log_contents(self, w):
> + pass
> +
> +
> +class TestWeaveMerge(TestCaseWithTransport, MergeCasesMixin):
> +
> + def get_file(self, name='foo'):
> + return WeaveFile(name, get_transport(self.get_url('.')), create=True)
> +
> + def log_contents(self, w):
> + self.log('weave is:')
> + tmpf = StringIO()
> + write_weave(w, tmpf)
> + self.log(tmpf.getvalue())
> +
> + overlappedInsertExpected = ['aaa', '<<<<<<< ', 'xxx', 'yyy', '=======',
> + 'xxx', '>>>>>>> ', 'bbb']
>
> === modified file 'a/bzrlib/tests/test_weave.py'
> --- a/bzrlib/tests/test_weave.py
> +++ b/bzrlib/tests/test_weave.py
> @@ -680,241 +680,6 @@
> self.assertEqual(k.get_lines(i), t)
>
> self.check_read_write(k)
> -
> -
> -class MergeCases(TestBase):
> - def doMerge(self, base, a, b, mp):
> - from cStringIO import StringIO
> - from textwrap import dedent
> -
> - def addcrlf(x):
> - return x + '\n'
> -
> - w = Weave()
> - w.add_lines('text0', [], map(addcrlf, base))
> - w.add_lines('text1', ['text0'], map(addcrlf, a))
> - w.add_lines('text2', ['text0'], map(addcrlf, b))
> -
> - self.log('weave is:')
> - tmpf = StringIO()
> - write_weave(w, tmpf)
> - self.log(tmpf.getvalue())
> -
> - self.log('merge plan:')
> - p = list(w.plan_merge('text1', 'text2'))
> - for state, line in p:
> - if line:
> - self.log('%12s | %s' % (state, line[:-1]))
> -
> - self.log('merge:')
> - mt = StringIO()
> - mt.writelines(w.weave_merge(p))
> - mt.seek(0)
> - self.log(mt.getvalue())
> -
> - mp = map(addcrlf, mp)
> - self.assertEqual(mt.readlines(), mp)
> -
> -
> - def testOneInsert(self):
> - self.doMerge([],
> - ['aa'],
> - [],
> - ['aa'])
> -
> - def testSeparateInserts(self):
> - self.doMerge(['aaa', 'bbb', 'ccc'],
> - ['aaa', 'xxx', 'bbb', 'ccc'],
> - ['aaa', 'bbb', 'yyy', 'ccc'],
> - ['aaa', 'xxx', 'bbb', 'yyy', 'ccc'])
> -
> - def testSameInsert(self):
> - self.doMerge(['aaa', 'bbb', 'ccc'],
> - ['aaa', 'xxx', 'bbb', 'ccc'],
> - ['aaa', 'xxx', 'bbb', 'yyy', 'ccc'],
> - ['aaa', 'xxx', 'bbb', 'yyy', 'ccc'])
> -
> - def testOverlappedInsert(self):
> - self.doMerge(['aaa', 'bbb'],
> - ['aaa', 'xxx', 'yyy', 'bbb'],
> - ['aaa', 'xxx', 'bbb'],
> - ['aaa', '<<<<<<< ', 'xxx', 'yyy', '=======', 'xxx',
> - '>>>>>>> ', 'bbb'])
> -
> - # really it ought to reduce this to
> - # ['aaa', 'xxx', 'yyy', 'bbb']
> -
> -
> - def testClashReplace(self):
> - self.doMerge(['aaa'],
> - ['xxx'],
> - ['yyy', 'zzz'],
> - ['<<<<<<< ', 'xxx', '=======', 'yyy', 'zzz',
> - '>>>>>>> '])
> -
> - def testNonClashInsert(self):
> - self.doMerge(['aaa'],
> - ['xxx', 'aaa'],
> - ['yyy', 'zzz'],
> - ['<<<<<<< ', 'xxx', 'aaa', '=======', 'yyy', 'zzz',
> - '>>>>>>> '])
> -
> - self.doMerge(['aaa'],
> - ['aaa'],
> - ['yyy', 'zzz'],
> - ['yyy', 'zzz'])
> -
> -
> - def testDeleteAndModify(self):
> - """Clashing delete and modification.
> -
> - If one side modifies a region and the other deletes it then
> - there should be a conflict with one side blank.
> - """
> -
> - #######################################
> - # skippd, not working yet
> - return
> -
> - self.doMerge(['aaa', 'bbb', 'ccc'],
> - ['aaa', 'ddd', 'ccc'],
> - ['aaa', 'ccc'],
> - ['<<<<<<<< ', 'aaa', '=======', '>>>>>>> ', 'ccc'])
> -
> - def _test_merge_from_strings(self, base, a, b, expected):
> - w = Weave()
> - w.add_lines('text0', [], base.splitlines(True))
> - w.add_lines('text1', ['text0'], a.splitlines(True))
> - w.add_lines('text2', ['text0'], b.splitlines(True))
> - self.log('merge plan:')
> - p = list(w.plan_merge('text1', 'text2'))
> - for state, line in p:
> - if line:
> - self.log('%12s | %s' % (state, line[:-1]))
> - self.log('merge result:')
> - result_text = ''.join(w.weave_merge(p))
> - self.log(result_text)
> - self.assertEqualDiff(result_text, expected)
> -
> - def test_deletion_extended(self):
> - """One side deletes, the other deletes more.
> - """
> - base = """\
> - line 1
> - line 2
> - line 3
> - """
> - a = """\
> - line 1
> - line 2
> - """
> - b = """\
> - line 1
> - """
> - result = """\
> - line 1
> - """
> - self._test_merge_from_strings(base, a, b, result)
> -
> - def test_deletion_overlap(self):
> - """Delete overlapping regions with no other conflict.
> -
> - Arguably it'd be better to treat these as agreement, rather than
> - conflict, but for now conflict is safer.
> - """
> - base = """\
> - start context
> - int a() {}
> - int b() {}
> - int c() {}
> - end context
> - """
> - a = """\
> - start context
> - int a() {}
> - end context
> - """
> - b = """\
> - start context
> - int c() {}
> - end context
> - """
> - result = """\
> - start context
> -<<<<<<<
> - int a() {}
> -=======
> - int c() {}
> ->>>>>>>
> - end context
> - """
> - self._test_merge_from_strings(base, a, b, result)
> -
> - def test_agreement_deletion(self):
> - """Agree to delete some lines, without conflicts."""
> - base = """\
> - start context
> - base line 1
> - base line 2
> - end context
> - """
> - a = """\
> - start context
> - base line 1
> - end context
> - """
> - b = """\
> - start context
> - base line 1
> - end context
> - """
> - result = """\
> - start context
> - base line 1
> - end context
> - """
> - self._test_merge_from_strings(base, a, b, result)
> -
> - def test_sync_on_deletion(self):
> - """Specific case of merge where we can synchronize incorrectly.
> -
> - A previous version of the weave merge concluded that the two versions
> - agreed on deleting line 2, and this could be a synchronization point.
> - Line 1 was then considered in isolation, and thought to be deleted on
> - both sides.
> -
> - It's better to consider the whole thing as a disagreement region.
> - """
> - base = """\
> - start context
> - base line 1
> - base line 2
> - end context
> - """
> - a = """\
> - start context
> - base line 1
> - a's replacement line 2
> - end context
> - """
> - b = """\
> - start context
> - b replaces
> - both lines
> - end context
> - """
> - result = """\
> - start context
> -<<<<<<<
> - base line 1
> - a's replacement line 2
> -=======
> - b replaces
> - both lines
> ->>>>>>>
> - end context
> - """
> - self._test_merge_from_strings(base, a, b, result)
>
>
> class JoinWeavesTests(TestBase):
>
> === modified file 'a/bzrlib/versionedfile.py'
> --- a/bzrlib/versionedfile.py
> +++ b/bzrlib/versionedfile.py
> @@ -395,11 +395,31 @@
> return iter(self.versions())
>
> def plan_merge(self, ver_a, ver_b):
> + """Return pseudo-annotation indicating how the two versions merge.
> +
> + This is computed between versions a and b and their common
> + base.
> +
> + Weave lines present in none of them are skipped entirely.
> +
> + Legend:
> + killed-base Dead in base revision
> + killed-both Killed in each revision
> + killed-a Killed in a
> + killed-b Killed in b
> + unchanged Alive in both a and b (possibly created in both)
> + new-a Created in a
> + new-b Created in b
> + ghost-a Killed in a, unborn in b
> + ghost-b Killed in b, unborn in a
> + irrelevant Not in either revision
> + """
> raise NotImplementedError(VersionedFile.plan_merge)
>
> def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
> b_marker=TextMerge.B_MARKER):
> return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
> +
>
> class PlanWeaveMerge(TextMerge):
> """Weave merge that takes a plan as its input.
> @@ -413,11 +433,23 @@
> TextMerge.__init__(self, a_marker, b_marker)
> self.plan = plan
>
> -
> def _merge_struct(self):
> lines_a = []
> lines_b = []
> ch_a = ch_b = False
> +
> + def outstanding_struct():
> + if not lines_a and not lines_b:
> + return
> + elif ch_a and not ch_b:
> + # one-sided change:
> + yield(lines_a,)
> + elif ch_b and not ch_a:
> + yield (lines_b,)
> + elif lines_a == lines_b:
> + yield(lines_a,)
> + else:
> + yield (lines_a, lines_b)
>
> # We previously considered either 'unchanged' or 'killed-both' lines
> # to be possible places to resynchronize. However, assuming agreement
> @@ -425,18 +457,8 @@
> for state, line in self.plan:
> if state == 'unchanged':
> # resync and flush queued conflicts changes if any
> - if not lines_a and not lines_b:
> - pass
> - elif ch_a and not ch_b:
> - # one-sided change:
> - yield(lines_a,)
> - elif ch_b and not ch_a:
> - yield (lines_b,)
> - elif lines_a == lines_b:
> - yield(lines_a,)
> - else:
> - yield (lines_a, lines_b)
> -
> + for struct in outstanding_struct():
> + yield struct
> lines_a = []
> lines_b = []
> ch_a = ch_b = False
> @@ -459,7 +481,8 @@
> else:
> assert state in ('irrelevant', 'ghost-a', 'ghost-b',
> 'killed-base', 'killed-both'), state
> -
> + for struct in outstanding_struct():
> + yield struct
>
> class WeaveMerge(PlanWeaveMerge):
> """Weave merge that takes a VersionedFile and two versions as its input"""
>
--
Martin
More information about the bazaar
mailing list