Rev 3516: Restore a real weave merge to 'bzr merge --weave'. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/weave_merge

Wed Jul 9 22:42:26 BST 2008

At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/weave_merge

------------------------------------------------------------
revno: 3516
revision-id: john at arbash-meinel.com-20080709214224-r75k87r6a01pfc3h
parent: aaron at aaronbentley.com-20080630192337-3btwipid5vm0mty9
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: weave_merge
timestamp: Wed 2008-07-09 16:42:24 -0500
message:
  Restore a real weave merge to 'bzr merge --weave'.
  
  To do so efficiently, we only add the simple LCAs to the final weave
  object, unless we run into complexities with the merge graph.
  This gives the same effective result as adding all the texts,
  with the advantage of not having to extract all of them.
modified:
  bzrlib/merge.py                merge.py-20050513021216-953b65a438527106
  bzrlib/tests/test_merge.py     testmerge.py-20050905070950-c1b5aa49ff911024
  bzrlib/tests/test_weave.py     testknit.py-20050627023648-9833cc5562ffb785
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
-------------- next part --------------
=== modified file 'bzrlib/merge.py'

--- a/bzrlib/merge.py	2008-06-25 10:06:48 +0000
+++ b/bzrlib/merge.py	2008-07-09 21:42:24 +0000
@@ -1258,9 +1258,12 @@
         self._last_lines_revision_id = None
         self._cached_matching_blocks = {}
         self._key_prefix = key_prefix
-        lines = self.get_lines([a_rev, b_rev])
-        self.lines_a = lines[a_rev]
-        self.lines_b = lines[b_rev]
+        self._precache_tip_lines()
+
+    def _precache_tip_lines(self):
+        lines = self.get_lines([self.a_rev, self.b_rev])
+        self.lines_a = lines[self.a_rev]
+        self.lines_b = lines[self.b_rev]
 
     def get_lines(self, revisions):
         """Get lines for revisions from the backing VersionedFiles.
@@ -1392,59 +1395,118 @@
     """Plan an annotate merge using on-the-fly annotation"""
 
     def __init__(self, a_rev, b_rev, vf, key_prefix):
-        _PlanMergeBase.__init__(self, a_rev, b_rev, vf, key_prefix)
-        graph = Graph(vf)
-        # XXX: There is probably a better API to use to examine less history.
-        a_ancestry = set(chain(*graph._make_breadth_first_searcher(
-            [key_prefix + (a_rev,)])))
-        b_ancestry = set(chain(*graph._make_breadth_first_searcher(
-            [key_prefix + (b_rev,)])))
-        self.uncommon = set(key[-1] for key in
-            a_ancestry.symmetric_difference(b_ancestry))
-
-    def _determine_status(self, revision_id, unique_line_numbers):
-        """Determines the status unique lines versus all lcas.
-
-        Basically, determines why the line is unique to this revision.
-
-        A line may be determined new or killed, but not both.
-
-        :param revision_id: The id of the revision in which the lines are
-            unique
-        :param unique_line_numbers: The line numbers of unique lines.
-        :return a tuple of (new_this, killed_other):
-        """
-        new = self._find_new(revision_id)
-        killed = set(unique_line_numbers).difference(new)
-        return new, killed
-
-    def _find_new(self, version_id):
-        """Determine which lines are new in the ancestry of this version.
-
-        If a lines is present in this version, and not present in any
-        common ancestor, it is considered new.
-        """
-        if version_id not in self.uncommon:
-            return set()
-        key = self._key_prefix + (version_id,)
-        parent_map = self.vf.get_parent_map([key])
-        parents = tuple(parent[-1] for parent in parent_map[key])
-        if len(parents) == 0:
-            return set(range(len(self.get_lines([version_id])[version_id])))
-        new = None
-        for parent in parents:
-            blocks = self._get_matching_blocks(version_id, parent)
-            result, unused = self._unique_lines(blocks)
-            parent_new = self._find_new(parent)
-            for i, j, n in blocks:
-                for ii, jj in [(i+r, j+r) for r in range(n)]:
-                    if jj in parent_new:
-                        result.append(ii)
-            if new is None:
-                new = set(result)
+        super(_PlanMerge, self).__init__(a_rev, b_rev, vf, key_prefix)
+        self.a_key = self._key_prefix + (self.a_rev,)
+        self.b_key = self._key_prefix + (self.b_rev,)
+        self.graph = Graph(self.vf)
+        # heads = self.graph.heads((self.a_key, self.b_key))
+        # if len(heads) == 1:
+        #     # one side dominates, so we can just return its values, yay for
+        #     # per-file graphs
+        #     # Ideally we would know that before we get this far
+        #     self._head_key = heads.pop()
+        #     if self._head_key == self.a_key:
+        #         other = b_rev
+        #     else:
+        #         other = a_rev
+        #     mutter('found dominating revision for %s\n%s > %s', self.vf,
+        #            self._head_key[-1], other)
+        #     self._weave = None
+        # else:
+        self._head_key = None
+        self._build_weave()
+
+    def _precache_tip_lines(self):
+        # Turn this into a no-op, because we will do this later
+        pass
+
+    def _find_recursive_lcas(self):
+        """Find all the ancestors back to a unique lca"""
+        cur_ancestors = (self.a_key, self.b_key)
+        ancestor_keys = [cur_ancestors]
+        # graph.find_lca(uncommon, keys) now returns plain NULL_REVISION,
+        # rather than a key tuple, but everything else expects tuples, so we
+        # adapt the result to be normalized, this way we don't have to special
+        # case _get_interesting_texts, etc.
+        null_key = self._key_prefix + (NULL_REVISION,)
+        while True:
+            next_lcas = self.graph.find_lca(*cur_ancestors)
+            ancestor_keys.append(next_lcas)
+            if len(next_lcas) == 0:
+                ancestor_keys[-1] = [null_key]
+                self.vf.add_lines(null_key, [], [])
+                break
+            elif len(next_lcas) == 1:
+                if next_lcas == set([NULL_REVISION]):
+                    ancestor_keys[-1] = [null_key]
+                    self.vf.add_lines(null_key, [], [])
+                break
+            cur_ancestors = next_lcas
+        ancestor_keys.reverse()
+        return ancestor_keys
+
+    def _get_interesting_texts(self, lca_keys):
+        """Return a dict of texts we are interested in.
+
+        Note that the input is in key tuples, but the output is in plain
+        revision ids.
+
+        :param lca_keys: The output from _find_recursive_lcas
+        :return: A dict of {'revision_id':lines} as returned by
+            _PlanMergeBase.get_lines()
+        """
+        all_revision_ids = set()
+        # lcas are in keys, but get_lines works in revision_ids
+        for ancestor_keys in lca_keys:
+            all_revision_ids.update([key[-1] for key in ancestor_keys])
+        all_revision_ids.add(self.a_rev)
+        all_revision_ids.add(self.b_rev)
+
+        all_texts = self.get_lines(all_revision_ids)
+        return all_texts
+
+    def _build_weave(self):
+        from bzrlib import weave
+        self._weave = weave.Weave(weave_name='in_memory_weave',
+                                  allow_reserved=True)
+        lca_keys = self._find_recursive_lcas()
+
+        all_texts = self._get_interesting_texts(lca_keys)
+
+        last_parents = ()
+        for ancestor_keys in lca_keys:
+            for ancestor_key in ancestor_keys:
+                ancestor = ancestor_key[-1]
+                if self._weave.has_version(ancestor):
+                    # Most likely this happened because one node purely
+                    # dominated another in the per-file graph. That is okay, we
+                    # already have it in the weave, and the plan should be very
+                    # straightforward.
+                    continue
+                self._weave.add_lines(ancestor, last_parents,
+                                      all_texts[ancestor])
+            last_parents = [a[-1] for a in ancestor_keys]
+
+    def plan_merge(self):
+        """Generate a 'plan' for merging the two revisions.
+
+        This involves comparing their texts and determining the cause of
+        differences.  If text A has a line and text B does not, then either the
+        line was added to text A, or it was deleted from B.  Once the causes
+        are combined, they are written out in the format described in
+        VersionedFile.plan_merge
+        """
+        if self._head_key is not None: # There was a single head
+            if self._head_key == self.a_key:
+                plan = 'new-a'
             else:
-                new.intersection_update(result)
-        return new
+                if self._head_key != self.b_key:
+                    raise AssertionError('There was an invalid head: %s != %s'
+                                         % (self.b_key, self._head_key))
+                plan = 'new-b'
+            lines = self.get_lines([self._head_key[-1]])[self._head_key[-1]]
+            return ((plan, line) for line in lines)
+        return self._weave.plan_merge(self.a_rev, self.b_rev)
 
 
 class _PlanLCAMerge(_PlanMergeBase):

=== modified file 'bzrlib/tests/test_merge.py'
--- a/bzrlib/tests/test_merge.py	2008-06-30 19:23:37 +0000
+++ b/bzrlib/tests/test_merge.py	2008-07-09 21:42:24 +0000
@@ -500,26 +500,35 @@
             plan._get_matching_blocks('B', 'C')),
             ([1, 2, 3], [0, 2]))
 
-    def test_find_new(self):
-        plan = self.setup_plan_merge()
-        self.assertEqual(set([2, 3, 4]), plan._find_new('B'))
-        self.assertEqual(set([0, 3]), plan._find_new('C'))
-
-    def test_find_new2(self):
+    def test_plan_merge_cherrypick(self):
         self.add_version(('root', 'A'), [], 'abc')
         self.add_version(('root', 'B'), [('root', 'A')], 'abcde')
         self.add_version(('root', 'C'), [('root', 'A')], 'abcefg')
         self.add_version(('root', 'D'),
             [('root', 'A'), ('root', 'B'), ('root', 'C')], 'abcdegh')
         my_plan = _PlanMerge('B', 'D', self.plan_merge_vf, ('root',))
-        self.assertEqual(set([5, 6]), my_plan._find_new('D'))
-        self.assertEqual(set(), my_plan._find_new('A'))
+        self.assertEqual([
+                          ('unchanged', 'a\n'),
+                          ('unchanged', 'b\n'),
+                          ('unchanged', 'c\n'),
+                          ('unchanged', 'd\n'),
+                          ('unchanged', 'e\n'),
+                          ('new-b', 'g\n'),
+                          ('new-b', 'h\n')],
+                          list(my_plan.plan_merge()))
 
-    def test_find_new_no_ancestors(self):
+    def test_plan_merge_no_common_ancestor(self):
         self.add_version(('root', 'A'), [], 'abc')
         self.add_version(('root', 'B'), [], 'xyz')
-        my_plan = _PlanMerge('A', 'B', self.vf, ('root',))
-        self.assertEqual(set([0, 1, 2]), my_plan._find_new('A'))
+        my_plan = _PlanMerge('A', 'B', self.plan_merge_vf, ('root',))
+        self.assertEqual([
+                          ('new-a', 'a\n'),
+                          ('new-a', 'b\n'),
+                          ('new-a', 'c\n'),
+                          ('new-b', 'x\n'),
+                          ('new-b', 'y\n'),
+                          ('new-b', 'z\n')],
+                          list(my_plan.plan_merge()))
 
     def test_plan_merge(self):
         self.setup_plan_merge()
@@ -527,11 +536,12 @@
         self.assertEqual([
                           ('new-b', 'f\n'),
                           ('unchanged', 'a\n'),
+                          ('killed-a', 'b\n'),
                           ('killed-b', 'c\n'),
                           ('new-a', 'e\n'),
                           ('new-a', 'h\n'),
-                          ('killed-a', 'b\n'),
-                          ('unchanged', 'g\n')],
+                          ('new-a', 'g\n'),
+                          ('new-b', 'g\n')],
                          list(plan))
 
     def test_plan_merge_uncommitted_files(self):
@@ -540,11 +550,12 @@
         self.assertEqual([
                           ('new-b', 'f\n'),
                           ('unchanged', 'a\n'),
+                          ('killed-a', 'b\n'),
                           ('killed-b', 'c\n'),
                           ('new-a', 'e\n'),
                           ('new-a', 'h\n'),
-                          ('killed-a', 'b\n'),
-                          ('unchanged', 'g\n')],
+                          ('new-a', 'g\n'),
+                          ('new-b', 'g\n')],
                          list(plan))
 
     def test_subtract_plans(self):
@@ -662,8 +673,21 @@
         self.add_version(('root', 'A'), [('root', 'C')], 'b')
         self.add_version(('root', 'B'), [('root', 'C')], '')
         plan = self.plan_merge_vf.plan_merge('A', 'B')
-        self.assertEqual([('new-a', 'b\n'),
-                          ('killed-both', 'a\n')
+        self.assertEqual([('killed-both', 'a\n'),
+                          ('new-a', 'b\n'),
+                         ], list(plan))
+
+    def test_plan_merge_with_move_and_change(self):
+        self.add_version(('root', 'C'), [], 'abcd')
+        self.add_version(('root', 'A'), [('root', 'C')], 'acbd')
+        self.add_version(('root', 'B'), [('root', 'C')], 'aBcd')
+        plan = self.plan_merge_vf.plan_merge('A', 'B')
+        self.assertEqual([('unchanged', 'a\n'),
+                          ('new-a', 'c\n'),
+                          ('killed-b', 'b\n'),
+                          ('new-b', 'B\n'),
+                          ('killed-a', 'c\n'),
+                          ('unchanged', 'd\n'),
                          ], list(plan))
 
 
@@ -703,30 +727,34 @@
         self.assertFileEqual('d\na\nb\nc\n', 'this/file1')
         self.assertFileEqual('d\na\nb\n', 'this/file2')
 
-    def test_merge_delete_and_change(self):
+    def test_merge_move_and_change(self):
         this_tree = self.make_branch_and_tree('this')
         this_tree.lock_write()
         self.addCleanup(this_tree.unlock)
         self.build_tree_contents([
-            ('this/file1', 'a\nb\n'),
+            ('this/file1', 'line 1\nline 2\nline 3\nline 4\n'),
         ])
         this_tree.add('file1',)
         this_tree.commit('Added file')
         other_tree = this_tree.bzrdir.sprout('other').open_workingtree()
         self.build_tree_contents([
-            ('other/file1', 'a\nc\n'),
+            ('other/file1', 'line 1\nline 2 to 2.1\nline 3\nline 4\n'),
         ])
-        other_tree.commit('Changed b to c')
+        other_tree.commit('Swapped 2 & 3')
         self.build_tree_contents([
-            ('this/file1', 'a\n'),
+            ('this/file1', 'line 1\nline 3\nline 2\nline 4\n'),
         ])
-        this_tree.commit('Deleted b')
+        this_tree.commit('Changed 2 to 2.1')
         self.do_merge(this_tree, other_tree)
-        self.assertFileEqual('a\n'
+        self.assertFileEqual('line 1\n'
             '<<<<<<< TREE\n'
+            'line 3\n'
+            'line 2\n'
             '=======\n'
-            'c\n'
-            '>>>>>>> MERGE-SOURCE\n', 'this/file1')
+            'line 2 to 2.1\n'
+            'line 3\n'
+            '>>>>>>> MERGE-SOURCE\n'
+            'line 4\n', 'this/file1')
 
 
 class TestMerge3Merge(TestCaseWithTransport, TestMergeImplementation):
@@ -742,3 +770,7 @@
 class TestLCAMerge(TestCaseWithTransport, TestMergeImplementation):
 
     merge_type = _mod_merge.LCAMerger
+
+    def test_merge_move_and_change(self):
+        self.expectFailure("lca merge doesn't conflict for move and change",
+            super(TestLCAMerge, self).test_merge_move_and_change)

=== modified file 'bzrlib/tests/test_weave.py'
--- a/bzrlib/tests/test_weave.py	2008-04-29 05:27:08 +0000
+++ b/bzrlib/tests/test_weave.py	2008-07-09 21:42:24 +0000
@@ -707,6 +707,21 @@
         self.assertRaises(errors.WeaveInvalidChecksum, w.check)
 
 
+class TestWeave(TestCase):
+
+    def test_allow_reserved_false(self):
+        w = Weave('name', allow_reserved=False)
+        # Add lines is checked at the WeaveFile level, not at the Weave level
+        w.add_lines('name:', [], TEXT_1)
+        # But get_lines is checked at this level
+        self.assertRaises(errors.ReservedId, w.get_lines, 'name:')
+
+    def test_allow_reserved_true(self):
+        w = Weave('name', allow_reserved=True)
+        w.add_lines('name:', [], TEXT_1)
+        self.assertEqual(TEXT_1, w.get_lines('name:'))
+
+
 class InstrumentedWeave(Weave):
     """Keep track of how many times functions are called."""
     

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2008-06-11 04:20:16 +0000
+++ b/bzrlib/weave.py	2008-07-09 21:42:24 +0000
@@ -214,9 +214,10 @@
     """
 
     __slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
-                 '_weave_name', '_matcher']
+                 '_weave_name', '_matcher', '_allow_reserved']
     
-    def __init__(self, weave_name=None, access_mode='w', matcher=None, get_scope=None):
+    def __init__(self, weave_name=None, access_mode='w', matcher=None,
+                 get_scope=None, allow_reserved=False):
         """Create a weave.
 
         :param get_scope: A callable that returns an opaque object to be used
@@ -239,6 +240,7 @@
         self._get_scope = get_scope
         self._scope = get_scope()
         self._access_mode = access_mode
+        self._allow_reserved = allow_reserved
 
     def __repr__(self):
         return "Weave(%r)" % self._weave_name
@@ -278,7 +280,8 @@
 
     def _lookup(self, name):
         """Convert symbolic version name to index."""
-        self.check_not_reserved_id(name)
+        if not self._allow_reserved:
+            self.check_not_reserved_id(name)
         try:
             return self._name_map[name]
         except KeyError:
@@ -653,7 +656,8 @@
                 # not in either revision
                 yield 'irrelevant', line
 
-        yield 'unchanged', ''           # terminator
+        # This doesn't seem to be used anymore
+        # yield 'unchanged', ''           # terminator
 
     def _extract(self, versions):
         """Yield annotation of lines in included set.
@@ -907,7 +911,8 @@
         
         :param create: If not True, only open an existing knit.
         """
-        super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope)
+        super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
+            allow_reserved=False)
         self._transport = transport
         self._filemode = filemode
         try: