Rev 3522: The insertion ordering into the weave has an impact on conflicts. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/weave_merge

John Arbash Meinel john at arbash-meinel.com
Thu Jul 10 23:41:54 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/weave_merge

------------------------------------------------------------
revno: 3522
revision-id: john at arbash-meinel.com-20080710224146-pmkkzgc4vbxhgrsr
parent: john at arbash-meinel.com-20080710202241-7se48gbsuxm7ih4h
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: weave_merge
timestamp: Thu 2008-07-10 17:41:46 -0500
message:
  The insertion ordering into the weave has an impact on conflicts.
  
  Basically, when A has ancestors B and C with overlapping changes,
  inserting B before C implicitly sorts B's lines earlier in the weave.
  If A resolves the conflict by preserving that ordering, then switching the
  ordering later will make those lines look like they originated in A rather
  than in B and C (because diff can't track moved lines yet).
  
  So bring in some explicit ordering constraints that are likely to fit the
  real-world workflow. Specifically, prefer to add left-hand parents before
  right-hand parents, since that is how the merge was done.
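
As a rough, self-contained sketch of the walk described above (this is not the
bzrlib code; the helper name, parent map and revision ids below are invented
for illustration), finding the merge order amounts to popping a stack
depth-first from the tip, pushing right-hand parents before left-hand ones so
the left-hand side is reached first:

def sketch_find_merge_order(parent_map, tip, lca_ids):
    """Return lca_ids in the order they were merged into tip."""
    looking_for = set(lca_ids)
    stack = [tip]
    seen = set()
    found = []
    while stack and looking_for:
        node = stack.pop()
        seen.add(node)
        if node in looking_for:
            found.append(node)
            looking_for.remove(node)
            continue
        # Push right-hand parents first so left-hand parents pop first.
        for parent in reversed(parent_map.get(node, ())):
            if parent not in seen:
                stack.append(parent)
            seen.add(parent)
    return found

# D merged B (left-hand) and then C (right-hand); both descend from A.
ancestry = {'D': ('B', 'C'), 'B': ('A',), 'C': ('A',), 'A': ()}
print(sketch_find_merge_order(ancestry, 'D', ['C', 'B']))   # ['B', 'C']
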
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2008-05-29 20:17:37 +0000
+++ b/bzrlib/graph.py	2008-07-10 22:41:46 +0000
@@ -766,6 +766,53 @@
             common_walker.start_searching(new_common)
         return candidate_heads
 
+    def find_merge_order(self, tip_revision_id, lca_revision_ids):
+        """Find the order in which each revision was merged into tip.
+
+        This walks backwards from the tip with a stack, visiting left-hand
+        parents first, and returns lca_revision_ids in encounter order.
+        """
+        if len(lca_revision_ids) == 1:
+            return list(lca_revision_ids)
+        looking_for = set(lca_revision_ids)
+        # TODO: Is there a way we could do this "faster" by batching up the
+        # get_parent_map requests?
+        # TODO: Should we also be culling the ancestry search right away? We
+        # could add looking_for to the "stop" list, and walk their
+        # ancestry in batched mode. The flip side is it might mean we walk a
+        # lot of "stop" nodes, rather than only the minimum.
+        # Then again, without it we may trace back into ancestry we could have
+        # stopped early.
+        stack = [tip_revision_id]
+        found = []
+        stop = set()
+        while stack and looking_for:
+            next = stack.pop()
+            stop.add(next)
+            if next in looking_for:
+                found.append(next)
+                looking_for.remove(next)
+                if len(looking_for) == 1:
+                    found.append(looking_for.pop())
+                    break
+                continue
+            parent_ids = self.get_parent_map([next]).get(next, None)
+            if not parent_ids: # Ghost, nothing to search here
+                continue
+            for parent_id in reversed(parent_ids):
+                # TODO: (performance) We see the parent at this point, but we
+                #       wait to mark it until later to make sure we get left
+                #       parents before right parents. However, instead of
+                #       waiting until we have traversed enough parents, we
+                #       could instead note that we've found it, and once all
+                #       parents are in the stack, just reverse iterate the
+                #       stack for them.
+                if parent_id not in stop:
+                    # this will need to be searched
+                    stack.append(parent_id)
+                stop.add(parent_id)
+        return found
+
     def find_unique_lca(self, left_revision, right_revision,
                         count_steps=False):
         """Find a unique LCA.

=== modified file 'bzrlib/merge.py'
--- a/bzrlib/merge.py	2008-07-10 20:22:41 +0000
+++ b/bzrlib/merge.py	2008-07-10 22:41:46 +0000
@@ -1433,14 +1433,18 @@
             # Map a plain NULL_REVISION to a simple no-ancestors
             if next_lcas == set([NULL_REVISION]):
                 next_lcas = ()
+            # Order the lca's based on when they were merged into the first
+            # tip. While weave merge uses a set() of active revisions, the
+            # order of insertion *does* affect the implicit ordering of the
+            # texts.
+            next_lcas = tuple(self.graph.find_merge_order(cur_ancestors[0],
+                                                          next_lcas))
             for rev_key in cur_ancestors:
-                # These don't need to be properly sorted, because
-                # weave.add_lines() just treats them as a set.
-                parent_map[rev_key] = tuple(next_lcas)
+                parent_map[rev_key] = next_lcas
             if len(next_lcas) == 0:
                 break
             elif len(next_lcas) == 1:
-                parent_map[next_lcas.pop()] = ()
+                parent_map[next_lcas[0]] = ()
                 break
             else:
                 # More than 2 lca's, fall back to grabbing all nodes between
@@ -1454,7 +1458,7 @@
                     # No common base to find, use the full ancestry
                     unique_lca = None
                 else:
-                    unique_lca = cur_lcas.pop()
+                    unique_lca = list(cur_lcas)[0]
                 parent_map.update(self._find_unique_parents(next_lcas,
                                                             unique_lca))
                 break
@@ -1521,9 +1525,16 @@
         # ordering resolution in the output. Specifically, if you add A then B,
         # then in the output text A lines will show up before B lines. And, of
         # course, topo_sort doesn't guarantee any real ordering.
-        # Maybe we want to use merge_sorted? Though we would need to add a
-        # 'pseudo' node for the tip.
-        for key in tsort.topo_sort(parent_map):
+        # So we use merge_sort, and add a fake node on the tip.
+        # This ensures that left-hand parents will always be inserted into the
+        # weave before right-hand parents.
+        tip_key = self._key_prefix + (_mod_revision.CURRENT_REVISION,)
+        parent_map[tip_key] = (self.a_key, self.b_key)
+
+        for seq_num, key, depth, eom in reversed(tsort.merge_sort(parent_map,
+                                                                  tip_key)):
+            if key == tip_key:
+                continue
             parent_keys = parent_map[key]
             revision_id = key[-1]
             parent_ids = [k[-1] for k in parent_keys]

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2008-05-29 20:17:37 +0000
+++ b/bzrlib/tests/test_graph.py	2008-07-10 22:41:46 +0000
@@ -1312,6 +1312,36 @@
         self.assertFindDistance(6, graph, 'g', [('i', 8)])
 
 
+class TestFindMergeOrder(TestGraphBase):
+
+    def assertMergeOrder(self, expected, graph, tip, base_revisions):
+        self.assertEqual(expected, graph.find_merge_order(tip, base_revisions))
+
+    def test_parents(self):
+        graph = self.make_graph(ancestry_1)
+        self.assertMergeOrder(['rev3', 'rev2b'], graph, 'rev4',
+                                                        ['rev3', 'rev2b'])
+        self.assertMergeOrder(['rev3', 'rev2b'], graph, 'rev4',
+                                                        ['rev2b', 'rev3'])
+
+    def test_ancestors(self):
+        graph = self.make_graph(ancestry_1)
+        self.assertMergeOrder(['rev1', 'rev2b'], graph, 'rev4',
+                                                        ['rev1', 'rev2b'])
+        self.assertMergeOrder(['rev1', 'rev2b'], graph, 'rev4',
+                                                        ['rev2b', 'rev1'])
+
+    def test_shortcut_one_ancestor(self):
+        # When we have enough info, we can stop searching
+        graph = self.make_breaking_graph(ancestry_1, ['rev3', 'rev2b', 'rev4'])
+        # Single ancestors shortcut right away
+        self.assertMergeOrder(['rev3'], graph, 'rev4', ['rev3'])
+
+    def test_shortcut_after_one_ancestor(self):
+        graph = self.make_breaking_graph(ancestry_1, ['rev2a', 'rev2b'])
+        self.assertMergeOrder(['rev3', 'rev1'], graph, 'rev4', ['rev1', 'rev3'])
+
+
 class TestCachingParentsProvider(tests.TestCase):
 
     def setUp(self):

=== modified file 'bzrlib/tests/test_merge.py'
--- a/bzrlib/tests/test_merge.py	2008-07-10 20:22:41 +0000
+++ b/bzrlib/tests/test_merge.py	2008-07-10 22:41:46 +0000
@@ -508,10 +508,10 @@
                           ('unchanged', 'a\n'),
                           ('killed-a', 'b\n'),
                           ('killed-b', 'c\n'),
-                          ('new-b', 'g\n'),
                           ('new-a', 'e\n'),
                           ('new-a', 'h\n'),
-                          ('new-a', 'g\n')],
+                          ('new-a', 'g\n'),
+                          ('new-b', 'g\n')],
                          list(plan))
 
     def test_plan_merge_cherrypick(self):
@@ -536,12 +536,12 @@
         self.add_version(('root', 'B'), [], 'xyz')
         my_plan = _PlanMerge('A', 'B', self.plan_merge_vf, ('root',))
         self.assertEqual([
+                          ('new-a', 'a\n'),
+                          ('new-a', 'b\n'),
+                          ('new-a', 'c\n'),
                           ('new-b', 'x\n'),
                           ('new-b', 'y\n'),
-                          ('new-b', 'z\n'),
-                          ('new-a', 'a\n'),
-                          ('new-a', 'b\n'),
-                          ('new-a', 'c\n')],
+                          ('new-b', 'z\n')],
                           list(my_plan.plan_merge()))
 
     def test_plan_merge_uncommitted_files(self):
@@ -552,15 +552,73 @@
                           ('unchanged', 'a\n'),
                           ('killed-a', 'b\n'),
                           ('killed-b', 'c\n'),
-                          ('new-b', 'g\n'),
                           ('new-a', 'e\n'),
                           ('new-a', 'h\n'),
-                          ('new-a', 'g\n')],
+                          ('new-a', 'g\n'),
+                          ('new-b', 'g\n')],
+                         list(plan))
+
+    def test_plan_merge_insert_order(self):
+        """Weave merges are sensitive to the order of insertion.
+        
+        Specifically, for overlapping regions it affects which region gets
+        put 'first'. And when a user resolves an overlapping merge, if they
+        use the same ordering the lines match the parents; if they don't,
+        only *some* of the lines match.
+        """
+        self.add_version(('root', 'A'), [], 'abcdef')
+        self.add_version(('root', 'B'), [('root', 'A')], 'abwxcdef')
+        self.add_version(('root', 'C'), [('root', 'A')], 'abyzcdef')
+        # Merge, and resolve the conflict by adding *both* sets of lines
+        # If we get the ordering wrong, these will look like new lines in D,
+        # rather than carried over from B, C
+        self.add_version(('root', 'D'), [('root', 'B'), ('root', 'C')],
+                         'abwxyzcdef')
+        # Supersede the lines in B and delete the lines in C, which will
+        # conflict if they are treated as being in D
+        self.add_version(('root', 'E'), [('root', 'C'), ('root', 'B')],
+                         'abnocdef')
+        # Same thing for the lines in C
+        self.add_version(('root', 'F'), [('root', 'C')], 'abpqcdef')
+        plan = self.plan_merge_vf.plan_merge('D', 'E')
+        self.assertEqual([
+                          ('unchanged', 'a\n'),
+                          ('unchanged', 'b\n'),
+                          ('killed-b', 'w\n'),
+                          ('killed-b', 'x\n'),
+                          ('killed-b', 'y\n'),
+                          ('killed-b', 'z\n'),
+                          ('new-b', 'n\n'),
+                          ('new-b', 'o\n'),
+                          ('unchanged', 'c\n'),
+                          ('unchanged', 'd\n'),
+                          ('unchanged', 'e\n'),
+                          ('unchanged', 'f\n')],
+                         list(plan))
+        plan = self.plan_merge_vf.plan_merge('E', 'D')
+        # Going in the opposite direction shows the effect of the opposite plan
+        self.assertEqual([
+                          ('unchanged', 'a\n'),
+                          ('unchanged', 'b\n'),
+                          ('new-b', 'w\n'),
+                          ('new-b', 'x\n'),
+                          ('killed-a', 'y\n'),
+                          ('killed-a', 'z\n'),
+                          ('killed-both', 'w\n'),
+                          ('killed-both', 'x\n'),
+                          ('new-a', 'n\n'),
+                          ('new-a', 'o\n'),
+                          ('unchanged', 'c\n'),
+                          ('unchanged', 'd\n'),
+                          ('unchanged', 'e\n'),
+                          ('unchanged', 'f\n')],
                          list(plan))
 
     def test_plan_merge_criss_cross(self):
         # This is specifically trying to trigger problems when using limited
         # ancestry and weaves. The ancestry graph looks like:
+        #       XX      unused ancestor, should not show up in the weave
+        #       |
         #       A       Unique LCA
         #       |\
         #       B \     Introduces a line 'foo'
@@ -581,7 +639,8 @@
         #   'foo', it should appear as superseding the value in F (since it
         #   came from B), rather than conflict because of the resolution during
         #   C & D.
-        self.add_version(('root', 'A'), [], 'abcdef')
+        self.add_version(('root', 'XX'), [], 'qrs')
+        self.add_version(('root', 'A'), [('root', 'XX')], 'abcdef')
         self.add_version(('root', 'B'), [('root', 'A')], 'axcdef')
         self.add_version(('root', 'C'), [('root', 'B')], 'axcdefg')
         self.add_version(('root', 'D'), [('root', 'B')], 'haxcdef')


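The bzrlib/merge.py hunk above replaces topo_sort with merge_sort run from a
pseudo tip node. Below is a small sketch of why that yields the desired
insertion order: only tsort.merge_sort() and its four-tuple results are taken
from bzrlib as used in the hunk, while the toy revision ids are invented.

from bzrlib import tsort

parent_map = {
    'base': (),
    'A': ('base',),   # left-hand side of the merge
    'B': ('base',),   # right-hand side of the merge
}
# Add a pseudo tip whose left-hand parent is A, mirroring the real merge.
tip_key = 'pseudo-tip'
parent_map[tip_key] = ('A', 'B')

insertion_order = []
for seq_num, key, depth, eom in reversed(tsort.merge_sort(parent_map,
                                                          tip_key)):
    if key == tip_key:
        continue
    insertion_order.append(key)
print(insertion_order)   # ['base', 'A', 'B'] -- left-hand A inserted before B

Reverse-iterating the merge_sorted list means ancestors are inserted before
their descendants, and because merge_sort follows left-hand parents for the
mainline, A always lands in the weave before B.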
