Rev 3524: Handle more edge cases. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/weave_merge

Fri Jul 11 22:28:42 BST 2008

At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/weave_merge

------------------------------------------------------------
revno: 3524
revision-id: john at arbash-meinel.com-20080711212836-1ehghrnzgaioqwou
parent: john at arbash-meinel.com-20080711140234-djkprpvy1738nbn1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: weave_merge
timestamp: Fri 2008-07-11 16:28:36 -0500
message:
  Handle more edge cases.
  
  Specifically, we don't have to switch on the whole-ancestry logic until we have
  3 or more LCAs, we were doing it when there was 2.
  However, when we do switch on the whole ancestry logic, it is possible to
  get extra 'tails' which have ancestors that are in the ancestry of the
  unique lca, but they themselves do not terminate on the unique lca.
  The current code is a bit of a compromise, as we probably should chase down
  more nodes to include. If we include more than 1 lca, then we run into the
  same problem, where common lines have to have one side 'win' and the other 'lose'.
  A different possibility would be to create an artificial ancestor node,
  which uses the common lines from the different LCAs, so that it is assumed they
  all come from the same source. This would end up ignoring cases where changes
  were reverted and then reintroduced.
-------------- next part --------------
=== modified file 'bzrlib/merge.py'

--- a/bzrlib/merge.py	2008-07-11 14:02:34 +0000
+++ b/bzrlib/merge.py	2008-07-11 21:28:36 +0000
@@ -1449,7 +1449,7 @@
             elif len(next_lcas) == 1:
                 parent_map[list(next_lcas)[0]] = ()
                 break
-            else:
+            elif len(next_lcas) > 2:
                 # More than 2 lca's, fall back to grabbing all nodes between
                 # this and the unique lca.
                 mutter('More than 2 LCAs, falling back to all nodes for: %s',
@@ -1492,10 +1492,34 @@
             parent_map = self.graph.get_parent_map(interesting)
             parent_map[base_key] = ()
         culled_parent_map = {}
+        extra_base_ancestors = set()
         for key, parent_keys in parent_map.iteritems():
             culled_parent_keys = tuple([p for p in parent_keys
                                            if p in parent_map])
+            if not culled_parent_keys and key is not base_key:
+                # We have another 'tail', make sure to bring it into the
+                # ancestry, so that we don't think all of its lines are unique.
+                lcas = self.graph.find_lca(key, base_key)
+                if lcas == set([NULL_REVISION]):
+                    lcas = ()
+                if len(lcas) == 0:
+                    # Nothing to do, no common ancestor
+                    pass
+                else:
+                    # Technically, this isn't 100% correct, but it is better
+                    # than nothing. If we have more than 1 LCA, we probably
+                    # should keep tracking the rabbit down the hole, so that we
+                    # get proper annotations for lines.  For now, though, we
+                    # just add the first lca, and live with it
+                    lca = list(lcas)[0]
+                    extra_base_ancestors.add(lca)
+                    culled_parent_keys = (lca,)
+                    if lca not in culled_parent_map:
+                        culled_parent_map[lca] = ()
             culled_parent_map[key] = culled_parent_keys
+        if extra_base_ancestors:
+            culled_parent_map[base_key] = self.graph.find_merge_order(base_key,
+                                                            extra_base_ancestors)
         return culled_parent_map
 
     def _get_interesting_texts(self, parent_map):

=== modified file 'bzrlib/tests/test_merge.py'
--- a/bzrlib/tests/test_merge.py	2008-07-10 22:41:46 +0000
+++ b/bzrlib/tests/test_merge.py	2008-07-11 21:28:36 +0000
@@ -478,6 +478,10 @@
     def add_version(self, key, parents, text):
         self.vf.add_lines(key, parents, [c+'\n' for c in text])
 
+    def add_rev(self, prefix, revision_id, parents, text):
+        self.add_version((prefix, revision_id), [(prefix, p) for p in parents],
+                         text)
+
     def add_uncommitted_version(self, key, parents, text):
         self.plan_merge_vf.add_lines(key, parents,
                                      [c+'\n' for c in text])
@@ -544,6 +548,107 @@
                           ('new-b', 'z\n')],
                           list(my_plan.plan_merge()))
 
+    def test_plan_merge_tail_ancestors(self):
+        # The graph looks like this:
+        #       A       # Common to all ancestors
+        #      / \
+        #     B   C     # Ancestors of E, only common to one side
+        #     |\ /|
+        #     D E F     # D, F are unique to G, H respectively
+        #     |/ \|     # E is the LCA for G & H, and the unique LCA for
+        #     G   H     # I, J
+        #     |\ /|
+        #     | X |
+        #     |/ \|
+        #     I   J     # criss-cross merge of G, H
+        #
+        # In this situation, a simple pruning of ancestors of E will leave D &
+        # F "dangling", which looks like they introduce lines different from
+        # the ones in E, but in actuality C&B introduced the lines, and they
+        # are already present in E
+
+        # Introduce the base text
+        self.add_rev('root', 'A', [], 'abc')
+        # Introduces a new line B
+        self.add_rev('root', 'B', ['A'], 'aBbc')
+        # Introduces a new line C
+        self.add_rev('root', 'C', ['A'], 'abCc')
+        # Introduce new line D
+        self.add_rev('root', 'D', ['B'], 'DaBbc')
+        # Merges B and C by just incorporating both
+        self.add_rev('root', 'E', ['B', 'C'], 'aBbCc')
+        # Introduce new line F
+        self.add_rev('root', 'F', ['C'], 'abCcF')
+        # Merge D & E by just combining the texts
+        self.add_rev('root', 'G', ['D', 'E'], 'DaBbCc')
+        # Merge F & E by just combining the texts
+        self.add_rev('root', 'H', ['F', 'E'], 'aBbCcF')
+        # Merge G & H by just combining texts
+        self.add_rev('root', 'I', ['G', 'H'], 'DaBbCcF')
+        # Merge G & H but supersede an old line in B
+        self.add_rev('root', 'J', ['H', 'G'], 'DaJbCcF')
+        plan = self.plan_merge_vf.plan_merge('I', 'J')
+        self.assertEqual([
+                          ('unchanged', 'D\n'),
+                          ('unchanged', 'a\n'),
+                          ('killed-b', 'B\n'),
+                          ('new-b', 'J\n'),
+                          ('unchanged', 'b\n'),
+                          ('unchanged', 'C\n'),
+                          ('unchanged', 'c\n'),
+                          ('unchanged', 'F\n')],
+                         list(plan))
+
+    def test_plan_merge_tail_triple_ancestors(self):
+        # The graph looks like this:
+        #       A       # Common to all ancestors
+        #      / \
+        #     B   C     # Ancestors of E, only common to one side
+        #     |\ /|
+        #     D E F     # D, F are unique to G, H respectively
+        #     |/|\|     # E is the LCA for G & H, and the unique LCA for
+        #     G Q H     # I, J
+        #     |\ /|     # Q is just an extra node which is merged into both
+        #     | X |     # I and J
+        #     |/ \|
+        #     I   J     # criss-cross merge of G, H
+        #
+        # This is the same as the test_plan_merge_tail_ancestors, except we add
+        # a third LCA that doesn't add new lines, but will trigger our more
+        # involved ancestry logic
+
+        self.add_rev('root', 'A', [], 'abc')
+        self.add_rev('root', 'B', ['A'], 'aBbc')
+        self.add_rev('root', 'C', ['A'], 'abCc')
+        self.add_rev('root', 'D', ['B'], 'DaBbc')
+        self.add_rev('root', 'E', ['B', 'C'], 'aBbCc')
+        self.add_rev('root', 'F', ['C'], 'abCcF')
+        self.add_rev('root', 'G', ['D', 'E'], 'DaBbCc')
+        self.add_rev('root', 'H', ['F', 'E'], 'aBbCcF')
+        self.add_rev('root', 'Q', ['E'], 'aBbCc')
+        self.add_rev('root', 'I', ['G', 'Q', 'H'], 'DaBbCcF')
+        # Merge G & H but supersede an old line in B
+        self.add_rev('root', 'J', ['H', 'Q', 'G'], 'DaJbCcF')
+        plan = self.plan_merge_vf.plan_merge('I', 'J')
+        # This has a slight incorrect value, as it considers the lines in A to
+        # be 'killed-base', because they show up as new in both B and C, and
+        # then E has to resolve which ones are the 'real' ones.
+        # However, I believe this is okay as the important lines will all
+        # derive from E.
+        self.assertEqual([
+                          ('unchanged', 'D\n'),
+                          ('unchanged', 'a\n'),
+                          ('killed-b', 'B\n'),
+                          ('new-b', 'J\n'),
+                          ('unchanged', 'b\n'),
+                          ('killed-base', 'c\n'), # Not technically correct
+                          ('killed-base', 'a\n'), # as they came from A
+                          ('killed-base', 'b\n'), # but the final text is right
+                          ('unchanged', 'C\n'),
+                          ('unchanged', 'c\n'),
+                          ('unchanged', 'F\n')],
+                         list(plan))
+
     def test_plan_merge_uncommitted_files(self):
         self.setup_plan_merge_uncommitted()
         plan = self.plan_merge_vf.plan_merge('B:', 'C:')