Rev 3528: Bring in the code to collapse linear portions of the graph. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/weave_merge

John Arbash Meinel john at arbash-meinel.com
Sun Jul 13 05:06:20 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/weave_merge

------------------------------------------------------------
revno: 3528
revision-id: john at arbash-meinel.com-20080713040616-fl43f8inkw19qj2l
parent: john at arbash-meinel.com-20080713003042-ak1b725ck5vmpbmi
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: weave_merge
timestamp: Sat 2008-07-12 23:06:16 -0500
message:
  Bring in the code to collapse linear portions of the graph.
  Also, start handling cases where we get NULL_REVISION instead of a tuple
  key from various APIs.
  We could probably handle it at a different level if we really wanted.
modified:
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/merge.py                merge.py-20050513021216-953b65a438527106
  bzrlib/tests/test_graph.py     test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
  bzrlib/tests/test_merge.py     testmerge.py-20050905070950-c1b5aa49ff911024
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2008-07-10 22:41:46 +0000
+++ b/bzrlib/graph.py	2008-07-13 04:06:16 +0000
@@ -1444,3 +1444,78 @@
         """
         return self._keys
 
+
+def collapse_linear_regions(parent_map):
+    """Collapse regions of the graph that are 'linear'.
+
+    For example::
+
+      A:[B], B:[C]
+
+    can be collapsed by removing B and getting::
+
+      A:[C]
+
+    :param parent_map: A dictionary mapping children to their parents
+    :return: Another dictionary with 'linear' chains collapsed
+    """
+    # Note: this isn't a strictly minimal collapse. For example:
+    #   A
+    #  / \
+    # B   C
+    #  \ /
+    #   D
+    #   |
+    #   E
+    # Will not have 'D' removed, even though 'E' could fit. Also:
+    #   A
+    #   |    A
+    #   B => |
+    #   |    C
+    #   C
+    # A and C are both kept because they are edges of the graph. We *could* get
+    # rid of A if we wanted.
+    #   A
+    #  / \
+    # B   C
+    # |   |
+    # D   E
+    #  \ /
+    #   F
+    # Will not have any nodes removed, even though you do have an
+    # 'uninteresting' linear D->B and E->C
+    children = {}
+    for child, parents in parent_map.iteritems():
+        children.setdefault(child, [])
+        for p in parents:
+            children.setdefault(p, []).append(child)
+
+    orig_children = dict(children)
+    removed = set()
+    result = dict(parent_map)
+    for node in parent_map:
+        parents = result[node]
+        if len(parents) == 1:
+            parent_children = children[parents[0]]
+            if len(parent_children) != 1:
+                # This is not the only child
+                continue
+            node_children = children[node]
+            if len(node_children) != 1:
+                continue
+            child_parents = result.get(node_children[0], None)
+            if child_parents is None:
+                import pdb; pdb.set_trace()
+            if len(child_parents) != 1:
+                # This is not its only parent
+                continue
+            assert child_parents[0] == node
+            # The child of this node only points at it, and the parent only has
+            # this as a child. Remove this node and join the others together.
+            result[node_children[0]] = parents
+            children[parents[0]] = node_children
+            del result[node]
+            del children[node]
+            removed.add(node)
+
+    return result
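
As a quick illustration of what the new helper does, here is a standalone
usage sketch (not part of the patch); the integer graphs and expected
results are taken directly from the TestCollapseLinearRegions cases added
further down:

  from bzrlib import graph as _mod_graph

  # A purely linear chain collapses down to its two end points.
  chain = {1: [2], 2: [3], 3: [4], 4: [5], 5: []}
  assert _mod_graph.collapse_linear_regions(chain) == {1: [5], 5: []}

  # Nodes with multiple parents or multiple children are kept, so this
  # merge-shaped graph comes back unchanged.
  merged = {1: [2], 2: [3, 4], 3: [5], 4: [5], 5: []}
  assert _mod_graph.collapse_linear_regions(merged) == merged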

=== modified file 'bzrlib/merge.py'
--- a/bzrlib/merge.py	2008-07-13 00:30:42 +0000
+++ b/bzrlib/merge.py	2008-07-13 04:06:16 +0000
@@ -23,6 +23,7 @@
 from bzrlib import (
     debug,
     errors,
+    graph as _mod_graph,
     osutils,
     patiencediff,
     registry,
@@ -1462,6 +1463,12 @@
                     unique_lca = None
                 else:
                     unique_lca = list(cur_lcas)[0]
+                    if unique_lca == NULL_REVISION:
+                        # find_lca will return a plain 'NULL_REVISION' rather
+                        # than a key tuple when there is no common ancestor, we
+                        # prefer to just use None, because it doesn't confuse
+                        # _get_interesting_texts()
+                        unique_lca = None
                 parent_map.update(self._find_unique_parents(next_lcas,
                                                             unique_lca))
                 break
@@ -1484,6 +1491,11 @@
         #       isn't a "backwards compatible" api change.
         if base_key is None:
             parent_map = dict(self.graph.iter_ancestry(tip_keys))
+            # We remove NULL_REVISION because it isn't a proper tuple key, and
+            # thus confuses things like _get_interesting_texts, and our logic
+            # to add the texts into the memory weave.
+            if NULL_REVISION in parent_map:
+                parent_map.pop(NULL_REVISION)
         else:
             interesting = set()
             for tip in tip_keys:
@@ -1494,8 +1506,11 @@
         culled_parent_map, child_map, tails = self._remove_external_references(
             parent_map)
         # Remove all the tails but base_key
-        tails.remove(base_key)
-        self._prune_tails(culled_parent_map, child_map, tails)
+        if base_key is not None:
+            tails.remove(base_key)
+            self._prune_tails(culled_parent_map, child_map, tails)
+        # Now remove all the uninteresting 'linear' regions
+        # simple_map = _mod_graph.collapse_linear_regions(culled_parent_map)
         return culled_parent_map
 
     @staticmethod
@@ -1566,6 +1581,8 @@
         all_revision_keys.add(self.a_key)
         all_revision_keys.add(self.b_key)
 
+        if NULL_REVISION in all_revision_keys:
+            import pdb; pdb.set_trace()
         # Everything else is in 'keys' but get_lines is in 'revision_ids'
         all_texts = self.get_lines([k[-1] for k in all_revision_keys])
         return all_texts
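
The NULL_REVISION guards in this file all deal with the same mismatch:
these ancestry maps use tuple keys, but the bottom of history comes back
from the graph APIs as the plain NULL_REVISION string. A minimal sketch of
the idea (the toy parent_map below is made up for illustration):

  from bzrlib.revision import NULL_REVISION

  # Ancestry-style maps can contain the plain NULL_REVISION string next to
  # tuple keys; strip it so later code doing key[-1] never trips over it.
  parent_map = {('rev-a',): (NULL_REVISION,), NULL_REVISION: ()}
  parent_map.pop(NULL_REVISION, None)
  assert NULL_REVISION not in parent_map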

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2008-07-10 22:41:46 +0000
+++ b/bzrlib/tests/test_graph.py	2008-07-13 04:06:16 +0000
@@ -1384,3 +1384,41 @@
         # Use sorted because we don't care about the order, just that each is
         # only present 1 time.
         self.assertEqual(['a', 'b'], sorted(self.inst_pp.calls))
+
+
+class TestCollapseLinearRegions(tests.TestCase):
+
+    def assertCollapsed(self, collapsed, original):
+        self.assertEqual(collapsed,
+                         _mod_graph.collapse_linear_regions(original))
+
+    def test_collapse_nothing(self):
+        d = {1:[2, 3], 2:[], 3:[]}
+        self.assertCollapsed(d, d)
+        d = {1:[2], 2:[3, 4], 3:[5], 4:[5], 5:[]}
+        self.assertCollapsed(d, d)
+
+    def test_collapse_chain(self):
+        # Any time we have a linear chain, we should be able to collapse
+        d = {1:[2], 2:[3], 3:[4], 4:[5], 5:[]}
+        self.assertCollapsed({1:[5], 5:[]}, d)
+        d = {5:[4], 4:[3], 3:[2], 2:[1], 1:[]}
+        self.assertCollapsed({5:[1], 1:[]}, d)
+        d = {5:[3], 3:[4], 4:[1], 1:[2], 2:[]}
+        self.assertCollapsed({5:[2], 2:[]}, d)
+
+    def test_collapse_with_multiple_children(self):
+        #    7
+        #    |
+        #    6
+        #   / \
+        #  4   5
+        #  |   |
+        #  3   2
+        #   \ /
+        #    1
+        #
+        # 4 and 5 cannot be removed because 6 has 2 children
+        # 3 and 2 cannot be removed because 1 has 2 parents
+        d = {1:[2, 3], 2:[4], 4:[6], 3:[5], 5:[6], 6:[7], 7:[]}
+        self.assertCollapsed(d, d)

=== modified file 'bzrlib/tests/test_merge.py'
--- a/bzrlib/tests/test_merge.py	2008-07-13 00:30:42 +0000
+++ b/bzrlib/tests/test_merge.py	2008-07-13 04:06:16 +0000
@@ -643,6 +643,46 @@
                           ('unchanged', 'F\n')],
                          list(plan))
 
+    def test_plan_merge_2_tail_triple_ancestors(self):
+        # The graph looks like this:
+        #     A   B     # 2 tails going back to NULL
+        #     |\ /|
+        #     D E F     # D is unique to G, F to H
+        #     |/|\|     # E is the LCA for G & H, and the unique LCA for
+        #     G Q H     # I, J
+        #     |\ /|     # Q is just an extra node which is merged into both
+        #     | X |     # I and J
+        #     |/ \|
+        #     I   J     # criss-cross merge of G, H (and Q)
+        #
+
+        # This is meant to test merging after hitting a 3-way LCA, with multiple
+        # tail ancestors (which only have NULL_REVISION in common).
+
+        self.add_rev('root', 'A', [], 'abc')
+        self.add_rev('root', 'B', [], 'def')
+        self.add_rev('root', 'D', ['A'], 'Dabc')
+        self.add_rev('root', 'E', ['A', 'B'], 'abcdef')
+        self.add_rev('root', 'F', ['B'], 'defF')
+        self.add_rev('root', 'G', ['D', 'E'], 'Dabcdef')
+        self.add_rev('root', 'H', ['F', 'E'], 'abcdefF')
+        self.add_rev('root', 'Q', ['E'], 'abcdef')
+        self.add_rev('root', 'I', ['G', 'Q', 'H'], 'DabcdefF')
+        # Merge G & H but supersede an old line in B
+        self.add_rev('root', 'J', ['H', 'Q', 'G'], 'DabcdJfF')
+        plan = self.plan_merge_vf.plan_merge('I', 'J')
+        self.assertEqual([
+                          ('unchanged', 'D\n'),
+                          ('unchanged', 'a\n'),
+                          ('unchanged', 'b\n'),
+                          ('unchanged', 'c\n'),
+                          ('unchanged', 'd\n'),
+                          ('killed-b', 'e\n'),
+                          ('new-b', 'J\n'),
+                          ('unchanged', 'f\n'),
+                          ('unchanged', 'F\n')],
+                         list(plan))
+
     def test_plan_merge_uncommitted_files(self):
         self.setup_plan_merge_uncommitted()
         plan = self.plan_merge_vf.plan_merge('B:', 'C:')


