Rev 3097: Possible fix for find_differences() and shortcuts. in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update

Sat Dec 8 16:30:02 GMT 2007

At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update

------------------------------------------------------------
revno: 3097
revision-id:john at arbash-meinel.com-20071208162928-as2g1oon0bg1wxvv
parent: john at arbash-meinel.com-20071208020754-8jo7xvj2e2l4dzrc
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: graph_update
timestamp: Sat 2007-12-08 10:29:28 -0600
message:
  Possible fix for find_differences() and shortcuts.
  Basically, we track that a searcher stopped because it walked off the
  graph, rather than because it all of the nodes it was searching we
  found to be common nodes. And we keep processing the common searcher
  until it either cannot find the unique node (no more nodes to search),
  or it finds that node in common (no more stalled searchers).
modified:
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/tests/test_graph.py     test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
-------------- next part --------------
=== modified file 'bzrlib/graph.py'

--- a/bzrlib/graph.py	2007-12-08 02:07:54 +0000
+++ b/bzrlib/graph.py	2007-12-08 16:29:28 +0000
@@ -278,8 +278,18 @@
 
         next_to_search = set(revisions)
 
+        stalled_searchers = []
+
+        # Search as long as:
+        #  a) A searcher still thinks there are more nodes to process, since
+        #     they might find new common nodes.
+        #  b) A searcher has reached the 'null:' revision, but the
+        #     common_searcher has not completed yet. Because the common
+        #     searcher may find that the last node visted by the searcher is
+        #     actually a shortcut from a different common ancestor.
         while True:
-            if len(active_searchers) == 0:
+            if (len(active_searchers) == 0
+                and (not stalled_searchers or not common_searcher.will_search())):
                 return border_ancestors, common_ancestors, [s.seen for s in
                                                             searchers]
             parents = self.get_parent_map(next_to_search)
@@ -300,6 +310,8 @@
                 if new_ancestors:
                     newly_seen.update(new_ancestors)
                     new_active_searchers.append(searcher)
+                elif searcher.hit_null:
+                    stalled_searchers.append(searcher)
             active_searchers = new_active_searchers
             new_common = set()
             for revision in newly_seen:
@@ -318,6 +330,11 @@
                     new_common.add(revision)
             for searcher in searchers:
                 update_common(searcher, new_common)
+            new_stalled_searchers = []
+            for searcher in stalled_searchers:
+                if searcher.hit_null:
+                    new_stalled_searchers.append(searcher)
+            stalled_searchers = new_stalled_searchers
             next_to_search = set(common_searcher.will_search())
             for searcher in active_searchers:
                 next_to_search.update(searcher.will_search())
@@ -533,6 +550,7 @@
         self._search_revisions = None
         self.seen = set(revisions)
         self._parents_provider = parents_provider
+        self.hit_null = set() # Revisions which reached NULL_REVISION
 
     def __repr__(self):
         return ('_BreadthFirstSearcher(self._search_revisions=%r,'
@@ -562,6 +580,11 @@
                 parent_ids = parents.get(revision_id, None)
                 if parent_ids is None:
                     continue
+                # XXX: jam 20071208 Unfortunately, we can't just say
+                #      "parent_ids == (NULL_REVISION,)", because some
+                #      parent_providers return tuples and some return lists.
+                if parent_ids and parent_ids[0] == revision.NULL_REVISION:
+                    self.hit_null.add(revision_id)
                 new_search_revisions.update(parent_ids)
             new_search_revisions.difference_update(self.seen)
             self._search_revisions = new_search_revisions
@@ -613,6 +636,7 @@
         """
         stopped = self._search_revisions.intersection(revisions)
         self._search_revisions.difference_update(stopped)
+        self.hit_null.difference_update(revisions)
         return stopped
 
     def start_searching(self, revisions):

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2007-12-08 02:07:54 +0000
+++ b/bzrlib/tests/test_graph.py	2007-12-08 16:29:28 +0000
@@ -161,6 +161,29 @@
                    'd':['c'], 'e':['c'], 'f':['a', 'd'],
                    'g':['a', 'e']}
 
+# Shortcut with extra root
+# We have a long history shortcut, and an extra root, which is why we can't
+# stop searchers based on seeing NULL_REVISION
+#  NULL_REVISION
+#       |   |
+#       a   |
+#       |\  |
+#       b | |
+#       | | |
+#       c | |
+#       | | |
+#       d | g
+#       |\|/
+#       e f
+shortcut_extra_root = {'a': [NULL_REVISION],
+                       'b': ['a'],
+                       'c': ['b'],
+                       'd': ['c'],
+                       'e': ['d'],
+                       'f': ['a', 'd', 'g'],
+                       'g': [NULL_REVISION],
+                      }
+
 #  NULL_REVISION
 #       |
 #       f
@@ -338,6 +361,14 @@
         self.assertEqual((set(['rev4', 'rev3', 'rev2a']), set()),
                          graph.find_difference('rev4', 'rev2b'))
 
+    def test_graph_difference_separate_ancestry(self):
+        graph = self.make_graph(ancestry_2)
+        self.assertEqual((set(['rev1a']), set(['rev1b'])),
+                         graph.find_difference('rev1a', 'rev1b'))
+        self.assertEqual((set(['rev1a', 'rev2a', 'rev3a', 'rev4a']),
+                          set(['rev1b'])),
+                         graph.find_difference('rev4a', 'rev1b'))
+
     def test_graph_difference_criss_cross(self):
         graph = self.make_graph(criss_cross)
         self.assertEqual((set(['rev3a']), set(['rev3b'])),
@@ -347,14 +378,21 @@
 
     def test_graph_difference_extended_history(self):
         graph = self.make_graph(extended_history_shortcut)
-        self.expectFailure('find_difference is confused by shortcuts',
-            self.assertEqual, (set(['e']), set(['f'])),
-            graph.find_difference('e', 'f'))
         self.assertEqual((set(['e']), set(['f'])),
                          graph.find_difference('e', 'f'))
         self.assertEqual((set(['f']), set(['e'])),
                          graph.find_difference('f', 'e'))
 
+    def test_graph_difference_double_shortcut(self):
+        graph = self.make_graph(double_shortcut)
+        self.assertEqual((set(['d', 'f']), set(['e', 'g'])),
+                         graph.find_difference('f', 'g'))
+
+    def test_graph_difference_shortcut_extra_root(self):
+        graph = self.make_graph(shortcut_extra_root)
+        self.assertEqual((set(['e']), set(['f', 'g'])),
+                         graph.find_difference('e', 'f'))
+
     def test_stacked_parents_provider(self):
 
         parents1 = _mod_graph.DictParentsProvider({'rev2': ['rev3']})