Rev 3101: Implement find_differences using a different algorithm. in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update
John Arbash Meinel
john at arbash-meinel.com
Wed Dec 12 18:42:11 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update
------------------------------------------------------------
revno: 3101
revision-id:john at arbash-meinel.com-20071212184135-kym0o1vf0jnd66s3
parent: john at arbash-meinel.com-20071212152039-asn1cpp5l2nin0sf
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: graph_update
timestamp: Wed 2007-12-12 12:41:35 -0600
message:
Implement find_differences using a different algorithm.
Instead of having a separate common searcher, we now leave all searchers
active. When we find a common node, we update every searcher with all
seen ancestors of that node. This effectively jumps each searcher to
the current tip of all the search ancestries.
Once all searchers are operating in sync, we terminate.
This seems to avoid the pitfalls of history shortcuts.
This might be slightly less efficient, in that it causes a few more
repeated requests for already-seen nodes, and it may make more calls
into find_seen_ancestors, which triggers a get_parents() spin loop.
But it makes sure all searchers have been properly updated with the
seen nodes.
So far it passes all tests.
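
To make the new control flow concrete, here is a minimal sketch of the
idea in plain Python. The Searcher class and find_border_ancestors
function below are simplified stand-ins for bzrlib's
_BreadthFirstSearcher and Graph._find_border_ancestors (in particular,
find_seen_ancestors here is a plain set intersection rather than an
ancestry walk), so read it as an illustration of the algorithm, not as
the implementation in the diff below.

class Searcher(object):
    """Toy stand-in for bzrlib's _BreadthFirstSearcher."""

    def __init__(self, start):
        self._start = set(start)
        self._search_revisions = None
        self.seen = set()

    def will_search(self):
        # The frontier whose parents the caller should fetch next.
        if self._search_revisions is None:
            return self._start
        return self._search_revisions

    def step(self, parents):
        # Advance one generation and return the newly seen nodes.
        if self._search_revisions is None:
            frontier = set(self._start)
        else:
            frontier = set()
            for node in self._search_revisions:
                frontier.update(parents.get(node, ()))
            frontier -= self.seen
        self.seen.update(frontier)
        self._search_revisions = frontier
        return frontier

    def find_seen_ancestors(self, revisions):
        # Simplified: of the given revisions, the ones this searcher
        # has seen (the real searcher walks their ancestry as well).
        return self.seen.intersection(revisions)

    def start_searching(self, revisions):
        # Jump this searcher forward: fold the given nodes into its
        # frontier so it catches up with the other searchers.
        self.seen.update(revisions)
        if self._search_revisions is None:
            self._search_revisions = set(revisions)
        else:
            self._search_revisions.update(revisions)


def find_border_ancestors(parent_map, revisions):
    searchers = [Searcher([r]) for r in revisions]
    common_ancestors = set()
    border_ancestors = set()
    next_to_search = set()  # the very first step needs no ancestry
    while True:
        parents = dict((r, parent_map.get(r, ()))
                       for r in next_to_search)
        newly_seen = set()
        for searcher in searchers:
            newly_seen.update(searcher.step(parents))
        new_common = set()
        for revision in newly_seen:
            if revision in common_ancestors:
                new_common.add(revision)
                continue
            for searcher in searchers:
                if revision not in searcher.seen:
                    break
            else:
                # The first common node all walks have reached.
                border_ancestors.add(revision)
                new_common.add(revision)
        if new_common:
            # Update every searcher by the seen ancestors of the common
            # nodes, then restart them all there, jumping each one to
            # the current tip of all the search ancestries.
            for searcher in searchers:
                new_common.update(searcher.find_seen_ancestors(new_common))
            for searcher in searchers:
                searcher.start_searching(new_common)
            common_ancestors.update(new_common)
        # Once every searcher is about to search the same frontier, the
        # searches are in sync and everything left must be common.
        unique_search_sets = set()
        next_to_search = set()
        for searcher in searchers:
            will_search_set = frozenset(searcher.will_search())
            if will_search_set not in unique_search_sets:
                next_to_search.update(will_search_set)
                unique_search_sets.add(will_search_set)
        if len(unique_search_sets) == 1:
            return border_ancestors, common_ancestors

For example, for two branches that both branched from rev1:

parent_map = {'rev2a': ('rev1',), 'rev2b': ('rev1',), 'rev1': ()}
border, common = find_border_ancestors(parent_map, ['rev2a', 'rev2b'])
# border == common == set(['rev1'])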
modified:
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2007-12-09 02:16:47 +0000
+++ b/bzrlib/graph.py 2007-12-12 18:41:35 +0000
@@ -17,6 +17,7 @@
 from bzrlib import (
     errors,
     revision,
+    trace,
     tsort,
     )
 from bzrlib.deprecated_graph import (node_distances, select_farthest)
@@ -264,50 +265,22 @@
         """
         if None in revisions:
             raise errors.InvalidRevisionId(None, self)
-        common_searcher = self._make_breadth_first_searcher([])
         common_ancestors = set()
         searchers = [self._make_breadth_first_searcher([r])
                      for r in revisions]
         active_searchers = searchers[:]
         border_ancestors = set()
-        def update_common(searcher, revisions):
-            w_seen_ancestors = searcher.find_seen_ancestors(revisions)
-            stopped = searcher.stop_searching_any(w_seen_ancestors)
-            common_ancestors.update(w_seen_ancestors)
-            common_searcher.start_searching(stopped)
-
-        next_to_search = set(revisions)
-
-        # Did any searcher reach the end of the graph?
-        # If so, we need to keep walking the common searcher, in case the last
-        # revisions are actually common.
-        walked_off_graph = False
+
+        # The very first step shouldn't need to lookup any ancestry
+        next_to_search = set()
         while True:
-            if (len(active_searchers) == 0
-                and (not walked_off_graph or not common_searcher.will_search())):
-                return border_ancestors, common_ancestors, [s.seen for s in
-                                                            searchers]
             parents = self.get_parent_map(next_to_search)
-            new_common = common_searcher.step(parents)
-            if new_common:
-                common_ancestors.update(new_common)
-                for searcher in active_searchers:
-                    update_common(searcher, new_common)
-                # All the revisions found by the common_searcher are not
-                # borders, because they are descended from another common
-                # ancestor
-                border_ancestors.difference_update(new_common)
-
             newly_seen = set()
-            for searcher in active_searchers:
+            for searcher in searchers:
                 new_ancestors = searcher.step(parents)
                 if new_ancestors:
                     newly_seen.update(new_ancestors)
-                else:
-                    # This searcher reached the end of the graph, rather than
-                    # being stopped by finding common revisions
-                    walked_off_graph = True
             new_common = set()
             for revision in newly_seen:
                 if revision in common_ancestors:
@@ -323,16 +296,37 @@
                     # after walking for a while.
                     border_ancestors.add(revision)
                     new_common.add(revision)
+            if new_common:
+                for searcher in searchers:
+                    new_common.update(searcher.find_seen_ancestors(new_common))
+                for searcher in searchers:
+                    searcher.start_searching(new_common)
+                common_ancestors.update(new_common)
+
+            # Figure out what the searchers will be searching next, and if
+            # there is only 1 set being searched, then we are done searching,
+            # since all searchers would have to be searching the same data,
+            # thus it *must* be in common.
+            unique_search_sets = set()
+            next_to_search = set()
             for searcher in searchers:
-                update_common(searcher, new_common)
-            next_to_search = set(common_searcher.will_search())
-            new_active_searchers = []
-            for searcher in active_searchers:
-                this_next = searcher.will_search()
-                if this_next:
-                    next_to_search.update(this_next)
-                    new_active_searchers.append(searcher)
-            active_searchers = new_active_searchers
+                will_search_set = frozenset(searcher.will_search())
+                if will_search_set not in unique_search_sets:
+                    # This searcher is searching a unique set of nodes, let it
+                    next_to_search.update(will_search_set)
+                    unique_search_sets.add(will_search_set)
+
+            if len(unique_search_sets) == 1:
+                nodes = unique_search_sets.pop()
+                uncommon_nodes = nodes.difference(common_ancestors)
+                assert not uncommon_nodes, ("Somehow we ended up converging"
+                                            " without actually marking them as"
+                                            " in common."
+                                            "\nStart_nodes: %s"
+                                            "\nuncommon_nodes: %s"
+                                            % (revisions, uncommon_nodes))
+                return border_ancestors, common_ancestors, [s.seen for s in
+                                                            searchers]

     def heads(self, keys):
         """Return the heads from amongst keys.
@@ -550,8 +544,12 @@
         self._parents_provider = parents_provider

     def __repr__(self):
-        return ('_BreadthFirstSearcher(self._search_revisions=%r,'
-                ' self.seen=%r)' % (self._search_revisions, self.seen))
+        if self._search_revisions is not None:
+            search = 'searching=%r' % (list(self._search_revisions),)
+        else:
+            search = 'starting=%r' % (list(self._start),)
+        return ('_BreadthFirstSearcher(%s,'
+                ' seen=%r)' % (search, list(self.seen)))

     def will_search(self):
         """Give the list of nodes that will be searched next."""
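
As a small hypothetical illustration (reusing the toy Searcher sketched
after the commit message above, whose attribute names happen to line
up), the new two-state __repr__ distinguishes a searcher that has not
taken its first step yet:

def searcher_repr(searcher):
    # Mirrors the hunk above: show 'starting=' until the first step,
    # 'searching=' afterwards.
    if searcher._search_revisions is not None:
        search = 'searching=%r' % (list(searcher._search_revisions),)
    else:
        search = 'starting=%r' % (list(searcher._start),)
    return ('_BreadthFirstSearcher(%s,'
            ' seen=%r)' % (search, list(searcher.seen)))

# Before its first step a searcher prints as, e.g.,
# "_BreadthFirstSearcher(starting=['rev2a'], seen=[])"; once it has
# stepped, it switches to the searching=... form with everything seen
# so far.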