Rev 3107: Merge in the final updates for Graph.heads() in http://bzr.arbash-meinel.com/branches/bzr/1.1-dev/graph_optimization

John Arbash Meinel john at arbash-meinel.com
Thu Dec 20 22:33:33 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/1.1-dev/graph_optimization

------------------------------------------------------------
revno: 3107
revision-id: john at arbash-meinel.com-20071220223307-qa45b18h62noat8c
parent: john at arbash-meinel.com-20071218224058-fsihu8vgtmksb7vp
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: graph_optimization
timestamp: Thu 2007-12-20 16:33:07 -0600
message:
  Merge in the final updates for Graph.heads()
modified:
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2007-12-18 19:42:10 +0000
+++ b/bzrlib/graph.py	2007-12-20 22:33:07 +0000
@@ -321,9 +321,14 @@
         searchers = dict((c, self._make_breadth_first_searcher([c]))
                           for c in candidate_heads)
         active_searchers = dict(searchers)
-        # skip over the actual candidate for each searcher
+        # The first step doesn't require accessing any parents
+        parent_map = {}
+
+        # skip over the actual candidate for each searcher, and figure out what
+        # the next nodes are going to be.
+        next_to_search = set()
         for searcher in active_searchers.itervalues():
-            searcher.next()
+            next_to_search.update(searcher.step(parent_map))
         # The common walker finds nodes that are common to two or more of the
         # input keys, so that we don't access all history when a currently
         # uncommon search point actually meets up with something behind a
@@ -332,13 +337,10 @@
         # accessing all history.
         common_walker = self._make_breadth_first_searcher([])
         while len(active_searchers) > 0:
+            parent_map = self.get_parent_map(next_to_search)
             ancestors = set()
             # advance searches
-            try:
-                common_walker.next()
-            except StopIteration:
-                # No common points being searched at this time.
-                pass
+            common_walker.step(parent_map)
             for candidate in active_searchers.keys():
                 try:
                     searcher = active_searchers[candidate]
@@ -347,26 +349,30 @@
                     # through this for loop, because it was determined to be
                     # a descendant of another candidate.
                     continue
-                try:
-                    ancestors.update(searcher.next())
-                except StopIteration:
+                next_ancestors = searcher.step(parent_map)
+                if next_ancestors:
+                    ancestors.update(next_ancestors)
+                else:
                     del active_searchers[candidate]
-                    continue
             # process found nodes
             new_common = set()
+            new_common_tips = set()
             for ancestor in ancestors:
                 if ancestor in candidate_heads:
                     candidate_heads.remove(ancestor)
+                    if len(candidate_heads) <= 1:
+                        # Shortcut, there must be a head, if there is only one
+                        # left, then this must be it
+                        return candidate_heads
                     del searchers[ancestor]
                     if ancestor in active_searchers:
                         del active_searchers[ancestor]
                 # it may meet up with a known common node
                 if ancestor in common_walker.seen:
                     # some searcher has encountered our known common nodes:
-                    # just stop it
-                    ancestor_set = set([ancestor])
-                    for searcher in searchers.itervalues():
-                        searcher.stop_searching_any(ancestor_set)
+                    # just stop it. we don't need to check the ancestry,
+                    # because this is a tip running into common.
+                    new_common_tips.add(ancestor)
                 else:
                     # or it may have been just reached by all the searchers:
                     for searcher in searchers.itervalues():
@@ -377,11 +383,28 @@
                         # making it be known as a descendant of all candidates,
                         # so we can stop searching it, and any seen ancestors
                         new_common.add(ancestor)
-                        for searcher in searchers.itervalues():
-                            seen_ancestors =\
-                                searcher.find_seen_ancestors(ancestor)
-                            searcher.stop_searching_any(seen_ancestors)
-            common_walker.start_searching(new_common)
+            if new_common_tips:
+                for searcher in searchers.itervalues():
+                    searcher.stop_searching_any(new_common_tips)
+            if new_common:
+                known_common = common_walker.seen - new_common
+                for searcher in searchers.itervalues():
+                    seen_ancestors = searcher.find_seen_ancestors(new_common,
+                                                                  known_common)
+                    stopped = searcher.stop_searching_any(seen_ancestors)
+                    # Start walking the stopped nodes in the common searcher
+                    # Also, add all seen ancestors to the common seen list, so
+                    # the common walker doesn't have to search them again if
+                    # they show up by a different path (we are already
+                    # searching their ancestors).
+                    common_walker.start_searching(stopped)
+                    common_walker.seen.update(seen_ancestors)
+            # After we stop searching nodes, figure out what the next nodes are
+            # going to be, so we can preload them
+            next_to_search = set()
+            for searcher in searchers.itervalues():
+                next_to_search.update(searcher.will_search())
+            next_to_search.update(common_walker.will_search())
         return candidate_heads
 
     def find_unique_lca(self, left_revision, right_revision,
@@ -511,40 +534,72 @@
         return ('_BreadthFirstSearcher(%s,'
                 ' seen=%r)' % (search, list(self.seen)))
 
+    def will_search(self):
+        """Give the list of nodes that will be searched next."""
+        # will_search does not give valid responses for the first request,
+        # because it doesn't need parents, it is going to directly return these
+        # nodes
+        assert self._search_revisions is not None
+        return self._search_revisions
+
+    def step(self, parent_map):
+        """Step to the next set of ancestors.
+
+        :param parent_map: A dictionary mapping revision_id => parent_ids
+            It is assumed that all parents will be available. Callers should
+            use "will_search()" to find what revisions this searcher will need,
+            and then load them outside of this function.
+        :return: A set of ancestors (in no particular order)
+        """
+        if self._search_revisions is None:
+            self._search_revisions = self._start
+        else:
+            new_search_revisions = set()
+            for revision_id in self._search_revisions:
+                parent_ids = parent_map.get(revision_id, None)
+                if parent_ids is None:
+                    continue
+                new_search_revisions.update(parent_ids)
+            new_search_revisions.difference_update(self.seen)
+            self._search_revisions = new_search_revisions
+        self.seen.update(self._search_revisions)
+        return self._search_revisions
+
     def next(self):
         """Return the next ancestors of this revision.
 
         Ancestors are returned in the order they are seen in a breadth-first
         traversal.  No ancestor will be returned more than once.
         """
-        if self._search_revisions is None:
-            self._search_revisions = self._start
-        else:
-            new_search_revisions = set()
+        if self._search_revisions is not None:
             parent_map = self._parents_provider.get_parent_map(
                             self._search_revisions)
-            for parents in parent_map.itervalues():
-                new_search_revisions.update(p for p in parents if
-                                            p not in self.seen)
-            self._search_revisions = new_search_revisions
-        if len(self._search_revisions) == 0:
+        else:
+            parent_map = {}
+        next_revisions = self.step(parent_map)
+        if not next_revisions:
             raise StopIteration()
-        self.seen.update(self._search_revisions)
-        return self._search_revisions
+        return next_revisions
 
     def __iter__(self):
         return self
 
-    def find_seen_ancestors(self, revision):
+    def find_seen_ancestors(self, revisions, ignore_common=None):
         """Find ancestors of this revision that have already been seen."""
-        searcher = _BreadthFirstSearcher([revision], self._parents_provider)
+        if ignore_common is None:
+            ignore_common = frozenset()
+        searcher = _BreadthFirstSearcher(revisions, self._parents_provider)
         seen_ancestors = set()
         for ancestors in searcher:
+            stop_nodes = set()
             for ancestor in ancestors:
                 if ancestor not in self.seen:
-                    searcher.stop_searching_any([ancestor])
+                    stop_nodes.add(ancestor)
                 else:
                     seen_ancestors.add(ancestor)
+                    if ancestor in ignore_common:
+                        stop_nodes.add(ancestor)
+            searcher.stop_searching_any(stop_nodes)
         return seen_ancestors
 
     def stop_searching_any(self, revisions):
@@ -555,7 +610,7 @@
         search list.  In this case, the call is a no-op.
         """
         stopped = self._search_revisions.intersection(revisions)
-        self._search_revisions = self._search_revisions.difference(revisions)
+        self._search_revisions.difference_update(stopped)
         return stopped
 
     def start_searching(self, revisions):

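For readers skimming the patch, here is a minimal sketch of the calling
pattern the new API is built around: will_search() tells the caller which
revisions a searcher needs parents for, the caller batches one
get_parent_map() call covering every searcher in the round, and step()
advances each searcher using that shared map. The walk_together() helper and
its names below are illustrative only, not part of bzrlib.

# Illustrative sketch: advance several breadth-first searchers in lock-step,
# loading parents once per round instead of once per searcher.  `graph` is
# assumed to provide get_parent_map(), as Graph does in this patch.
def walk_together(graph, searchers):
    """Yield each round's frontier until every searcher is exhausted."""
    searchers = list(searchers)
    # Per the patch, the first step returns the start revisions directly and
    # needs no parent information, so seed it with an empty parent_map.
    next_to_search = set()
    for searcher in searchers:
        next_to_search.update(searcher.step({}))
    while next_to_search:
        yield next_to_search
        # One batched lookup covers every searcher for this round.
        parent_map = graph.get_parent_map(next_to_search)
        next_to_search = set()
        for searcher in searchers:
            # will_search() would return the same set immediately after
            # step(); heads() re-queries it only because stop_searching_any()
            # and start_searching() can change the frontier in between.
            next_to_search.update(searcher.step(parent_map))

# Possible use (revision ids are hypothetical):
#   searchers = [graph._make_breadth_first_searcher([r])
#                for r in ('rev-a', 'rev-b')]
#   for frontier in walk_together(graph, searchers):
#       ...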
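Along the same lines, find_seen_ancestors() now takes a set of revisions plus
an optional ignore_common set, so heads() makes one call per searcher for all
newly-common tips instead of one call per tip. The snippet below is just the
new pattern pulled out of the diff for readability; searcher, new_common and
common_walker are the names used in heads() above.

# One batched call per searcher: find already-seen ancestors of every newly
# common tip, but stop descending at nodes the common walker already knows.
known_common = common_walker.seen - new_common
seen_ancestors = searcher.find_seen_ancestors(new_common, known_common)
stopped = searcher.stop_searching_any(seen_ancestors)
# Hand the stopped tips to the common walker, and record their seen ancestors
# so it will not walk them again if it reaches them by another path.
common_walker.start_searching(stopped)
common_walker.seen.update(seen_ancestors)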

