Rev 3107: Merge in the final updates for Graph.heads() in http://bzr.arbash-meinel.com/branches/bzr/1.1-dev/graph_optimization
John Arbash Meinel
john at arbash-meinel.com
Thu Dec 20 22:33:33 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/1.1-dev/graph_optimization
------------------------------------------------------------
revno: 3107
revision-id:john at arbash-meinel.com-20071220223307-qa45b18h62noat8c
parent: john at arbash-meinel.com-20071218224058-fsihu8vgtmksb7vp
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: graph_optimization
timestamp: Thu 2007-12-20 16:33:07 -0600
message:
Merge in the final updates for Graph.heads()
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2007-12-18 19:42:10 +0000
+++ b/bzrlib/graph.py 2007-12-20 22:33:07 +0000
@@ -321,9 +321,14 @@
searchers = dict((c, self._make_breadth_first_searcher([c]))
for c in candidate_heads)
active_searchers = dict(searchers)
- # skip over the actual candidate for each searcher
+ # The first step doesn't require accessing any parents
+ parent_map = {}
+
+ # skip over the actual candidate for each searcher, and figure out what
+ # the next nodes are going to be.
+ next_to_search = set()
for searcher in active_searchers.itervalues():
- searcher.next()
+ next_to_search.update(searcher.step(parent_map))
# The common walker finds nodes that are common to two or more of the
# input keys, so that we don't access all history when a currently
# uncommon search point actually meets up with something behind a
@@ -332,13 +337,10 @@
# accessing all history.
common_walker = self._make_breadth_first_searcher([])
while len(active_searchers) > 0:
+ parent_map = self.get_parent_map(next_to_search)
ancestors = set()
# advance searches
- try:
- common_walker.next()
- except StopIteration:
- # No common points being searched at this time.
- pass
+ common_walker.step(parent_map)
for candidate in active_searchers.keys():
try:
searcher = active_searchers[candidate]
@@ -347,26 +349,30 @@
# through this for loop, because it was determined to be
# a descendant of another candidate.
continue
- try:
- ancestors.update(searcher.next())
- except StopIteration:
+ next_ancestors = searcher.step(parent_map)
+ if next_ancestors:
+ ancestors.update(next_ancestors)
+ else:
del active_searchers[candidate]
- continue
# process found nodes
new_common = set()
+ new_common_tips = set()
for ancestor in ancestors:
if ancestor in candidate_heads:
candidate_heads.remove(ancestor)
+ if len(candidate_heads) <= 1:
+ # Shortcut, there must be a head, if there is only one
+ # left, then this must be it
+ return candidate_heads
del searchers[ancestor]
if ancestor in active_searchers:
del active_searchers[ancestor]
# it may meet up with a known common node
if ancestor in common_walker.seen:
# some searcher has encountered our known common nodes:
- # just stop it
- ancestor_set = set([ancestor])
- for searcher in searchers.itervalues():
- searcher.stop_searching_any(ancestor_set)
+ # just stop it. we don't need to check the ancestry,
+ # because this is a tip running into common.
+ new_common_tips.add(ancestor)
else:
# or it may have been just reached by all the searchers:
for searcher in searchers.itervalues():
@@ -377,11 +383,28 @@
# making it be known as a descendant of all candidates,
# so we can stop searching it, and any seen ancestors
new_common.add(ancestor)
- for searcher in searchers.itervalues():
- seen_ancestors =\
- searcher.find_seen_ancestors(ancestor)
- searcher.stop_searching_any(seen_ancestors)
- common_walker.start_searching(new_common)
+ if new_common_tips:
+ for searcher in searchers.itervalues():
+ searcher.stop_searching_any(new_common_tips)
+ if new_common:
+ known_common = common_walker.seen - new_common
+ for searcher in searchers.itervalues():
+ seen_ancestors = searcher.find_seen_ancestors(new_common,
+ known_common)
+ stopped = searcher.stop_searching_any(seen_ancestors)
+ # Start walking the stopped nodes in the common searcher
+ # Also, add all seen ancesters to the common seen list, so
+ # the common walker doesn't have to search them again if
+ # they show up by a different path (we are already
+ # searching their ancestors).
+ common_walker.start_searching(stopped)
+ common_walker.seen.update(seen_ancestors)
+ # After we stop searching nodes, figure out what the next nodes are
+ # going to be, so we can preload them
+ next_to_search = set()
+ for searcher in searchers.itervalues():
+ next_to_search.update(searcher.will_search())
+ next_to_search.update(common_walker.will_search())
return candidate_heads
def find_unique_lca(self, left_revision, right_revision,
@@ -511,40 +534,72 @@
return ('_BreadthFirstSearcher(%s,'
' seen=%r)' % (search, list(self.seen)))
+ def will_search(self):
+ """Give the list of nodes that will be searched next."""
+ # will_search does not give valid responses for the first request,
+ # because it doesn't need parents, it is going to directly return these
+ # nodes
+ assert self._search_revisions is not None
+ return self._search_revisions
+
+ def step(self, parent_map):
+ """Step to the next set of ancestors.
+
+ :param parent_map: A dictionary mapping revision_id => parent_ids
+ It is assumed that all parents will be available. Callers should
+ use "will_search()" to find what revisions this searcher will need,
+ and then load them outside of this function.
+ :return: A set of ancestors (in no particular order)
+ """
+ if self._search_revisions is None:
+ self._search_revisions = self._start
+ else:
+ new_search_revisions = set()
+ for revision_id in self._search_revisions:
+ parent_ids = parent_map.get(revision_id, None)
+ if parent_ids is None:
+ continue
+ new_search_revisions.update(parent_ids)
+ new_search_revisions.difference_update(self.seen)
+ self._search_revisions = new_search_revisions
+ self.seen.update(self._search_revisions)
+ return self._search_revisions
+
def next(self):
"""Return the next ancestors of this revision.
Ancestors are returned in the order they are seen in a breadth-first
traversal. No ancestor will be returned more than once.
"""
- if self._search_revisions is None:
- self._search_revisions = self._start
- else:
- new_search_revisions = set()
+ if self._search_revisions is not None:
parent_map = self._parents_provider.get_parent_map(
self._search_revisions)
- for parents in parent_map.itervalues():
- new_search_revisions.update(p for p in parents if
- p not in self.seen)
- self._search_revisions = new_search_revisions
- if len(self._search_revisions) == 0:
+ else:
+ parent_map = {}
+ next_revisions = self.step(parent_map)
+ if not next_revisions:
raise StopIteration()
- self.seen.update(self._search_revisions)
- return self._search_revisions
+ return next_revisions
def __iter__(self):
return self
- def find_seen_ancestors(self, revision):
+ def find_seen_ancestors(self, revisions, ignore_common=None):
"""Find ancestors of this revision that have already been seen."""
- searcher = _BreadthFirstSearcher([revision], self._parents_provider)
+ if ignore_common is None:
+ ignore_common = frozenset()
+ searcher = _BreadthFirstSearcher(revisions, self._parents_provider)
seen_ancestors = set()
for ancestors in searcher:
+ stop_nodes = set()
for ancestor in ancestors:
if ancestor not in self.seen:
- searcher.stop_searching_any([ancestor])
+ stop_nodes.add(ancestor)
else:
seen_ancestors.add(ancestor)
+ if ancestor in ignore_common:
+ stop_nodes.add(ancestor)
+ searcher.stop_searching_any(stop_nodes)
return seen_ancestors
def stop_searching_any(self, revisions):
@@ -555,7 +610,7 @@
search list. In this case, the call is a no-op.
"""
stopped = self._search_revisions.intersection(revisions)
- self._search_revisions = self._search_revisions.difference(revisions)
+ self._search_revisions.difference_update(stopped)
return stopped
def start_searching(self, revisions):
More information about the bazaar-commits
mailing list