Rev 3425: Lots of refactoring for find_unique_ancestors. in http://bzr.arbash-meinel.com/branches/bzr/1.4-dev/find_unique_ancestors
John Arbash Meinel
john at arbash-meinel.com
Wed May 21 00:10:31 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.4-dev/find_unique_ancestors
------------------------------------------------------------
revno: 3425
revision-id: john at arbash-meinel.com-20080520231020-wh8g3pq0whu2p78i
parent: john at arbash-meinel.com-20080513221726-vqpvwthr1tncpb29
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: find_unique_ancestors
timestamp: Tue 2008-05-20 18:10:20 -0500
message:
Lots of refactoring for find_unique_ancestors.
Split up the function into a bunch of helper functions.
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2008-05-13 22:17:26 +0000
+++ b/bzrlib/graph.py 2008-05-20 23:10:20 +0000
@@ -240,19 +240,49 @@
# information you have so far.
# 5) Continue searching, stopping the common searches when the search
# tip is an ancestor of all unique nodes.
- # 6) Search is done when all common searchers have completed.
-
+ # 6) Aggregate together unique searchers when they are searching the
+ # same tips. When all unique searchers are searching the same node,
+ # move it to a single 'all_unique_searcher'.
+ # 7) The 'all_unique_searcher' represents the very 'tip' of searching.
+ # Most of the time this produces very little important information.
+ # So don't step it as quickly as the other searchers.
+ # 8) Search is done when all common searchers have completed.
+
+ unique_searcher, common_searcher = self._find_initial_unique_nodes(
+ [unique_revision], common_revisions)
+
+ unique_nodes = unique_searcher.seen.difference(common_searcher.seen)
+ if not unique_nodes:
+ return unique_nodes
+
+ (all_unique_searcher,
+ unique_tip_searchers) = self._make_unique_searchers(unique_nodes,
+ unique_searcher, common_searcher)
+
+ self._refine_unique_nodes(unique_searcher, all_unique_searcher,
+ unique_tip_searchers, common_searcher)
+ true_unique_nodes = unique_nodes.difference(common_searcher.seen)
if 'graph' in debug.debug_flags:
- _mutter = trace.mutter
- else:
- def _mutter(*args, **kwargs):
- pass
-
- unique_searcher = self._make_breadth_first_searcher([unique_revision])
- # we know that unique_revision isn't in common_revisions
+ trace.mutter('Found %s truly unique nodes out of %s',
+ len(true_unique_nodes), len(unique_nodes))
+ return true_unique_nodes
+
+ def _find_initial_unique_nodes(self, unique_revisions, common_revisions):
+ """Steps 1-3 of find_unique_ancestors.
+
+ Find the maximal set of unique nodes. Some of these might actually
+ still be common, but we are sure that there are no other unique nodes.
+
+ :return: (unique_searcher, common_searcher)
+ """
+
+ unique_searcher = self._make_breadth_first_searcher(unique_revisions)
+ # we know that unique_revisions aren't in common_revisions, so skip
+ # past them.
unique_searcher.next()
common_searcher = self._make_breadth_first_searcher(common_revisions)
+ # As long as we are still finding unique nodes, keep searching
while unique_searcher._next_query:
next_unique_nodes = set(unique_searcher.step())
next_common_nodes = set(common_searcher.step())
@@ -264,27 +294,36 @@
unique_are_common_nodes.update(
next_common_nodes.intersection(unique_searcher.seen))
if unique_are_common_nodes:
+ ancestors = unique_searcher.find_seen_ancestors(
+ unique_are_common_nodes)
# TODO: This is a bit overboard, we only really care about
# the ancestors of the tips because the rest we
# already know. This is *correct* but causes us to
- # search far too much ancestry.
- ancestors = unique_searcher.find_seen_ancestors(
- unique_are_common_nodes)
+ # search too much ancestry.
ancestors.update(common_searcher.find_seen_ancestors(ancestors))
unique_searcher.stop_searching_any(ancestors)
common_searcher.start_searching(ancestors)
- unique_nodes = unique_searcher.seen.difference(common_searcher.seen)
- if not unique_nodes:
- return unique_nodes
+ return unique_searcher, common_searcher
+
+
+ def _make_unique_searchers(self, unique_nodes, unique_searcher,
+ common_searcher):
+ """Create a searcher for all the unique search tips (step 4).
+
+ As a side effect, the common_searcher will stop searching any nodes
+ that are ancestors of the unique searcher tips.
+
+ :return: (all_unique_searcher, unique_tip_searchers)
+ """
unique_tips = self._remove_simple_descendants(unique_nodes,
self.get_parent_map(unique_nodes))
if len(unique_tips) == 1:
- unique_searchers = []
+ unique_tip_searchers = []
ancestor_all_unique = unique_searcher.find_seen_ancestors(unique_tips)
else:
- unique_searchers = []
+ unique_tip_searchers = []
for tip in unique_tips:
revs_to_search = unique_searcher.find_seen_ancestors([tip])
revs_to_search.update(
@@ -293,18 +332,21 @@
# We don't care about the starting nodes.
searcher._label = tip
searcher.step()
- unique_searchers.append(searcher)
+ unique_tip_searchers.append(searcher)
ancestor_all_unique = None
- for searcher in unique_searchers:
+ for searcher in unique_tip_searchers:
if ancestor_all_unique is None:
ancestor_all_unique = set(searcher.seen)
else:
ancestor_all_unique = ancestor_all_unique.intersection(
searcher.seen)
# Collapse all the common nodes into a single searcher
- all_unique_searcher = self._make_breadth_first_searcher(ancestor_all_unique)
+ all_unique_searcher = self._make_breadth_first_searcher(
+ ancestor_all_unique)
if ancestor_all_unique:
+ # We've seen these nodes in all the searchers, so we'll just go to
+ # the next tips
all_unique_searcher.step()
# Stop any search tips that are already known as ancestors of the
@@ -313,55 +355,137 @@
common_searcher.find_seen_ancestors(ancestor_all_unique))
total_stopped = 0
- for searcher in unique_searchers:
+ for searcher in unique_tip_searchers:
total_stopped += len(searcher.stop_searching_any(
searcher.find_seen_ancestors(ancestor_all_unique)))
- _mutter('For %s unique nodes, created %s + 1 unique searchers'
- ' (%s stopped search tips, %s common ancestors'
- ' (%s stopped common)',
- len(unique_nodes), len(unique_searchers), total_stopped,
- len(ancestor_all_unique), len(stopped_common))
- del ancestor_all_unique, stopped_common
-
- # We step the ancestor_all_unique searcher only every other step
- step_all_unique = 0
+ if 'graph' in debug.debug_flags:
+ trace.mutter('For %s unique nodes, created %s + 1 unique searchers'
+ ' (%s stopped search tips, %s common ancestors'
+ ' (%s stopped common)',
+ len(unique_nodes), len(unique_tip_searchers),
+ total_stopped, len(ancestor_all_unique),
+ len(stopped_common))
+ return all_unique_searcher, unique_tip_searchers
+
+ def _step_unique_and_common_searchers(self, common_searcher,
+ unique_tip_searchers,
+ unique_searcher):
+ newly_seen_common = set(common_searcher.step())
+ newly_seen_unique = set()
+ for searcher in unique_tip_searchers:
+ next = set(searcher.step())
+ next.update(unique_searcher.find_seen_ancestors(next))
+ next.update(common_searcher.find_seen_ancestors(next))
+ for alt_searcher in unique_tip_searchers:
+ if alt_searcher is searcher:
+ continue
+ next.update(alt_searcher.find_seen_ancestors(next))
+ searcher.start_searching(next)
+ newly_seen_unique.update(next)
+ return newly_seen_common, newly_seen_unique,
+
+ def _find_nodes_common_to_all_unique(self, unique_tip_searchers,
+ all_unique_searcher,
+ newly_seen_unique, step_all_unique):
+ """Find nodes that are common to all unique_tip_searchers.
+
+ If it is time, step the all_unique_searcher, and add its nodes to the
+ result.
+ """
+ unique_are_common_nodes = newly_seen_unique.copy()
+ for searcher in unique_tip_searchers:
+ unique_are_common_nodes.intersection_update(searcher.seen)
+ unique_are_common_nodes.intersection_update(
+ all_unique_searcher.seen)
+ # Step all-unique less frequently than the other searchers.
+ # In the common case, we don't need to spider out far here, so
+ # avoid doing extra work.
+ if not step_all_unique:
+ tstart = time.clock()
+ nodes = all_unique_searcher.step()
+ unique_are_common_nodes.update(nodes)
+ tdelta = time.clock() - tstart
+ if 'graph' in debug.debug_flags:
+ trace.mutter('all_unique_searcher step() took %.3fs'
+ 'for %d nodes (%d total), iteration: %s',
+ tdelta, len(nodes), len(all_unique_searcher.seen),
+ all_unique_searcher._iterations)
+ return unique_are_common_nodes
+
+ def _collapse_unique_searchers(self, unique_tip_searchers,
+ unique_are_common_nodes):
+ """Combine searchers that are searching the same tips.
+
+ When two searchers are searching the same tips, we can stop one of the
+ searchers. We also know that the maximal set of common ancestors is the
+ intersection of the two original searchers.
+
+ :return: A list of searchers that are searching unique nodes.
+ """
+ # Filter out searchers that don't actually search different
+ # nodes. We already have the ancestry intersection for them
+ unique_search_tips = {}
+ for searcher in unique_tip_searchers:
+ stopped = searcher.stop_searching_any(unique_are_common_nodes)
+ will_search_set = frozenset(searcher._next_query)
+ if not will_search_set:
+ if 'graph' in debug.debug_flags:
+ trace.mutter('Unique searcher %s was stopped.'
+ ' (%s iterations) %d nodes stopped',
+ searcher._label,
+ searcher._iterations,
+ len(stopped))
+ elif will_search_set not in unique_search_tips:
+ # This searcher is searching a unique set of nodes, let it
+ unique_search_tips[will_search_set] = [searcher]
+ else:
+ unique_search_tips[will_search_set].append(searcher)
+ # TODO: it might be possible to collapse searchers faster when they
+ # only have *some* search tips in common.
+ next_unique_searchers = []
+ for searchers in unique_search_tips.itervalues():
+ if len(searchers) == 1:
+ # Searching unique tips, go for it
+ next_unique_searchers.append(searchers[0])
+ else:
+ # These searchers have started searching the same tips, we
+ # don't need them to cover the same ground. The
+ # intersection of their ancestry won't change, so create a
+ # new searcher, combining their histories.
+ next_searcher = searchers[0]
+ for searcher in searchers[1:]:
+ next_searcher.seen.intersection_update(searcher.seen)
+ if 'graph' in debug.debug_flags:
+ trace.mutter('Combining %s searchers into a single'
+ ' searcher searching %s nodes with'
+ ' %s ancestry',
+ len(searchers),
+ len(next_searcher._next_query),
+ len(next_searcher.seen))
+ next_unique_searchers.append(next_searcher)
+ return next_unique_searchers
+
+ def _refine_unique_nodes(self, unique_searcher, all_unique_searcher,
+ unique_tip_searchers, common_searcher):
+ """Steps 5-8 of find_unique_ancestors.
+
+ This function returns when common_searcher has stopped searching for
+ more nodes.
+ """
+ # We step the ancestor_all_unique searcher only every
+ # STEP_UNIQUE_SEARCHER_EVERY steps.
+ step_all_unique_counter = 0
# While we still have common nodes to search
while common_searcher._next_query:
- newly_seen_common = set(common_searcher.step())
- newly_seen_unique = set()
- def step_searchers():
- for searcher in unique_searchers:
- next = set(searcher.step())
- next.update(unique_searcher.find_seen_ancestors(next))
- next.update(common_searcher.find_seen_ancestors(next))
- for alt_searcher in unique_searchers:
- if alt_searcher is searcher:
- continue
- next.update(alt_searcher.find_seen_ancestors(next))
- searcher.start_searching(next)
- newly_seen_unique.update(next)
- step_searchers()
+ (newly_seen_common,
+ newly_seen_unique) = self._step_unique_and_common_searchers(
+ common_searcher, unique_tip_searchers, unique_searcher)
# These nodes are common ancestors of all unique nodes
- unique_are_common_nodes = newly_seen_unique.copy()
- def intersect_searchers():
- for searcher in unique_searchers:
- unique_are_common_nodes.intersection_update(searcher.seen)
- unique_are_common_nodes.intersection_update(
- all_unique_searcher.seen)
- # Step the all-unique less frequently than the other searchers.
- # In the common case, we don't need to spider out far here, so
- # avoid doing extra work.
- if not step_all_unique:
- tstart = time.clock()
- nodes = all_unique_searcher.step()
- unique_are_common_nodes.update(nodes)
- tdelta = time.clock() - tstart
- _mutter('all_unique_searcher step() took %.3fs for %d nodes'
- ' (%d total), iteration: %s', tdelta,
- len(nodes), len(all_unique_searcher.seen),
- all_unique_searcher._iterations)
- intersect_searchers()
- step_all_unique = (step_all_unique + 1) % STEP_UNIQUE_SEARCHER_EVERY
+ unique_are_common_nodes = self._find_nodes_common_to_all_unique(
+ unique_tip_searchers, all_unique_searcher, newly_seen_unique,
+ step_all_unique_counter==0)
+ step_all_unique_counter = ((step_all_unique_counter + 1)
+ % STEP_UNIQUE_SEARCHER_EVERY)
if newly_seen_common:
# If a 'common' node is an ancestor of all unique searchers, we
@@ -376,56 +500,16 @@
all_unique_searcher.start_searching(unique_are_common_nodes)
common_searcher.stop_searching_any(unique_are_common_nodes)
- def filter_searchers():
- # Filter out searchers that don't actually search different
- # nodes. We already have the ancestry intersection for them
- unique_search_tips = {}
- for searcher in unique_searchers:
- stopped = searcher.stop_searching_any(unique_are_common_nodes)
- will_search_set = frozenset(searcher._next_query)
- if not will_search_set:
- _mutter('Unique searcher %s was stopped.'
- ' (%s iterations) %d nodes stopped',
- searcher._label,
- searcher._iterations,
- len(stopped))
- elif will_search_set not in unique_search_tips:
- # This searcher is searching a unique set of nodes, let it
- unique_search_tips[will_search_set] = [searcher]
- else:
- unique_search_tips[will_search_set].append(searcher)
- next_unique_searchers = []
- for searchers in unique_search_tips.itervalues():
- if len(searchers) == 1:
- # Searching unique tips, go for it
- next_unique_searchers.append(searchers[0])
- else:
- # These searchers have started searching the same tips, we
- # don't need them to cover the same ground. The
- # intersection of their ancestry won't change, so create a
- # new searcher, combining their histories.
- next_searcher = searchers[0]
- ancestor_intersection = next_searcher.seen
- for searcher in searchers[1:]:
- ancestor_intersection = ancestor_intersection.intersection(searcher.seen)
- next_searcher.seen = ancestor_intersection
- _mutter('Combining %s searchers into a single searcher'
- ' searching %s nodes with %s ancestry',
- len(searchers), len(next_searcher._next_query),
- len(next_searcher.seen))
- next_unique_searchers.append(next_searcher)
- return next_unique_searchers
- next_unique_searchers = filter_searchers()
- if len(unique_searchers) != len(next_unique_searchers):
- _mutter('Collapsed %s unique searchers => %s'
- ' at %s iterations',
- len(unique_searchers), len(next_unique_searchers),
- all_unique_searcher._iterations)
- unique_searchers = next_unique_searchers
- true_unique_nodes = unique_nodes.difference(common_searcher.seen)
- _mutter('Found %s truly unique nodes out of %s',
- len(true_unique_nodes), len(unique_nodes))
- return true_unique_nodes
+ next_unique_searchers = self._collapse_unique_searchers(
+ unique_tip_searchers, unique_are_common_nodes)
+ if len(unique_tip_searchers) != len(next_unique_searchers):
+ if 'graph' in debug.debug_flags:
+ trace.mutter('Collapsed %s unique searchers => %s'
+ ' at %s iterations',
+ len(unique_tip_searchers),
+ len(next_unique_searchers),
+ all_unique_searcher._iterations)
+ unique_tip_searchers = next_unique_searchers
@symbol_versioning.deprecated_method(symbol_versioning.one_one)
def get_parents(self, revisions):
@@ -1095,7 +1179,16 @@
return self
def find_seen_ancestors(self, revisions):
- """Find ancestors of these revisions that have already been seen."""
+ """Find ancestors of these revisions that have already been seen.
+
+ This function generally makes the assumption that querying for the
+ parents of a node that has already been queried is reasonably cheap.
+ (eg, not a round trip to a remote host).
+ """
+ # TODO: Often we might ask one searcher for its seen ancestors, and
+ # then ask another searcher the same question. This can result in
+ # searching the same revisions repeatedly if the two searchers
+ # have a lot of overlap.
all_seen = self.seen
pending = set(revisions).intersection(all_seen)
seen_ancestors = set(pending)
@@ -1130,6 +1223,9 @@
None of the specified revisions are required to be present in the
search list. In this case, the call is a no-op.
"""
+ # TODO: does this help performance?
+ # if not revisions:
+ # return set()
revisions = frozenset(revisions)
if self._returning == 'next':
stopped = self._next_query.intersection(revisions)
More information about the bazaar-commits
mailing list