Rev 6027: Run the search a 3rd time if we encounter heads. in http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269

John Arbash Meinel john at arbash-meinel.com
Thu Aug 11 15:30:45 UTC 2011


At http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269

------------------------------------------------------------
revno: 6027
revision-id: john at arbash-meinel.com-20110811153030-vciura8b9e04npj3
parent: john at arbash-meinel.com-20110811150126-enmjwt8zig66jau3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-too-much-walking-388269
timestamp: Thu 2011-08-11 17:30:30 +0200
message:
  Run the search a 3rd time if we encounter heads.
  
  I think we can just filter the search recipe, but for now I'm working
  on correctness first.
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2011-08-11 13:49:07 +0000
+++ b/bzrlib/graph.py	2011-08-11 15:30:30 +0000
@@ -1419,13 +1419,14 @@
         parents_of_found = set()
         # revisions may contain nodes that point to other nodes in revisions:
         # we want to filter them out.
-        self.seen.update(revisions)
+        seen = self.seen
+        seen.update(revisions)
         parent_map = self._parents_provider.get_parent_map(revisions)
         found_revisions.update(parent_map)
         for rev_id, parents in parent_map.iteritems():
             if parents is None:
                 continue
-            new_found_parents = [p for p in parents if p not in self.seen]
+            new_found_parents = [p for p in parents if p not in seen]
             if new_found_parents:
                 # Calling set.update() with an empty generator is actually
                 # rather expensive.
@@ -1933,6 +1934,55 @@
     return child_map
 
 
+def _find_possible_heads(parent_map, tip_keys, depth):
+    """Walk backwards through the parent_map, finding possible heads."""
+    child_map = invert_parent_map(parent_map)
+    heads = set()
+    current_roots = set(tip_keys)
+    walked = set(current_roots)
+    while current_roots and depth:
+        depth -= 1
+        children = set()
+        for p in current_roots:
+            # Is it better to pre- or post- filter the children?
+            try:
+                children.update(child_map[p])
+            except KeyError:
+                heads.add(p)
+        # TODO: Use the right set notation here since 'walked' grows large,
+        #       while 'children' should usually be small
+        # Don't walk keys we've already walked
+        children.difference_update(walked)
+        walked.update(children)
+        current_roots = children
+    if current_roots:
+        heads.update(current_roots)
+    return heads
+
+
+def _run_search(parent_map, heads, exclude_keys):
+    g = Graph(DictParentsProvider(parent_map))
+    s = g._make_breadth_first_searcher(heads)
+    found_heads = set()
+    while True:
+        try:
+            next_revs = s.next()
+        except StopIteration:
+            break
+        for parents in s._current_parents.itervalues():
+            f_heads = heads.intersection(parents)
+            if f_heads:
+                found_heads.update(f_heads)
+        stop_keys = exclude_keys.intersection(next_revs)
+        if stop_keys:
+            s.stop_searching_any(stop_keys)
+    for parents in s._current_parents.itervalues():
+        f_heads = heads.intersection(parents)
+        if f_heads:
+            found_heads.update(f_heads)
+    return s, found_heads
+
+
 def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,
                                           depth):
     """Transform a parent_map that is searching 'tip_keys' into an
@@ -1963,37 +2013,16 @@
     :param tip_keys: the revision_ids that we are searching
     :param depth: How far back to walk.
     """
-    child_map = invert_parent_map(parent_map)
-    exclude_keys = set(tip_keys)
-    heads = set()
-    current_roots = set(tip_keys)
-    walked = set(current_roots)
-    while current_roots and depth:
-        depth -= 1
-        children = set()
-        for p in current_roots:
-            # Is it better to pre- or post- filter the children?
-            try:
-                children.update(child_map[p])
-            except KeyError:
-                heads.add(p)
-        # TODO: Use the right set notation here since 'walked' grows large,
-        #       while 'children' should usually be small
-        # Don't walk keys we've already walked
-        children.difference_update(walked)
-        walked.update(children)
-        current_roots = children
-    if current_roots:
-        heads.update(current_roots)
-    start_keys = current_roots
-    g = Graph(DictParentsProvider(parent_map))
-    s = g._make_breadth_first_searcher(heads)
-    while True:
-        try:
-            next_revs = s.next()
-        except StopIteration:
-            break
-        s.stop_searching_any(exclude_keys.intersection(next_revs))
+    heads = _find_possible_heads(parent_map, tip_keys, depth)
+    s, found_heads = _run_search(parent_map, heads, set(tip_keys))
+    if found_heads:
+        s2, extra_heads = _run_search(parent_map, heads - found_heads,
+                                      set(tip_keys))
+        result1 = s.get_result()
+        result2 = s2.get_result()
+        assert result1._keys == result2._keys
+        assert not extra_heads
+        return result2.get_recipe()[1:]
     return s.get_result().get_recipe()[1:]
 
 

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2011-08-11 15:01:26 +0000
+++ b/bzrlib/tests/test_graph.py	2011-08-11 15:30:30 +0000
@@ -1808,7 +1808,7 @@
                                 extended_history_shortcut, (), ['a'], 10)
         # Note that even though we only take 1 step back, we find 'f', which
         # means the described search will still find d and c.
-        self.assertSearchResult(['b', 'f'], ['a'], 4,
+        self.assertSearchResult(['f'], ['a'], 4,
                                 extended_history_shortcut, (), ['a'], 1)
-        self.assertSearchResult(['c', 'f'], ['a'], 4,
+        self.assertSearchResult(['f'], ['a'], 4,
                                 extended_history_shortcut, (), ['a'], 2)



More information about the bazaar-commits mailing list