Rev 6023: Start refactoring code into graph.py code for easier testing. in http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
John Arbash Meinel
john at arbash-meinel.com
Tue Jul 26 13:28:54 UTC 2011
At http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
------------------------------------------------------------
revno: 6023
revision-id: john at arbash-meinel.com-20110726132849-6kno2j5jct20l4u3
parent: john at arbash-meinel.com-20110721181442-7xd5o86ti91ip24l
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-too-much-walking-388269
timestamp: Tue 2011-07-26 15:28:49 +0200
message:
Start refactoring code into graph.py code for easier testing.
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2011-06-20 11:03:53 +0000
+++ b/bzrlib/graph.py 2011-07-26 13:28:49 +0000
@@ -1891,6 +1891,68 @@
limit=self.limit)
+def ignore():
+ # TODO: If we use this heavily, then we should just cache the
+ # reverse map. It certainly only changes based on newly
+ # requested entries.
+ parent_to_children_map = {}
+ for child, parents in parents_map.iteritems():
+ for p in parents:
+ # Any given parent is likely to have only a small handful
+ # of children, many will have only one.
+ # TODO: Use static_tuple
+ if p not in parent_to_children_map:
+ parent_to_children_map[p] = (child,)
+ else:
+ parent_to_children_map[p] = parent_to_children_map[p] + (child,)
+ stop_keys = set(keys)
+ stop_keys.difference_update(self._unstacked_provider.missing_keys)
+ # Just look at immediate children
+ child_keys = set()
+ for k in keys:
+ child_keys.update(parent_to_children_map[k])
+ # Without this line, we get the revision count wrong for 'bzr'. I'm
+ # guessing a shortcut caused some revs to be found early, and then
+ # not walked now. So without c for c in parents_map[k] we get *way*
+ # too many keys, because the graph flood-fills. Without 'if c not
+ # in child_keys' we stop before we start and get the wrong answer
+ # that way.
+ map(stop_keys.update, [[c for c in parents_map[k] if c not in child_keys]
+ for k in child_keys])
+ mutter('Faking search set _get_parent_map_rpc,'
+ ' %d cache size, %d start keys'
+ ' %d included_keys %d stop_keys',
+ len(parents_map), len(child_keys), len(child_keys),
+ len(keys))
+ recipe = ('manual', child_keys, stop_keys, len(child_keys))
+def search_result_from_parent_map(parent_map, missing_keys):
+ """Transform a parent_map into SearchResult information."""
+ if not parent_map:
+ # parent_map is empty or None, simple search result
+ return [], [], 0
+ # start_set is all the keys in the cache
+ start_set = set(parent_map)
+ # result set is all the references to keys in the cache
+ result_parents = set()
+ for parents in parent_map.itervalues():
+ result_parents.update(parents)
+ stop_keys = result_parents.difference(start_set)
+ # We don't need to send ghosts back to the server as a position to
+ # stop either.
+ stop_keys.difference_update(missing_keys)
+ key_count = len(parent_map)
+ if (revision.NULL_REVISION in result_parents
+ and revision.NULL_REVISION in missing_keys):
+ # If we pruned NULL_REVISION from the stop_keys because it's also
+ # in our cache of "missing" keys we need to increment our key count
+ # by 1, because the reconstituted SearchResult on the server will
+ # still consider NULL_REVISION to be an included key.
+ key_count += 1
+ included_keys = start_set.intersection(result_parents)
+ start_set.difference_update(included_keys)
+ return start_set, stop_keys, key_count
+
+
def collapse_linear_regions(parent_map):
"""Collapse regions of the graph that are 'linear'.
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2011-07-21 18:14:42 +0000
+++ b/bzrlib/remote.py 2011-07-26 13:28:49 +0000
@@ -1744,69 +1744,10 @@
if parents_map is None:
# Repository is not locked, so there's no cache.
parents_map = {}
- if True or len(parents_map) < 10000:
- # start_set is all the keys in the cache
- start_set = set(parents_map)
- # result set is all the references to keys in the cache
- result_parents = set()
- for parents in parents_map.itervalues():
- result_parents.update(parents)
- stop_keys = result_parents.difference(start_set)
- # We don't need to send ghosts back to the server as a position to
- # stop either.
- stop_keys.difference_update(self._unstacked_provider.missing_keys)
- key_count = len(parents_map)
- if (NULL_REVISION in result_parents
- and NULL_REVISION in self._unstacked_provider.missing_keys):
- # If we pruned NULL_REVISION from the stop_keys because it's also
- # in our cache of "missing" keys we need to increment our key count
- # by 1, because the reconstituted SearchResult on the server will
- # still consider NULL_REVISION to be an included key.
- key_count += 1
- included_keys = start_set.intersection(result_parents)
- start_set.difference_update(included_keys)
- mutter('Doing _get_parent_map_rpc,'
- ' %d cache size, %d start_set, %d result_parents,'
- ' %d stop_keys, %d included_keys, %d keys,'
- ' %d keys matching stop keys',
- len(parents_map), len(start_set), len(result_parents),
- len(stop_keys), len(included_keys), len(keys),
- len(keys.intersection(stop_keys)))
- recipe = ('manual', start_set, stop_keys, key_count)
- else:
- # TODO: If we use this heavily, then we should just cache the
- # reverse map. It certainly only changes based on newly
- # requested entries.
- parent_to_children_map = {}
- for child, parents in parents_map.iteritems():
- for p in parents:
- # Any given parent is likely to have only a small handful
- # of children, many will have only one.
- # TODO: Use static_tuple
- if p not in parent_to_children_map:
- parent_to_children_map[p] = (child,)
- else:
- parent_to_children_map[p] = parent_to_children_map[p] + (child,)
- stop_keys = set(keys)
- stop_keys.difference_update(self._unstacked_provider.missing_keys)
- # Just look at immediate children
- child_keys = set()
- for k in keys:
- child_keys.update(parent_to_children_map[k])
- # Without this line, we get the revision count wrong for 'bzr'. I'm
- # guessing a shortcut caused some revs to be found early, and then
- # not walked now. So without c for c in parents_map[k] we get *way*
- # too many keys, because the graph flood-fills. Without 'if c not
- # in child_keys' we stop before we start and get the wrong answer
- # that way.
- map(stop_keys.update, [[c for c in parents_map[k] if c not in child_keys]
- for k in child_keys])
- mutter('Faking search set _get_parent_map_rpc,'
- ' %d cache size, %d start keys'
- ' %d included_keys %d stop_keys',
- len(parents_map), len(child_keys), len(child_keys),
- len(keys))
- recipe = ('manual', child_keys, stop_keys, len(child_keys))
+ (start_set, stop_keys,
+ key_count) = graph.search_result_from_parent_map(parents_map,
+ self._unstacked_provider.missing_keys)
+ recipe = ('manual', start_set, stop_keys, key_count)
body = self._serialise_search_recipe(recipe)
path = self.bzrdir._path_for_remote_call(self._client)
for key in keys:
@@ -1815,7 +1756,6 @@
"key %r not a plain string" % (key,))
verb = 'Repository.get_parent_map'
args = (path, 'include-missing:') + tuple(keys)
- mutter('keys: %r body: %r' % (keys, body))
try:
response = self._call_with_body_bytes_expecting_body(
verb, args, body)
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2011-06-20 11:03:53 +0000
+++ b/bzrlib/tests/test_graph.py 2011-07-26 13:28:49 +0000
@@ -1744,3 +1744,44 @@
self.assertEqual(set([NULL_REVISION, 'tip', 'tag', 'mid']), recipe[2])
self.assertEqual(0, recipe[3])
self.assertTrue(result.is_empty())
+
+
+class TestSearchResultFromParentMap(TestGraphBase):
+
+ def assertSearchResult(self, start_keys, stop_keys, key_count, parent_map,
+ missing_keys=()):
+ (start, stop, count) = _mod_graph.search_result_from_parent_map(
+ parent_map, missing_keys)
+ self.assertEqual((sorted(start_keys), sorted(stop_keys), key_count),
+ (sorted(start), sorted(stop), count))
+
+ def test_no_parents(self):
+ self.assertSearchResult([], [], 0, {})
+ self.assertSearchResult([], [], 0, None)
+
+ def test_ancestry_1(self):
+ self.assertSearchResult(['rev4'], [NULL_REVISION], len(ancestry_1),
+ ancestry_1)
+
+ def test_ancestry_2(self):
+ self.assertSearchResult(['rev1b', 'rev4a'], [NULL_REVISION],
+ len(ancestry_2), ancestry_2)
+ self.assertSearchResult(['rev1b', 'rev4a'], [],
+ len(ancestry_2)+1, ancestry_2,
+ missing_keys=[NULL_REVISION])
+
+ def test_partial_search(self):
+ parent_map = dict((k,extended_history_shortcut[k])
+ for k in ['e', 'f'])
+ self.assertSearchResult(['e', 'f'], ['d', 'a'], 2,
+ parent_map)
+ parent_map.update((k,extended_history_shortcut[k])
+ for k in ['d', 'a'])
+ self.assertSearchResult(['e', 'f'], ['c', NULL_REVISION], 4,
+ parent_map)
+ parent_map['c'] = extended_history_shortcut['c']
+ self.assertSearchResult(['e', 'f'], ['b'], 6,
+ parent_map, missing_keys=[NULL_REVISION])
+ parent_map['b'] = extended_history_shortcut['b']
+ self.assertSearchResult(['e', 'f'], [], 7,
+ parent_map, missing_keys=[NULL_REVISION])
More information about the bazaar-commits
mailing list