Rev 6023: Start refactoring code into graph.py code for easier testing. in http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
John Arbash Meinel
john at arbash-meinel.com
Tue Jul 26 13:28:54 UTC 2011
At http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
------------------------------------------------------------
revno: 6023
revision-id: john at arbash-meinel.com-20110726132849-6kno2j5jct20l4u3
parent: john at arbash-meinel.com-20110721181442-7xd5o86ti91ip24l
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-too-much-walking-388269
timestamp: Tue 2011-07-26 15:28:49 +0200
message:
Start refactoring code into graph.py code for easier testing.
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2011-06-20 11:03:53 +0000
+++ b/bzrlib/graph.py 2011-07-26 13:28:49 +0000
@@ -1891,6 +1891,68 @@
limit=self.limit)
+def ignore():
+ # TODO: If we use this heavily, then we should just cache the
+ # reverse map. It certainly only changes based on newly
+ # requested entries.
+ parent_to_children_map = {}
+ for child, parents in parents_map.iteritems():
+ for p in parents:
+ # Any given parent is likely to have only a small handful
+ # of children, many will have only one.
+ # TODO: Use static_tuple
+ if p not in parent_to_children_map:
+ parent_to_children_map[p] = (child,)
+ else:
+ parent_to_children_map[p] = parent_to_children_map[p] + (child,)
+ stop_keys = set(keys)
+ stop_keys.difference_update(self._unstacked_provider.missing_keys)
+ # Just look at immediate children
+ child_keys = set()
+ for k in keys:
+ child_keys.update(parent_to_children_map[k])
+ # Without this line, we get the revision count wrong for 'bzr'. I'm
+ # guessing a shortcut caused some revs to be found early, and then
+ # not walked now. So without c for c in parents_map[k] we get *way*
+ # too many keys, because the graph flood-fills. Without 'if c not
+ # in child_keys' we stop before we start and get the wrong answer
+ # that way.
+ map(stop_keys.update, [[c for c in parents_map[k] if c not in child_keys]
+ for k in child_keys])
+ mutter('Faking search set _get_parent_map_rpc,'
+ ' %d cache size, %d start keys'
+ ' %d included_keys %d stop_keys',
+ len(parents_map), len(child_keys), len(child_keys),
+ len(keys))
+ recipe = ('manual', child_keys, stop_keys, len(child_keys))
+def search_result_from_parent_map(parent_map, missing_keys):
+ """Transform a parent_map into SearchResult information."""
+ if not parent_map:
+ # parent_map is empty or None, simple search result
+ return [], [], 0
+ # start_set is all the keys in the cache
+ start_set = set(parent_map)
+ # result set is all the references to keys in the cache
+ result_parents = set()
+ for parents in parent_map.itervalues():
+ result_parents.update(parents)
+ stop_keys = result_parents.difference(start_set)
+ # We don't need to send ghosts back to the server as a position to
+ # stop either.
+ stop_keys.difference_update(missing_keys)
+ key_count = len(parent_map)
+ if (revision.NULL_REVISION in result_parents
+ and revision.NULL_REVISION in missing_keys):
+ # If we pruned NULL_REVISION from the stop_keys because it's also
+ # in our cache of "missing" keys we need to increment our key count
+ # by 1, because the reconstituted SearchResult on the server will
+ # still consider NULL_REVISION to be an included key.
+ key_count += 1
+ included_keys = start_set.intersection(result_parents)
+ start_set.difference_update(included_keys)
+ return start_set, stop_keys, key_count
+
+
def collapse_linear_regions(parent_map):
"""Collapse regions of the graph that are 'linear'.
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2011-07-21 18:14:42 +0000
+++ b/bzrlib/remote.py 2011-07-26 13:28:49 +0000
@@ -1744,69 +1744,10 @@
if parents_map is None:
# Repository is not locked, so there's no cache.
parents_map = {}
- if True or len(parents_map) < 10000:
- # start_set is all the keys in the cache
- start_set = set(parents_map)
- # result set is all the references to keys in the cache
- result_parents = set()
- for parents in parents_map.itervalues():
- result_parents.update(parents)
- stop_keys = result_parents.difference(start_set)
- # We don't need to send ghosts back to the server as a position to
- # stop either.
- stop_keys.difference_update(self._unstacked_provider.missing_keys)
- key_count = len(parents_map)
- if (NULL_REVISION in result_parents
- and NULL_REVISION in self._unstacked_provider.missing_keys):
- # If we pruned NULL_REVISION from the stop_keys because it's also
- # in our cache of "missing" keys we need to increment our key count
- # by 1, because the reconstituted SearchResult on the server will
- # still consider NULL_REVISION to be an included key.
- key_count += 1
- included_keys = start_set.intersection(result_parents)
- start_set.difference_update(included_keys)
- mutter('Doing _get_parent_map_rpc,'
- ' %d cache size, %d start_set, %d result_parents,'
- ' %d stop_keys, %d included_keys, %d keys,'
- ' %d keys matching stop keys',
- len(parents_map), len(start_set), len(result_parents),
- len(stop_keys), len(included_keys), len(keys),
- len(keys.intersection(stop_keys)))
- recipe = ('manual', start_set, stop_keys, key_count)
- else:
- # TODO: If we use this heavily, then we should just cache the
- # reverse map. It certainly only changes based on newly
- # requested entries.
- parent_to_children_map = {}
- for child, parents in parents_map.iteritems():
- for p in parents:
- # Any given parent is likely to have only a small handful
- # of children, many will have only one.
- # TODO: Use static_tuple
- if p not in parent_to_children_map:
- parent_to_children_map[p] = (child,)
- else:
- parent_to_children_map[p] = parent_to_children_map[p] + (child,)
- stop_keys = set(keys)
- stop_keys.difference_update(self._unstacked_provider.missing_keys)
- # Just look at immediate children
- child_keys = set()
- for k in keys:
- child_keys.update(parent_to_children_map[k])
- # Without this line, we get the revision count wrong for 'bzr'. I'm
- # guessing a shortcut caused some revs to be found early, and then
- # not walked now. So without c for c in parents_map[k] we get *way*
- # too many keys, because the graph flood-fills. Without 'if c not
- # in child_keys' we stop before we start and get the wrong answer
- # that way.
- map(stop_keys.update, [[c for c in parents_map[k] if c not in child_keys]
- for k in child_keys])
- mutter('Faking search set _get_parent_map_rpc,'
- ' %d cache size, %d start keys'
- ' %d included_keys %d stop_keys',
- len(parents_map), len(child_keys), len(child_keys),
- len(keys))
- recipe = ('manual', child_keys, stop_keys, len(child_keys))
+ (start_set, stop_keys,
+ key_count) = graph.search_result_from_parent_map(parents_map,
+ self._unstacked_provider.missing_keys)
+ recipe = ('manual', start_set, stop_keys, key_count)
body = self._serialise_search_recipe(recipe)
path = self.bzrdir._path_for_remote_call(self._client)
for key in keys:
@@ -1815,7 +1756,6 @@
"key %r not a plain string" % (key,))
verb = 'Repository.get_parent_map'
args = (path, 'include-missing:') + tuple(keys)
- mutter('keys: %r body: %r' % (keys, body))
try:
response = self._call_with_body_bytes_expecting_body(
verb, args, body)
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2011-06-20 11:03:53 +0000
+++ b/bzrlib/tests/test_graph.py 2011-07-26 13:28:49 +0000
@@ -1744,3 +1744,44 @@
self.assertEqual(set([NULL_REVISION, 'tip', 'tag', 'mid']), recipe[2])
self.assertEqual(0, recipe[3])
self.assertTrue(result.is_empty())
+
+
+class TestSearchResultFromParentMap(TestGraphBase):
+
+ def assertSearchResult(self, start_keys, stop_keys, key_count, parent_map,
+ missing_keys=()):
+ (start, stop, count) = _mod_graph.search_result_from_parent_map(
+ parent_map, missing_keys)
+ self.assertEqual((sorted(start_keys), sorted(stop_keys), key_count),
+ (sorted(start), sorted(stop), count))
+
+ def test_no_parents(self):
+ self.assertSearchResult([], [], 0, {})
+ self.assertSearchResult([], [], 0, None)
+
+ def test_ancestry_1(self):
+ self.assertSearchResult(['rev4'], [NULL_REVISION], len(ancestry_1),
+ ancestry_1)
+
+ def test_ancestry_2(self):
+ self.assertSearchResult(['rev1b', 'rev4a'], [NULL_REVISION],
+ len(ancestry_2), ancestry_2)
+ self.assertSearchResult(['rev1b', 'rev4a'], [],
+ len(ancestry_2)+1, ancestry_2,
+ missing_keys=[NULL_REVISION])
+
+ def test_partial_search(self):
+ parent_map = dict((k,extended_history_shortcut[k])
+ for k in ['e', 'f'])
+ self.assertSearchResult(['e', 'f'], ['d', 'a'], 2,
+ parent_map)
+ parent_map.update((k,extended_history_shortcut[k])
+ for k in ['d', 'a'])
+ self.assertSearchResult(['e', 'f'], ['c', NULL_REVISION], 4,
+ parent_map)
+ parent_map['c'] = extended_history_shortcut['c']
+ self.assertSearchResult(['e', 'f'], ['b'], 6,
+ parent_map, missing_keys=[NULL_REVISION])
+ parent_map['b'] = extended_history_shortcut['b']
+ self.assertSearchResult(['e', 'f'], [], 7,
+ parent_map, missing_keys=[NULL_REVISION])
More information about the bazaar-commits
mailing list