Rev 6022: Track some timing info. in http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
John Arbash Meinel
john at arbash-meinel.com
Thu Jul 21 18:14:58 UTC 2011
At http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
------------------------------------------------------------
revno: 6022
revision-id: john at arbash-meinel.com-20110721181442-7xd5o86ti91ip24l
parent: john at arbash-meinel.com-20110721142102-z333hn366uvpntn3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-too-much-walking-388269
timestamp: Thu 2011-07-21 20:14:42 +0200
message:
Track some timing info.
Note that the performance improvement shows up even for bzr.
It is 18s with the normal method, and 13s with the 'only recent'
method.
-------------- next part --------------
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2011-07-21 14:21:02 +0000
+++ b/bzrlib/remote.py 2011-07-21 18:14:42 +0000
@@ -1744,7 +1744,7 @@
if parents_map is None:
# Repository is not locked, so there's no cache.
parents_map = {}
- if len(parents_map) < 10000:
+ if True or len(parents_map) < 10000:
# start_set is all the keys in the cache
start_set = set(parents_map)
# result set is all the references to keys in the cache
@@ -1774,10 +1774,9 @@
len(keys.intersection(stop_keys)))
recipe = ('manual', start_set, stop_keys, key_count)
else:
- import pdb; pdb.set_trace()
- # We've searched too many revisions for it to be efficient to
- # recreate our search on the server. So just create a 'minimal'
- # search pattern
+ # TODO: If we use this heavily, then we should just cache the
+ # reverse map. It certainly only changes based on newly
+ # requested entries.
parent_to_children_map = {}
for child, parents in parents_map.iteritems():
for p in parents:
@@ -1789,10 +1788,19 @@
else:
parent_to_children_map[p] = parent_to_children_map[p] + (child,)
stop_keys = set(keys)
+ stop_keys.difference_update(self._unstacked_provider.missing_keys)
# Just look at immediate children
child_keys = set()
for k in keys:
child_keys.update(parent_to_children_map[k])
+ # Without this line, we get the revision count wrong for 'bzr'. I'm
+ # guessing a shortcut caused some revs to be found early, and then
+ # not walked now. So without c for c in parents_map[k] we get *way*
+ # too many keys, because the graph flood-fills. Without 'if c not
+ # in child_keys' we stop before we start and get the wrong answer
+ # that way.
+ map(stop_keys.update, [[c for c in parents_map[k] if c not in child_keys]
+ for k in child_keys])
mutter('Faking search set _get_parent_map_rpc,'
' %d cache size, %d start keys'
' %d included_keys %d stop_keys',
=== modified file 'bzrlib/vf_repository.py'
--- a/bzrlib/vf_repository.py 2011-07-21 14:21:02 +0000
+++ b/bzrlib/vf_repository.py 2011-07-21 18:14:42 +0000
@@ -16,6 +16,7 @@
"""Repository formats built around versioned files."""
+import time
from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
@@ -70,7 +71,7 @@
)
from bzrlib.trace import (
- mutter,
+ mutter, note
)
@@ -2503,6 +2504,7 @@
:return: A set of revision ids.
"""
import pdb; pdb.set_trace()
+ t = time.time()
target_graph = self.target.get_graph()
revision_ids = frozenset(revision_ids)
if if_present_ids:
@@ -2515,7 +2517,7 @@
searcher = source_graph._make_breadth_first_searcher(all_wanted_revs)
null_set = frozenset([_mod_revision.NULL_REVISION])
searcher_exhausted = False
- search_step = rev_count = 0
+ search_step = 0
gpm = searcher._parents_provider.get_parent_map
def get_parent_map_logging(revisions):
res = gpm(revisions)
@@ -2560,6 +2562,8 @@
searcher.stop_searching_any(stop_revs)
if searcher_exhausted:
break
+ note('Walking took %.3fs, with %d steps'
+ % (time.time() - t, search_step))
import pdb; pdb.set_trace()
return searcher.get_result()
More information about the bazaar-commits
mailing list