Rev 6022: Track some timing info. in http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269

John Arbash Meinel john at arbash-meinel.com
Thu Jul 21 18:14:58 UTC 2011


At http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269

------------------------------------------------------------
revno: 6022
revision-id: john at arbash-meinel.com-20110721181442-7xd5o86ti91ip24l
parent: john at arbash-meinel.com-20110721142102-z333hn366uvpntn3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-too-much-walking-388269
timestamp: Thu 2011-07-21 20:14:42 +0200
message:
  Track some timing info.
  
  Note that the performance improvement shows up even for bzr.
  It is 18s with the normal method, and 13s with the 'only recent'
  method.
-------------- next part --------------
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2011-07-21 14:21:02 +0000
+++ b/bzrlib/remote.py	2011-07-21 18:14:42 +0000
@@ -1744,7 +1744,7 @@
         if parents_map is None:
             # Repository is not locked, so there's no cache.
             parents_map = {}
-        if len(parents_map) < 10000:
+        if True or len(parents_map) < 10000:
             # start_set is all the keys in the cache
             start_set = set(parents_map)
             # result set is all the references to keys in the cache
@@ -1774,10 +1774,9 @@
                          len(keys.intersection(stop_keys)))
             recipe = ('manual', start_set, stop_keys, key_count)
         else:
-            import pdb; pdb.set_trace()
-            # We've searched too many revisions for it to be efficient to
-            # recreate our search on the server. So just create a 'minimal'
-            # search pattern
+            # TODO: If we use this heavily, then we should just cache the
+            #       reverse map. It certainly only changes based on newly
+            #       requested entries.
             parent_to_children_map = {}
             for child, parents in parents_map.iteritems():
                 for p in parents:
@@ -1789,10 +1788,19 @@
                     else:
                         parent_to_children_map[p] = parent_to_children_map[p] + (child,)
             stop_keys = set(keys)
+            stop_keys.difference_update(self._unstacked_provider.missing_keys)
             # Just look at immediate children
             child_keys = set()
             for k in keys:
                 child_keys.update(parent_to_children_map[k])
+            # Without this line, we get the revision count wrong for 'bzr'. I'm
+            # guessing a shortcut caused some revs to be found early, and then
+            # not walked now. So without c for c in parents_map[k] we get *way*
+            # too many keys, because the graph flood-fills. Without 'if c not
+            # in child_keys' we stop before we start and get the wrong answer
+            # that way.
+            map(stop_keys.update, [[c for c in parents_map[k] if c not in child_keys]
+                                   for k in child_keys])
             mutter('Faking search set _get_parent_map_rpc,'
                          ' %d cache size, %d start keys'
                          ' %d included_keys %d stop_keys',

=== modified file 'bzrlib/vf_repository.py'
--- a/bzrlib/vf_repository.py	2011-07-21 14:21:02 +0000
+++ b/bzrlib/vf_repository.py	2011-07-21 18:14:42 +0000
@@ -16,6 +16,7 @@
 
 """Repository formats built around versioned files."""
 
+import time
 
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
@@ -70,7 +71,7 @@
     )
 
 from bzrlib.trace import (
-    mutter,
+    mutter, note
     )
 
 
@@ -2503,6 +2504,7 @@
         :return: A set of revision ids.
         """
         import pdb; pdb.set_trace()
+        t = time.time()
         target_graph = self.target.get_graph()
         revision_ids = frozenset(revision_ids)
         if if_present_ids:
@@ -2515,7 +2517,7 @@
         searcher = source_graph._make_breadth_first_searcher(all_wanted_revs)
         null_set = frozenset([_mod_revision.NULL_REVISION])
         searcher_exhausted = False
-        search_step = rev_count = 0
+        search_step = 0
         gpm = searcher._parents_provider.get_parent_map
         def get_parent_map_logging(revisions):
             res = gpm(revisions)
@@ -2560,6 +2562,8 @@
                 searcher.stop_searching_any(stop_revs)
             if searcher_exhausted:
                 break
+        note('Walking took %.3fs, with %d steps'
+             % (time.time() - t, search_step))
         import pdb; pdb.set_trace()
         return searcher.get_result()
 



More information about the bazaar-commits mailing list