Rev 4379: Add the time_graph code for benchmarking. in http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads

John Arbash Meinel john at arbash-meinel.com
Mon Jun 8 22:03:20 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads

------------------------------------------------------------
revno: 4379
revision-id: john at arbash-meinel.com-20090608210315-my1czgs64q2yywyd
parent: john at arbash-meinel.com-20090608201222-gzehwzf2q98gmk0d
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-better_heads
timestamp: Mon 2009-06-08 16:03:15 -0500
message:
  Add the time_graph code for benchmarking.
  
  Add a lot of debugging counters, etc.
  
  At the moment, 'random sampling' shows a performance improvement of 4:1;
  however, just checking 'parents of a merge' in bzr.dev shows a
  performance *loss* of about 1:2. So we still need to do a bit more
  probing to figure out why. It may be the overhead of using objects,
  or it may be an issue of the algorithm.
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2009-06-08 20:12:22 +0000
+++ b/bzrlib/graph.py	2009-06-08 21:03:15 +0000
@@ -1196,6 +1196,7 @@
             self.linear_dominator, self.dominator_distance)
 
 
+_counters = [0, 0, 0, 0, 0]
 class KnownGraph(object):
     """This is a class which assumes we already know the full graph."""
 
@@ -1375,17 +1376,21 @@
 
     def _heads_from_candidate_nodes(self, candidate_nodes):
         queue = []
+        to_cleanup = []
         for node in candidate_nodes.itervalues():
             assert node.ancestor_of is None
             node.ancestor_of = (node.key,)
             queue.append((-node.gdfo, node))
+            to_cleanup.append(node)
         heapq.heapify(queue)
         # These are nodes that we determined are 'common' that we are no longer
         # walking
         stop_nodes = {}
         # Now we walk nodes until all nodes that are being walked are 'common'
         num_candidates = len(candidate_nodes)
+        counters = _counters
         while queue and len(candidate_nodes) > 1:
+            counters[0] += 1
             _, next = heapq.heappop(queue)
             assert not next.was_walked
             assert next.ancestor_of is not None
@@ -1400,39 +1405,46 @@
                 continue
             # Now project the current nodes ancestor list to the parent nodes,
             # and queue them up to be walked
-            for parent_key in next.parent_keys:
-                if parent_key in candidate_nodes:
-                    candidate_nodes.pop(parent_key)
-                parent_node = self._nodes[parent_key]
-                assert not parent_node.was_walked
-                if parent_node.ancestor_of is None:
-                    # This node hasn't been walked yet
-                    parent_node.ancestor_of = node.ancestor_of
-                    # Enqueue this node
-                    heapq.heappush(queue, (-parent_node.gdfo, parent_node))
-                else:
-                    # Combine to get the full set of parents
-                    all_ancestors = set(parent_node.ancestor_of)
-                    all_ancestors.update(node.ancestor_of)
-                    all_ancestors = tuple(sorted(all_ancestors))
-                    parent_node.ancestor_of = all_ancestors
-                    # This would otherwise require popping the item out of the
-                    # queue, because we think we are done processing it.
-                    # As is, we'll just let the queue clean itself up later.
-                    # if len(parent_node.ancestor_of) == num_candidates:
-                    #     # This is now a common node
-                    #     stop_nodes[parent_node.key] = parent_node
-                    # This node should already be enqueued by whoever walked it
-                    # earlier
+            def queue_parents():
+                for parent_key in next.parent_keys:
+                    counters[1] += 1
+                    if parent_key in candidate_nodes:
+                        candidate_nodes.pop(parent_key)
+                    parent_node = self._nodes[parent_key]
+                    assert not parent_node.was_walked
+                    if parent_node.ancestor_of is None:
+                        counters[2] += 1
+                        # This node hasn't been walked yet
+                        parent_node.ancestor_of = node.ancestor_of
+                        # Enqueue this node
+                        heapq.heappush(queue, (-parent_node.gdfo, parent_node))
+                        to_cleanup.append(parent_node)
+                    else:
+                        counters[3] += 1
+                        # Combine to get the full set of parents
+                        def combine_parents():
+                            all_ancestors = set(parent_node.ancestor_of)
+                            all_ancestors.update(node.ancestor_of)
+                            return tuple(sorted(all_ancestors))
+                        parent_node.ancestor_of = combine_parents()
+                        # This would otherwise require popping the item out of the
+                        # queue, because we think we are done processing it.
+                        # As is, we'll just let the queue clean itself up later.
+                        # if len(parent_node.ancestor_of) == num_candidates:
+                        #     # This is now a common node
+                        #     stop_nodes[parent_node.key] = parent_node
+                        # This node should already be enqueued by whoever walked it
+                        # earlier
+            queue_parents()
         # import pdb; pdb.set_trace()
-        self._clear_ancestry_info()
+        def cleanup():
+            for node in to_cleanup:
+                counters[4] += 1
+                node.ancestor_of = None
+                node.was_walked = False
+        cleanup()
         return set(candidate_nodes)
 
-    def _clear_ancestry_info(self):
-        for node in self._nodes.itervalues():
-            node.ancestor_of = None
-            node.was_walked = False
-
     def get_parent_map(self, keys):
         # Thunk to match the Graph._parents_provider api.
         nodes = [self._nodes[key] for key in keys]

=== added file 'tools/time_graph.py'
--- a/tools/time_graph.py	1970-01-01 00:00:00 +0000
+++ b/tools/time_graph.py	2009-06-08 21:03:15 +0000
@@ -0,0 +1,63 @@
+import random
+import time
+import sys
+import optparse
+from bzrlib import branch, graph, ui, trace
+from bzrlib.ui import text
+
+p = optparse.OptionParser()
+p.add_option('--one')
+opts, args = p.parse_args(sys.argv[1:])
+trace.enable_default_logging()
+ui.ui_factory = text.TextUIFactory()
+
+if len(args) >= 1:
+    b = branch.Branch.open(args[0])
+else:
+    b = branch.Branch.open('.')
+b.lock_read()
+try:
+    g = b.repository.get_graph()
+    parent_map = dict(p for p in g.iter_ancestry([b.last_revision()])
+                         if p[1] is not None)
+finally:
+    b.unlock()
+
+print 'Found %d nodes' % (len(parent_map),)
+
+def all_heads_comp(g, combinations):
+    pb = ui.ui_factory.nested_progress_bar()
+    try:
+        for idx, combo in enumerate(combinations):
+            pb.update('proc', idx, len(combinations))
+            g.heads(combo)
+    finally:
+        pb.finished()
+combinations = []
+# parents = parent_map.keys()
+# for p1 in parents:
+#     for p2 in random.sample(parents, 10):
+#         combinations.append((p1, p2))
+# Times for random sampling of 10x1150 of bzrtools
+#   Graph        KnownGraph
+#   96.1s   vs   25.7s  :)
+# Times for 500 'merge parents' from bzr.dev
+#   25.6s   vs   45.0s  :(
+
+for revision_id, parent_ids in parent_map.iteritems():
+    if parent_ids is not None and len(parent_ids) > 1:
+        combinations.append(parent_ids)
+if len(combinations) > 500:
+    combinations = random.sample(combinations, 500)
+
+print '      %d combinations' % (len(combinations),)
+t1 = time.clock()
+known_g = graph.KnownGraph(parent_map)
+all_heads_comp(known_g, combinations)
+t2 = time.clock()
+print "Known: %.3fs" % (t2-t1,)
+print "  %s" % (graph._counters,)
+simple_g = graph.Graph(graph.DictParentsProvider(parent_map))
+all_heads_comp(simple_g, combinations)
+t3 = time.clock()
+print "Orig: %.3fs" % (t3-t2,)



More information about the bazaar-commits mailing list