Rev 4379: Add the time_graph code for benchmarking, in http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads
John Arbash Meinel
john at arbash-meinel.com
Mon Jun 8 22:03:20 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads
------------------------------------------------------------
revno: 4379
revision-id: john at arbash-meinel.com-20090608210315-my1czgs64q2yywyd
parent: john at arbash-meinel.com-20090608201222-gzehwzf2q98gmk0d
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-better_heads
timestamp: Mon 2009-06-08 16:03:15 -0500
message:
Add the time_graph code for benchmarking.
Add a lot of debugging counters, etc.
At the moment, 'random sampling' shows a performance improvement of 4:1;
however, just checking 'parents of a merge' in bzr.dev shows a
performance *loss* of about 1:2, so we still need to do a bit more
probing to figure out why. It may be the overhead of using objects,
or it may be an issue with the algorithm.
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2009-06-08 20:12:22 +0000
+++ b/bzrlib/graph.py 2009-06-08 21:03:15 +0000
@@ -1196,6 +1196,7 @@
self.linear_dominator, self.dominator_distance)
+_counters = [0, 0, 0, 0, 0]
class KnownGraph(object):
"""This is a class which assumes we already know the full graph."""
@@ -1375,17 +1376,21 @@
def _heads_from_candidate_nodes(self, candidate_nodes):
queue = []
+ to_cleanup = []
for node in candidate_nodes.itervalues():
assert node.ancestor_of is None
node.ancestor_of = (node.key,)
queue.append((-node.gdfo, node))
+ to_cleanup.append(node)
heapq.heapify(queue)
# These are nodes that we determined are 'common' that we are no longer
# walking
stop_nodes = {}
# Now we walk nodes until all nodes that are being walked are 'common'
num_candidates = len(candidate_nodes)
+ counters = _counters
while queue and len(candidate_nodes) > 1:
+ counters[0] += 1
_, next = heapq.heappop(queue)
assert not next.was_walked
assert next.ancestor_of is not None
@@ -1400,39 +1405,46 @@
continue
# Now project the current nodes ancestor list to the parent nodes,
# and queue them up to be walked
- for parent_key in next.parent_keys:
- if parent_key in candidate_nodes:
- candidate_nodes.pop(parent_key)
- parent_node = self._nodes[parent_key]
- assert not parent_node.was_walked
- if parent_node.ancestor_of is None:
- # This node hasn't been walked yet
- parent_node.ancestor_of = node.ancestor_of
- # Enqueue this node
- heapq.heappush(queue, (-parent_node.gdfo, parent_node))
- else:
- # Combine to get the full set of parents
- all_ancestors = set(parent_node.ancestor_of)
- all_ancestors.update(node.ancestor_of)
- all_ancestors = tuple(sorted(all_ancestors))
- parent_node.ancestor_of = all_ancestors
- # This would otherwise require popping the item out of the
- # queue, because we think we are done processing it.
- # As is, we'll just let the queue clean itself up later.
- # if len(parent_node.ancestor_of) == num_candidates:
- # # This is now a common node
- # stop_nodes[parent_node.key] = parent_node
- # This node should already be enqueued by whoever walked it
- # earlier
+ def queue_parents():
+ for parent_key in next.parent_keys:
+ counters[1] += 1
+ if parent_key in candidate_nodes:
+ candidate_nodes.pop(parent_key)
+ parent_node = self._nodes[parent_key]
+ assert not parent_node.was_walked
+ if parent_node.ancestor_of is None:
+ counters[2] += 1
+ # This node hasn't been walked yet
+ parent_node.ancestor_of = node.ancestor_of
+ # Enqueue this node
+ heapq.heappush(queue, (-parent_node.gdfo, parent_node))
+ to_cleanup.append(parent_node)
+ else:
+ counters[3] += 1
+ # Combine to get the full set of parents
+ def combine_parents():
+ all_ancestors = set(parent_node.ancestor_of)
+ all_ancestors.update(node.ancestor_of)
+ return tuple(sorted(all_ancestors))
+ parent_node.ancestor_of = combine_parents()
+ # This would otherwise require popping the item out of the
+ # queue, because we think we are done processing it.
+ # As is, we'll just let the queue clean itself up later.
+ # if len(parent_node.ancestor_of) == num_candidates:
+ # # This is now a common node
+ # stop_nodes[parent_node.key] = parent_node
+ # This node should already be enqueued by whoever walked it
+ # earlier
+ queue_parents()
# import pdb; pdb.set_trace()
- self._clear_ancestry_info()
+ def cleanup():
+ for node in to_cleanup:
+ counters[4] += 1
+ node.ancestor_of = None
+ node.was_walked = False
+ cleanup()
return set(candidate_nodes)
- def _clear_ancestry_info(self):
- for node in self._nodes.itervalues():
- node.ancestor_of = None
- node.was_walked = False
-
def get_parent_map(self, keys):
# Thunk to match the Graph._parents_provider api.
nodes = [self._nodes[key] for key in keys]
=== added file 'tools/time_graph.py'
--- a/tools/time_graph.py 1970-01-01 00:00:00 +0000
+++ b/tools/time_graph.py 2009-06-08 21:03:15 +0000
@@ -0,0 +1,63 @@
+import random
+import time
+import sys
+import optparse
+from bzrlib import branch, graph, ui, trace
+from bzrlib.ui import text
+
+p = optparse.OptionParser()
+p.add_option('--one')
+opts, args = p.parse_args(sys.argv[1:])
+trace.enable_default_logging()
+ui.ui_factory = text.TextUIFactory()
+
+if len(args) >= 1:
+ b = branch.Branch.open(args[0])
+else:
+ b = branch.Branch.open('.')
+b.lock_read()
+try:
+ g = b.repository.get_graph()
+ parent_map = dict(p for p in g.iter_ancestry([b.last_revision()])
+ if p[1] is not None)
+finally:
+ b.unlock()
+
+print 'Found %d nodes' % (len(parent_map),)
+
+def all_heads_comp(g, combinations):
+ pb = ui.ui_factory.nested_progress_bar()
+ try:
+ for idx, combo in enumerate(combinations):
+ pb.update('proc', idx, len(combinations))
+ g.heads(combo)
+ finally:
+ pb.finished()
+combinations = []
+# parents = parent_map.keys()
+# for p1 in parents:
+# for p2 in random.sample(parents, 10):
+# combinations.append((p1, p2))
+# Times for random sampling of 10x1150 of bzrtools
+# Graph KnownGraph
+# 96.1s vs 25.7s :)
+# Times for 500 'merge parents' from bzr.dev
+# 25.6s vs 45.0s :(
+
+for revision_id, parent_ids in parent_map.iteritems():
+ if parent_ids is not None and len(parent_ids) > 1:
+ combinations.append(parent_ids)
+if len(combinations) > 500:
+ combinations = random.sample(combinations, 500)
+
+print ' %d combinations' % (len(combinations),)
+t1 = time.clock()
+known_g = graph.KnownGraph(parent_map)
+all_heads_comp(known_g, combinations)
+t2 = time.clock()
+print "Known: %.3fs" % (t2-t1,)
+print " %s" % (graph._counters,)
+simple_g = graph.Graph(graph.DictParentsProvider(parent_map))
+all_heads_comp(simple_g, combinations)
+t3 = time.clock()
+print "Orig: %.3fs" % (t3-t2,)
More information about the bazaar-commits
mailing list