Rev 4408: cleanup passes. in http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads
John Arbash Meinel
john at arbash-meinel.com
Fri Jun 12 05:41:53 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads
------------------------------------------------------------
revno: 4408
revision-id: john at arbash-meinel.com-20090612044142-jrqe5yef800lbvaf
parent: john at arbash-meinel.com-20090612041307-jk38nrysmr78f5d0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-better_heads
timestamp: Thu 2009-06-11 23:41:42 -0500
message:
cleanup passes.
Write some helper functions that avoid having to retype the ugly
inline code everywhere.
Get rid of the special-case exit when popping candidate nodes.
At best it just avoids adding a couple parents to the heap before
we hit the while loop and notice we are done anyway. And it allows
us to remove some extraneous if/break blocks and clean up the code
a bit.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx 2009-06-12 04:13:07 +0000
+++ b/bzrlib/_known_graph_pyx.pyx 2009-06-12 04:41:42 +0000
@@ -122,6 +122,29 @@
self.linear_dominator, self.dominator_distance)
+cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
+ cdef PyObject *temp_node
+
+ temp_node = PyList_GET_ITEM(lst, pos)
+ return <_KnownGraphNode>temp_node
+
+
+cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
+ cdef PyObject *temp_node
+ cdef _KnownGraphNode node
+
+ temp_node = PyTuple_GET_ITEM(parents, pos)
+ return <_KnownGraphNode>temp_node
+
+
+cdef _KnownGraphNode _peek_node(queue):
+ cdef PyObject *temp_node
+ cdef _KnownGraphNode node
+
+ temp_node = PyTuple_GET_ITEM(<object>PyList_GET_ITEM(queue, 0), 1)
+ node = <_KnownGraphNode>temp_node
+ return node
+
# TODO: slab allocate all _KnownGraphNode objects.
# We already know how many we are going to need, except for a couple of
# ghosts that could be allocated on demand.
@@ -217,7 +240,7 @@
node.linear_dominator_node = node
node.dominator_distance = 0
return None
- parent_node = <_KnownGraphNode>PyTuple_GET_ITEM(node.parents, 0)
+ parent_node = _get_parent(node.parents, 0)
if PyList_GET_SIZE(parent_node.children) > 1:
# The parent has multiple children, so *this* node is the
# dominator
@@ -270,8 +293,7 @@
dominator = node.linear_dominator_node
num_elements = len(stack)
for i from num_elements > i >= 0:
- temp_node = PyList_GET_ITEM(stack, i)
- next_node = <_KnownGraphNode>temp_node
+ next_node = _get_list_node(stack, i)
next_node.linear_dominator_node = dominator
next_node.dominator_distance = node.dominator_distance + 1
node = next_node
@@ -291,7 +313,6 @@
return tails
def _find_gdfo(self):
- cdef PyObject *temp_node, *temp_tuple
cdef Py_ssize_t pos, pos2
cdef _KnownGraphNode node
cdef _KnownGraphNode child_node
@@ -301,23 +322,18 @@
tails = self._find_tails()
todo = []
for pos from 0 <= pos < PyList_GET_SIZE(tails):
- temp_node = PyList_GET_ITEM(tails, pos)
- node = <_KnownGraphNode>temp_node
+ node = _get_list_node(tails, pos)
node.gdfo = 1
PyList_Append(todo, (1, node))
# No need to heapify, because all tails have priority=1
max_gdfo = len(self._nodes) + 1
while PyList_GET_SIZE(todo) > 0:
- temp_tuple = PyList_GET_ITEM(todo, 0)
- t = <object>temp_tuple
- temp_node = PyTuple_GET_ITEM(t, 1)
- node = <_KnownGraphNode>temp_node
+ node = _peek_node(todo)
next_gdfo = node.gdfo + 1
assert next_gdfo <= max_gdfo
replace_node = 1
for pos from 0 <= pos < PyList_GET_SIZE(node.children):
- temp_node = PyList_GET_ITEM(node.children, pos)
- child_node = <_KnownGraphNode>temp_node
+ child_node = _get_list_node(node.children, pos)
# We should never have numbered children before we numbered
# a parent
if child_node.gdfo != -1:
@@ -329,8 +345,7 @@
if PyTuple_GET_SIZE(child_node.parents) > 1:
missing_parent = 0
for pos2 from 0 <= pos2 < PyTuple_GET_SIZE(child_node.parents):
- temp_node = PyTuple_GET_ITEM(child_node.parents, pos2)
- parent_node = <_KnownGraphNode>temp_node
+ parent_node = _get_parent(child_node.parents, pos2)
if parent_node.gdfo == -1:
missing_parent = 1
break
@@ -455,24 +470,24 @@
cdef int _process_parent(self, _KnownGraphNode node,
_KnownGraphNode parent_node,
candidate_nodes,
- queue) except -1:
- """Process the parent of a node, seeing if we need to walk it.
-
- :return: 0 No extra work needed
- 1 This was a candidate node, and now there is only 1 candidate
- left, so break out of the loop
- """
+ queue, int *replace_item) except -1:
+ """Process the parent of a node, seeing if we need to walk it."""
cdef PyObject *maybe_candidate
maybe_candidate = PyDict_GetItem(candidate_nodes, parent_node.key)
if maybe_candidate != NULL:
candidate_nodes.pop(parent_node.key)
- if len(candidate_nodes) <= 1:
- return 1
+ # We could pass up a flag that tells the caller to stop processing,
+ # but it doesn't help much, and makes the code uglier
+ return 0
if parent_node.ancestor_of is None:
# This node hasn't been walked yet, so just project node's ancestor
# info directly to parent_node, and enqueue it for later processing
parent_node.ancestor_of = node.ancestor_of
- heappush(queue, (-parent_node.gdfo, parent_node))
+ if replace_item[0]:
+ heapreplace(queue, (-parent_node.gdfo, parent_node))
+ replace_item[0] = 0
+ else:
+ heappush(queue, (-parent_node.gdfo, parent_node))
PyList_Append(self._to_cleanup, parent_node)
elif parent_node.ancestor_of != node.ancestor_of:
# Combine to get the full set of parents
@@ -488,7 +503,7 @@
cdef _KnownGraphNode node
cdef _KnownGraphNode parent_node
cdef Py_ssize_t num_candidates
- cdef int num_parents
+ cdef int num_parents, replace_item
cdef Py_ssize_t pos
cdef PyObject *temp_node
@@ -505,15 +520,23 @@
# walking
# Now we walk nodes until all nodes that are being walked are 'common'
num_candidates = len(candidate_nodes)
+ replace_item = 0
while PyList_GET_SIZE(queue) > 0 and PyDict_Size(candidate_nodes) > 1:
- temp_node = PyTuple_GET_ITEM(heappop(queue), 1)
- node = <_KnownGraphNode>temp_node
+ if replace_item:
+ # We still need to pop the smallest member out of the queue
+ # before we peek again
+ heappop(queue)
+ if PyList_GET_SIZE(queue) == 0:
+ break
+ # peek at the smallest item. We don't pop, because we expect we'll
+ # need to push more things into the queue anyway
+ node = _peek_node(queue)
+ replace_item = 1
if PyTuple_GET_SIZE(node.ancestor_of) == num_candidates:
# This node is now considered 'common'
# Make sure all parent nodes are marked as such
for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
- temp_node = PyTuple_GET_ITEM(node.parents, pos)
- parent_node = <_KnownGraphNode>temp_node
+ parent_node = _get_parent(node.parents, pos)
if parent_node.ancestor_of is not None:
parent_node.ancestor_of = node.ancestor_of
if node.linear_dominator_node is not node:
@@ -530,19 +553,15 @@
# We are at the tip of a long linear region
# We know that there is nothing between here and the tail
# that is interesting, so skip to the end
- if (self._process_parent(node, node.linear_dominator_node,
- candidate_nodes, queue)):
- break
+ self._process_parent(node, node.linear_dominator_node,
+ candidate_nodes, queue, &replace_item)
else:
- for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
- temp_node = PyTuple_GET_ITEM(node.parents, pos)
- parent_node = <_KnownGraphNode>temp_node
- if (self._process_parent(node, parent_node,
- candidate_nodes, queue)):
- break
+ for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
+ parent_node = _get_parent(node.parents, pos)
+ self._process_parent(node, parent_node, candidate_nodes,
+ queue, &replace_item)
for pos from 0 <= pos < PyList_GET_SIZE(self._to_cleanup):
- temp_node = PyList_GET_ITEM(self._to_cleanup, pos)
- node = <_KnownGraphNode>temp_node
+ node = _get_list_node(self._to_cleanup, pos)
node.ancestor_of = None
self._to_cleanup = []
return PyFrozenSet_New(candidate_nodes)
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2009-06-11 04:28:57 +0000
+++ b/bzrlib/graph.py 2009-06-12 04:41:42 +0000
@@ -1657,6 +1657,7 @@
return result
+_counters = [0,0,0,0,0,0,0]
try:
from bzrlib._known_graph_pyx import KnownGraph
except ImportError:
=== added file 'tools/time_graph.py'
--- a/tools/time_graph.py 1970-01-01 00:00:00 +0000
+++ b/tools/time_graph.py 2009-06-12 04:41:42 +0000
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+import random
+import os
+import time
+import sys
+import optparse
+from bzrlib import (
+ branch,
+ commands,
+ graph,
+ ui,
+ trace,
+ _known_graph_py,
+ _known_graph_pyx,
+ )
+from bzrlib.ui import text
+
+p = optparse.OptionParser()
+p.add_option('--max-combinations', default=500, type=int)
+p.add_option('--lsprof', default=None, type=str)
+opts, args = p.parse_args(sys.argv[1:])
+trace.enable_default_logging()
+ui.ui_factory = text.TextUIFactory()
+
+if len(args) >= 1:
+ b = branch.Branch.open(args[0])
+else:
+ b = branch.Branch.open('.')
+b.lock_read()
+try:
+ g = b.repository.get_graph()
+ parent_map = dict(p for p in g.iter_ancestry([b.last_revision()])
+ if p[1] is not None)
+finally:
+ b.unlock()
+
+print 'Found %d nodes' % (len(parent_map),)
+
+def all_heads_comp(g, combinations):
+ h = []
+ pb = ui.ui_factory.nested_progress_bar()
+ try:
+ for idx, combo in enumerate(combinations):
+ if idx & 0x1f == 0:
+ pb.update('proc', idx, len(combinations))
+ h.append(g.heads(combo))
+ finally:
+ pb.finished()
+ return h
+combinations = []
+# parents = parent_map.keys()
+# for p1 in parents:
+# for p2 in random.sample(parents, 10):
+# combinations.append((p1, p2))
+# Times for random sampling of 10x1150 of bzrtools
+# Graph KnownGraph
+# 96.1s vs 25.7s :)
+# Times for 500 'merge parents' from bzr.dev
+# 25.6s vs 45.0s :(
+
+for revision_id, parent_ids in parent_map.iteritems():
+ if parent_ids is not None and len(parent_ids) > 1:
+ combinations.append(parent_ids)
+if opts.max_combinations > 0 and len(combinations) > opts.max_combinations:
+ combinations = random.sample(combinations, opts.max_combinations)
+
+print ' %d combinations' % (len(combinations),)
+t1 = time.clock()
+known_g = _known_graph_py.KnownGraph(parent_map)
+if opts.lsprof is not None:
+ h_known = commands.apply_lsprofiled(opts.lsprof,
+ all_heads_comp, known_g, combinations)
+else:
+ h_known = all_heads_comp(known_g, combinations)
+t2 = time.clock()
+print "Known: %.3fs" % (t2-t1,)
+print " %s" % (graph._counters,)
+t1 = time.clock()
+known_g = _known_graph_pyx.KnownGraph(parent_map)
+if opts.lsprof is not None:
+ h_known = commands.apply_lsprofiled(opts.lsprof,
+ all_heads_comp, known_g, combinations)
+else:
+ h_known = all_heads_comp(known_g, combinations)
+t2 = time.clock()
+print "Known (pyx): %.3fs" % (t2-t1,)
+print " %s" % (graph._counters,)
+simple_g = graph.Graph(graph.DictParentsProvider(parent_map))
+graph._counters[1] = 0
+graph._counters[2] = 0
+h_simple = all_heads_comp(simple_g, combinations)
+t3 = time.clock()
+print "Orig: %.3fs" % (t3-t2,)
+print " %s" % (graph._counters,)
+if h_simple != h_known:
+ import pdb; pdb.set_trace()
+print 'ratio: %.3fs' % ((t2-t1) / (t3-t2))
More information about the bazaar-commits
mailing list