Rev 4418: Simple Pyrex version for the new heads() method. in file:///home/vila/src/bzr/experimental/vila-better-heads/
Vincent Ladeuil
v.ladeuil+lp at free.fr
Thu Jun 18 10:51:39 BST 2009
At file:///home/vila/src/bzr/experimental/vila-better-heads/
------------------------------------------------------------
revno: 4418
revision-id: v.ladeuil+lp at free.fr-20090618095138-rha25usmqwsfwoxv
parent: v.ladeuil+lp at free.fr-20090617163711-iqubyv0tlqr8ayvl
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: vila-better-heads
timestamp: Thu 2009-06-18 11:51:38 +0200
message:
Simple Pyrex version for the new heads() method.
* tools/time_graph.py:
Also display parent_map loading.
* bzrlib/_known_graph_pyx.pyx:
(KnownGraph.heads): pyrex version faster by ~20% than the previous
pyrex version, without using linears dominators. Strictly speaking
linear dominators breaks the algorithm as jumping from one node to
its dominator may miss a candidate. Trying to compensate that
seems to cost more than the benefits it provides :-/ Their use
will surely come back later.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py 2009-06-17 16:37:11 +0000
+++ b/bzrlib/_known_graph_py.py 2009-06-18 09:51:38 +0000
@@ -273,7 +273,7 @@
pending = []
min_gdfo = None
for node in candidate_nodes.values():
- if node.parent_keys: # protect against ghosts, jam, fixme ?
+ if node.parent_keys:
pending.extend(node.parent_keys)
if min_gdfo is None or node.gdfo < min_gdfo:
min_gdfo = node.gdfo
@@ -287,7 +287,7 @@
node = nodes[node_key]
if node.gdfo <= min_gdfo:
continue
- if node.parent_keys: # protect against ghosts, jam, fixme ?
+ if node.parent_keys:
pending.extend(node.parent_keys)
heads = heads_key.difference(seen)
if self.do_cache:
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx 2009-06-17 16:37:11 +0000
+++ b/bzrlib/_known_graph_pyx.pyx 2009-06-18 09:51:38 +0000
@@ -364,6 +364,74 @@
This is done by searching the ancestries of each key. Any key that is
reachable from another key is not returned; all the others are.
+ This operation scales with the relative depth between any two keys. It
+ uses gdfo to avoid walking all ancestry.
+
+ :param keys: An iterable of keys.
+ :return: A set of the heads. Note that as a set there is no ordering
+ information. Callers will need to filter their input to create
+ order if they need it.
+ """
+ cdef PyObject *maybe_node
+ cdef PyObject *maybe_heads
+ cdef PyObject *temp_node
+ cdef _KnownGraphNode node
+
+ heads_key = PyFrozenSet_New(keys)
+ maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
+ if maybe_heads != NULL:
+ return <object>maybe_heads
+ # Not cached, compute it ourselves
+ candidate_nodes = {}
+ nodes = self._nodes
+ for key in keys:
+ maybe_node = PyDict_GetItem(nodes, key)
+ if maybe_node == NULL:
+ raise KeyError('key %s not in nodes' % (key,))
+ PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
+ if revision.NULL_REVISION in candidate_nodes:
+ # NULL_REVISION is only a head if it is the only entry
+ candidate_nodes.pop(revision.NULL_REVISION)
+ if not candidate_nodes:
+ return set([revision.NULL_REVISION])
+ # The keys changed, so recalculate heads_key
+ heads_key = PyFrozenSet_New(candidate_nodes)
+ if len(candidate_nodes) < 2:
+ return heads_key
+
+ seen = set()
+ pending = []
+ cdef Py_ssize_t pos
+ pos = 0
+ min_gdfo = None
+ while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
+ node = <_KnownGraphNode>temp_node
+ if node.parents is not None:
+ pending.extend(node.parents)
+ if min_gdfo is None or node.gdfo < min_gdfo:
+ min_gdfo = node.gdfo
+ nodes = self._nodes
+ while pending:
+ node = pending.pop()
+ if node.key in seen:
+ # node already appears in some ancestry
+ continue
+ seen.add(node.key)
+ if node.gdfo <= min_gdfo:
+ continue
+ if node.parents:
+ pending.extend(node.parents)
+ heads = heads_key.difference(seen)
+ if self.do_cache:
+ self._known_heads[heads_key] = heads
+ return heads
+
+ def xheads(self, keys):
+ """Return the heads from amongst keys.
+
+ This is done by searching the ancestries of each key. Any key that is
+ reachable from another key is not returned; all the others are.
+
This operation scales with the relative depth between any two keys. If
any two keys are completely disconnected all ancestry of both sides
will be retrieved.
=== modified file 'tools/time_graph.py'
--- a/tools/time_graph.py 2009-06-16 09:37:53 +0000
+++ b/tools/time_graph.py 2009-06-18 09:51:38 +0000
@@ -22,6 +22,7 @@
trace.enable_default_logging()
ui.ui_factory = text.TextUIFactory()
+t1 = time.clock()
if len(args) >= 1:
b = branch.Branch.open(args[0])
else:
@@ -33,8 +34,9 @@
if p[1] is not None)
finally:
b.unlock()
+t2 = time.clock()
-print 'Found %d nodes' % (len(parent_map),)
+print 'Found %d nodes, loaded in %.3fs' % (len(parent_map), t2-t1)
def all_heads_comp(g, combinations):
h = []
@@ -84,14 +86,14 @@
h_known = all_heads_comp(known_g, combinations)
t2 = time.clock()
print "Known (pyx): %.3fs" % (t2-t1,)
-#print " %s" % (graph._counters,)
+print " %s" % (graph._counters,)
simple_g = graph.Graph(graph.DictParentsProvider(parent_map))
graph._counters[1] = 0
graph._counters[2] = 0
h_simple = all_heads_comp(simple_g, combinations)
t3 = time.clock()
print "Orig: %.3fs" % (t3-t2,)
-#print " %s" % (graph._counters,)
+print " %s" % (graph._counters,)
if h_simple != h_known:
import pdb; pdb.set_trace()
print 'ratio: %.3fs' % ((t2-t1) / (t3-t2))
More information about the bazaar-commits
mailing list