Rev 4418: Simple Pyrex version for the new heads() method. in file:///home/vila/src/bzr/experimental/vila-better-heads/

Vincent Ladeuil v.ladeuil+lp at free.fr
Thu Jun 18 10:51:39 BST 2009


At file:///home/vila/src/bzr/experimental/vila-better-heads/

------------------------------------------------------------
revno: 4418
revision-id: v.ladeuil+lp at free.fr-20090618095138-rha25usmqwsfwoxv
parent: v.ladeuil+lp at free.fr-20090617163711-iqubyv0tlqr8ayvl
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: vila-better-heads
timestamp: Thu 2009-06-18 11:51:38 +0200
message:
  Simple Pyrex version for the new heads() method.
  
  * tools/time_graph.py: 
  Also display parent_map loading.
  
  * bzrlib/_known_graph_pyx.pyx:
  (KnownGraph.heads): pyrex version faster by ~20% than the previous
  pyrex version, without using linears dominators. Strictly speaking
  linear dominators breaks the algorithm as jumping from one node to
  its dominator may miss a candidate. Trying to compensate that
  seems to cost more than the benefits it provides :-/ Their use
  will surely come back later.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py	2009-06-17 16:37:11 +0000
+++ b/bzrlib/_known_graph_py.py	2009-06-18 09:51:38 +0000
@@ -273,7 +273,7 @@
         pending = []
         min_gdfo = None
         for node in candidate_nodes.values():
-            if node.parent_keys: # protect against ghosts, jam, fixme ?
+            if node.parent_keys:
                 pending.extend(node.parent_keys)
             if min_gdfo is None or node.gdfo < min_gdfo:
                 min_gdfo = node.gdfo
@@ -287,7 +287,7 @@
             node = nodes[node_key]
             if node.gdfo <= min_gdfo:
                 continue
-            if node.parent_keys: # protect against ghosts, jam, fixme ?
+            if node.parent_keys:
                 pending.extend(node.parent_keys)
         heads = heads_key.difference(seen)
         if self.do_cache:

=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-06-17 16:37:11 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-06-18 09:51:38 +0000
@@ -364,6 +364,74 @@
         This is done by searching the ancestries of each key.  Any key that is
         reachable from another key is not returned; all the others are.
 
+        This operation scales with the relative depth between any two keys. It
+        uses gdfo to avoid walking all ancestry.
+
+        :param keys: An iterable of keys.
+        :return: A set of the heads. Note that as a set there is no ordering
+            information. Callers will need to filter their input to create
+            order if they need it.
+        """
+        cdef PyObject *maybe_node
+        cdef PyObject *maybe_heads
+        cdef PyObject *temp_node
+        cdef _KnownGraphNode node
+
+        heads_key = PyFrozenSet_New(keys)
+        maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
+        if maybe_heads != NULL:
+            return <object>maybe_heads
+        # Not cached, compute it ourselves
+        candidate_nodes = {}
+        nodes = self._nodes
+        for key in keys:
+            maybe_node = PyDict_GetItem(nodes, key)
+            if maybe_node == NULL:
+                raise KeyError('key %s not in nodes' % (key,))
+            PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
+        if revision.NULL_REVISION in candidate_nodes:
+            # NULL_REVISION is only a head if it is the only entry
+            candidate_nodes.pop(revision.NULL_REVISION)
+            if not candidate_nodes:
+                return set([revision.NULL_REVISION])
+            # The keys changed, so recalculate heads_key
+            heads_key = PyFrozenSet_New(candidate_nodes)
+        if len(candidate_nodes) < 2:
+            return heads_key
+
+        seen = set()
+        pending = []
+        cdef Py_ssize_t pos
+        pos = 0
+        min_gdfo = None
+        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
+            node = <_KnownGraphNode>temp_node
+            if node.parents is not None:
+                pending.extend(node.parents)
+            if min_gdfo is None or node.gdfo < min_gdfo:
+                min_gdfo = node.gdfo
+        nodes = self._nodes
+        while pending:
+            node = pending.pop()
+            if node.key in seen:
+                # node already appears in some ancestry
+                continue
+            seen.add(node.key)
+            if node.gdfo <= min_gdfo:
+                continue
+            if node.parents:
+                pending.extend(node.parents)
+        heads = heads_key.difference(seen)
+        if self.do_cache:
+            self._known_heads[heads_key] = heads
+        return heads
+
+    def xheads(self, keys):
+        """Return the heads from amongst keys.
+
+        This is done by searching the ancestries of each key.  Any key that is
+        reachable from another key is not returned; all the others are.
+
         This operation scales with the relative depth between any two keys. If
         any two keys are completely disconnected all ancestry of both sides
         will be retrieved.

=== modified file 'tools/time_graph.py'
--- a/tools/time_graph.py	2009-06-16 09:37:53 +0000
+++ b/tools/time_graph.py	2009-06-18 09:51:38 +0000
@@ -22,6 +22,7 @@
 trace.enable_default_logging()
 ui.ui_factory = text.TextUIFactory()
 
+t1 = time.clock()
 if len(args) >= 1:
     b = branch.Branch.open(args[0])
 else:
@@ -33,8 +34,9 @@
                          if p[1] is not None)
 finally:
     b.unlock()
+t2 = time.clock()
 
-print 'Found %d nodes' % (len(parent_map),)
+print 'Found %d nodes, loaded in %.3fs' % (len(parent_map), t2-t1)
 
 def all_heads_comp(g, combinations):
     h = []
@@ -84,14 +86,14 @@
     h_known = all_heads_comp(known_g, combinations)
 t2 = time.clock()
 print "Known (pyx): %.3fs" % (t2-t1,)
-#print "  %s" % (graph._counters,)
+print "  %s" % (graph._counters,)
 simple_g = graph.Graph(graph.DictParentsProvider(parent_map))
 graph._counters[1] = 0
 graph._counters[2] = 0
 h_simple = all_heads_comp(simple_g, combinations)
 t3 = time.clock()
 print "Orig: %.3fs" % (t3-t2,)
-#print "  %s" % (graph._counters,)
+print "  %s" % (graph._counters,)
 if h_simple != h_known:
     import pdb; pdb.set_trace()
 print 'ratio: %.3fs' % ((t2-t1) / (t3-t2))



More information about the bazaar-commits mailing list