Rev 4582: A bit of optimizations for topo_sort, including a pyrex version. in http://bazaar.launchpad.net/~jameinel/bzr/1.18-topo_sort

John Arbash Meinel john at arbash-meinel.com
Wed Aug 5 14:54:17 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.18-topo_sort

------------------------------------------------------------
revno: 4582
revision-id: john at arbash-meinel.com-20090805135409-njfyfwg41juu615d
parent: maarten_bosmans-20090731224855-xpyybv5fuw4werwg
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.18-topo_sort
timestamp: Wed 2009-08-05 08:54:09 -0500
message:
  A bit of optimizations for topo_sort, including a pyrex version.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-07-14 16:10:32 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-08-05 13:54:09 +0000
@@ -51,6 +51,71 @@
 NULL_REVISION = revision.NULL_REVISION
 
 
+def topo_sort(graph):
+    cdef Py_ssize_t last_tip, pos
+    cdef PyObject *temp_key, *temp_value
+
+    graph = dict(graph)
+    # this is the stack storing on which the sorted nodes are pushed.
+    node_name_stack = []
+
+    # count the number of children for every node in the graph
+    num_children = dict.fromkeys(graph.iterkeys(), 0)
+    pos = 0
+    while PyDict_Next(graph, &pos, NULL, &temp_value):
+        parents = <object>temp_value
+        for parent in parents: # pretty sure this is a tuple
+            temp_value = PyDict_GetItem(num_children, parent)
+            if temp_value != NULL: # Ignore ghosts
+                n = (<object>temp_value) + 1
+                PyDict_SetItem(num_children, parent, n)
+    # keep track of nodes without children in a separate list
+    tips = []
+    pos = 0
+    while PyDict_Next(num_children, &pos, &temp_key, &temp_value):
+        value = <object>temp_value
+        if value == 0:
+            node_name = <object>temp_key
+            PyList_Append(tips, node_name)
+
+    graph_pop = graph.pop
+    last_tip = len(tips) - 1
+    while last_tip >= 0:
+        # pick a node without a child and add it to the stack.
+        temp_key = PyList_GET_ITEM(tips, last_tip)
+        node_name = <object>temp_key
+        last_tip -= 1
+        PyList_Append(node_name_stack, node_name)
+
+        # the parents of the node lose it as a child; if it was the last
+        # child, add the parent to the list of childless nodes.
+        parents = graph_pop(node_name)
+        for parent in parents:
+            temp_value = PyDict_GetItem(num_children, parent)
+            if temp_value == NULL:
+                # Ghost parent, skip it
+                continue
+            n = (<object>temp_value) - 1
+            PyDict_SetItem(num_children, parent, n)
+            if n == 0:
+                last_tip += 1
+                if PyList_GET_SIZE(tips) > last_tip:
+                    Py_INCREF(parent)
+                    PyList_SetItem(tips, last_tip, parent)
+                else:
+                    PyList_Append(tips, parent)
+
+    # if there are still nodes left in the graph,
+    # that means that there is a cycle
+    if graph:
+        raise errors.GraphCycleError(graph)
+
+    # the nodes where pushed on the stack child first, so this list needs to be
+    # reversed before returning it.
+    node_name_stack.reverse()
+    return node_name_stack
+
+
 cdef class _KnownGraphNode:
     """Represents a single object in the known graph."""
 

=== modified file 'bzrlib/tsort.py'
--- a/bzrlib/tsort.py	2009-07-31 09:23:52 +0000
+++ b/bzrlib/tsort.py	2009-08-05 13:54:09 +0000
@@ -48,27 +48,44 @@
     node_name_stack = []
 
     # count the number of children for every node in the graph
-    nchildren = dict.fromkeys(graph.iterkeys(), 0)
+    num_children = dict.fromkeys(graph.iterkeys(), 0)
     for parents in graph.itervalues():
         for parent in parents:
-            if parent in nchildren:
-                nchildren[parent] += 1
+            try:
+                num_children[parent] += 1
+            except KeyError:
+                # This is a ghost parent, we don't track its children
+                pass
     # keep track of nodes without children in a separate list
-    nochildnodes = [node for (node, n) in nchildren.iteritems() if n == 0]
+    tips = [node for (node, n) in num_children.iteritems() if n == 0]
 
-    while nochildnodes:
+    graph_pop = graph.pop
+    node_name_stack_append = node_name_stack.append
+    tips_append = tips.append
+    tips_pop = tips.pop
+    last_tip = len(tips) - 1
+    while last_tip >= 0:
         # pick a node without a child and add it to the stack.
-        node_name = nochildnodes.pop()
-        node_name_stack.append(node_name)
+        node_name = tips[last_tip]
+        last_tip -= 1
+        node_name_stack_append(node_name)
 
         # the parents of the node lose it as a child; if it was the last
         # child, add the parent to the list of childless nodes.
-        parents = graph.pop(node_name)
+        parents = graph_pop(node_name)
         for parent in parents:
-            if parent in nchildren:
-                nchildren[parent] -= 1
-                if nchildren[parent] == 0:
-                    nochildnodes.append(parent)
+            try:
+                n = num_children[parent] - 1
+            except KeyError:
+                # ghost parent
+                continue
+            num_children[parent] = n
+            if n == 0:
+                last_tip += 1
+                if len(tips) > last_tip:
+                    tips[last_tip] = parent
+                else:
+                    tips_append(parent)
 
     # if there are still nodes left in the graph,
     # that means that there is a cycle



More information about the bazaar-commits mailing list