Rev 4646: Implement initial gc_sort in pyrex. in http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-stable-groupcompress-order

John Arbash Meinel john at arbash-meinel.com
Tue Aug 25 19:42:39 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-stable-groupcompress-order

------------------------------------------------------------
revno: 4646
revision-id: john at arbash-meinel.com-20090825184219-tbh5i6bvyqe6sp6e
parent: john at arbash-meinel.com-20090824210650-bgltzh9dlb3wdgyu
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.0b1-stable-groupcompress-order
timestamp: Tue 2009-08-25 13:42:19 -0500
message:
  Implement initial gc_sort in pyrex.
  No timings or much optimization yet.
  
  For now, we are 'stable enough'. We'll have to consider what we need for long-term
  stability.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py	2009-08-24 21:06:50 +0000
+++ b/bzrlib/_known_graph_py.py	2009-08-25 18:42:19 +0000
@@ -256,7 +256,7 @@
                 #     # Ghost node, skip it
                 #     continue
                 result.append(node.key)
-                for parent_key in sorted(node.parent_keys):
+                for parent_key in sorted(node.parent_keys, reverse=True):
                     parent_node = self._nodes[parent_key]
                     seen_children = num_seen_children[parent_key] + 1
                     if seen_children == len(parent_node.child_keys):
@@ -265,7 +265,7 @@
                         # This has been queued up, stop tracking it
                         del num_seen_children[parent_key]
                     else:
-                        num_seen_parents[parent_key] = seen_children
+                        num_seen_children[parent_key] = seen_children
         return result
 
     def merge_sort(self, tip_key):

=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-08-18 21:41:08 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-08-25 18:42:19 +0000
@@ -25,6 +25,8 @@
     ctypedef struct PyObject:
         pass
 
+    int PyString_CheckExact(object)
+
     object PyTuple_New(Py_ssize_t n)
     Py_ssize_t PyTuple_GET_SIZE(object t)
     PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
@@ -116,6 +118,36 @@
     return <_KnownGraphNode>temp_node
 
 
+def get_key(node):
+    cdef _KnownGraphNode real_node
+    real_node = <_KnownGraphNode>node
+    return real_node.key
+
+
+cdef object _sort_list_nodes(object lst, int reverse):
+    """Sort a list of _KnownGraphNode objects."""
+    cdef _KnownGraphNode node1, node2
+    cdef int do_swap
+
+    if PyList_GET_SIZE(lst) == 0 or PyList_GET_SIZE(lst) == 1:
+        return lst
+    if PyList_GET_SIZE(lst) == 2:
+        node1 = _get_list_node(lst, 0)
+        node2 = _get_list_node(lst, 1)
+        if reverse:
+            do_swap = (node1.key < node2.key)
+        else:
+            do_swap = (node2.key < node1.key)
+        if do_swap:
+            Py_INCREF(node1)
+            PyList_SetItem(lst, 0, node2)
+            Py_INCREF(node2)
+            PyList_SetItem(lst, 1, node1)
+        return lst
+    # For all other sizes, we just use 'sorted()'
+    return sorted(lst, key=get_key, reverse=reverse)
+
+
 cdef class _MergeSorter
 
 cdef class KnownGraph:
@@ -216,6 +248,19 @@
                 PyList_Append(tails, node)
         return tails
 
+    def _find_tips(self):
+        cdef PyObject *temp_node
+        cdef _KnownGraphNode node
+        cdef Py_ssize_t pos
+
+        tips = []
+        pos = 0
+        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+            node = <_KnownGraphNode>temp_node
+            if PyList_GET_SIZE(node.children) == 0:
+                PyList_Append(tips, node)
+        return tips
+
     def _find_gdfo(self):
         cdef _KnownGraphNode node
         cdef _KnownGraphNode child
@@ -397,6 +442,54 @@
         # We started from the parents, so we don't need to do anymore work
         return topo_order
 
+    def gc_sort(self):
+        """Return a reverse topological ordering which is 'stable'.
+
+        There are a few constraints:
+          1) Reverse topological (all children before all parents)
+          2) Grouped by prefix
+          3) 'stable' sorting, so that we get the same result, independent of
+             machine, or extra data.
+        To do this, we use the same basic algorithm as topo_sort, but when we
+        aren't sure what node to access next, we sort them lexicographically.
+        """
+        cdef PyObject *temp
+        cdef Py_ssize_t pos
+        cdef _KnownGraphNode node, node2, parent_node
+
+        tips = self._find_tips()
+        # Split the tips based on prefix
+        prefix_tips = {}
+        for pos from 0 <= pos < PyList_GET_SIZE(tips):
+            node = _get_list_node(tips, pos)
+            if PyString_CheckExact(node.key) or len(node.key) == 1:
+                prefix = ''
+            else:
+                prefix = node.key[0]
+            prefix_tips.setdefault(prefix, []).append(node)
+
+        result = []
+        for prefix, nodes in sorted(prefix_tips.iteritems()):
+            pending = _sort_list_nodes(nodes, 1)
+            while pending:
+                node = pending.pop()
+                # TODO: test ghosts
+                # if node.parent_keys is None:
+                #     # Ghost node, skip it
+                #     continue
+                result.append(node.key)
+                parents = _sort_list_nodes(list(node.parents), 1)
+                for pos from 0 <= pos < PyList_GET_SIZE(parents):
+                    parent_node = _get_list_node(parents, pos)
+                    # TODO: GraphCycle detection
+                    parent_node.seen = parent_node.seen + 1
+                    if (parent_node.seen
+                        == PyList_GET_SIZE(parent_node.children)):
+                        # All children have been processed, queue up this
+                        # parent
+                        pending.append(parent_node)
+                        parent_node.seen = 0
+        return result
 
     def merge_sort(self, tip_key):
         """Compute the merge sorted graph output."""

=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py	2009-08-24 20:45:24 +0000
+++ b/bzrlib/tests/test__known_graph.py	2009-08-25 18:42:19 +0000
@@ -798,4 +798,18 @@
 
     def test_stable_sorting(self):
         # the sort order should be stable even when extra nodes are added
-        pass
+        self.assertSorted(['b', 'c', 'a'],
+                          {'a':(), 'b':('a',), 'c':('a',)})
+        self.assertSorted(['b', 'c', 'd', 'a'],
+                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
+        self.assertSorted(['b', 'c', 'd', 'a'],
+                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
+        self.assertSorted(['Z', 'b', 'c', 'd', 'a'],
+                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
+                           'Z':('a',)})
+        self.assertSorted(['e', 'b', 'c', 'f', 'Z', 'd', 'a'],
+                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
+                           'Z':('a',),
+                           'e':('b', 'c', 'd'),
+                           'f':('d', 'Z'),
+                           })



More information about the bazaar-commits mailing list