Rev 4646: Implement initial gc_sort in pyrex. in http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-stable-groupcompress-order
John Arbash Meinel
john at arbash-meinel.com
Tue Aug 25 19:42:39 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-stable-groupcompress-order
------------------------------------------------------------
revno: 4646
revision-id: john at arbash-meinel.com-20090825184219-tbh5i6bvyqe6sp6e
parent: john at arbash-meinel.com-20090824210650-bgltzh9dlb3wdgyu
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.0b1-stable-groupcompress-order
timestamp: Tue 2009-08-25 13:42:19 -0500
message:
Implement initial gc_sort in pyrex.
No timings or much optimization yet.
For now, we are 'stable enough'. We'll have to consider what we need for long-term
stability.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py 2009-08-24 21:06:50 +0000
+++ b/bzrlib/_known_graph_py.py 2009-08-25 18:42:19 +0000
@@ -256,7 +256,7 @@
# # Ghost node, skip it
# continue
result.append(node.key)
- for parent_key in sorted(node.parent_keys):
+ for parent_key in sorted(node.parent_keys, reverse=True):
parent_node = self._nodes[parent_key]
seen_children = num_seen_children[parent_key] + 1
if seen_children == len(parent_node.child_keys):
@@ -265,7 +265,7 @@
# This has been queued up, stop tracking it
del num_seen_children[parent_key]
else:
- num_seen_parents[parent_key] = seen_children
+ num_seen_children[parent_key] = seen_children
return result
def merge_sort(self, tip_key):
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx 2009-08-18 21:41:08 +0000
+++ b/bzrlib/_known_graph_pyx.pyx 2009-08-25 18:42:19 +0000
@@ -25,6 +25,8 @@
ctypedef struct PyObject:
pass
+ int PyString_CheckExact(object)
+
object PyTuple_New(Py_ssize_t n)
Py_ssize_t PyTuple_GET_SIZE(object t)
PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
@@ -116,6 +118,36 @@
return <_KnownGraphNode>temp_node
+def get_key(node):
+ cdef _KnownGraphNode real_node
+ real_node = <_KnownGraphNode>node
+ return real_node.key
+
+
+cdef object _sort_list_nodes(object lst, int reverse):
+ """Sort a list of _KnownGraphNode objects."""
+ cdef _KnownGraphNode node1, node2
+ cdef int do_swap
+
+ if PyList_GET_SIZE(lst) == 0 or PyList_GET_SIZE(lst) == 1:
+ return lst
+ if PyList_GET_SIZE(lst) == 2:
+ node1 = _get_list_node(lst, 0)
+ node2 = _get_list_node(lst, 1)
+ if reverse:
+ do_swap = (node1.key < node2.key)
+ else:
+ do_swap = (node2.key < node1.key)
+ if do_swap:
+ Py_INCREF(node1)
+ PyList_SetItem(lst, 0, node2)
+ Py_INCREF(node2)
+ PyList_SetItem(lst, 1, node1)
+ return lst
+ # For all other sizes, we just use 'sorted()'
+ return sorted(lst, key=get_key, reverse=reverse)
+
+
cdef class _MergeSorter
cdef class KnownGraph:
@@ -216,6 +248,19 @@
PyList_Append(tails, node)
return tails
+ def _find_tips(self):
+ cdef PyObject *temp_node
+ cdef _KnownGraphNode node
+ cdef Py_ssize_t pos
+
+ tips = []
+ pos = 0
+ while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+ node = <_KnownGraphNode>temp_node
+ if PyList_GET_SIZE(node.children) == 0:
+ PyList_Append(tips, node)
+ return tips
+
def _find_gdfo(self):
cdef _KnownGraphNode node
cdef _KnownGraphNode child
@@ -397,6 +442,54 @@
# We started from the parents, so we don't need to do anymore work
return topo_order
+ def gc_sort(self):
+ """Return a reverse topological ordering which is 'stable'.
+
+ There are a few constraints:
+ 1) Reverse topological (all children before all parents)
+ 2) Grouped by prefix
+ 3) 'stable' sorting, so that we get the same result, independent of
+ machine, or extra data.
+ To do this, we use the same basic algorithm as topo_sort, but when we
+ aren't sure what node to access next, we sort them lexicographically.
+ """
+ cdef PyObject *temp
+ cdef Py_ssize_t pos
+ cdef _KnownGraphNode node, node2, parent_node
+
+ tips = self._find_tips()
+ # Split the tips based on prefix
+ prefix_tips = {}
+ for pos from 0 <= pos < PyList_GET_SIZE(tips):
+ node = _get_list_node(tips, pos)
+ if PyString_CheckExact(node.key) or len(node.key) == 1:
+ prefix = ''
+ else:
+ prefix = node.key[0]
+ prefix_tips.setdefault(prefix, []).append(node)
+
+ result = []
+ for prefix, nodes in sorted(prefix_tips.iteritems()):
+ pending = _sort_list_nodes(nodes, 1)
+ while pending:
+ node = pending.pop()
+ # TODO: test ghosts
+ # if node.parent_keys is None:
+ # # Ghost node, skip it
+ # continue
+ result.append(node.key)
+ parents = _sort_list_nodes(list(node.parents), 1)
+ for pos from 0 <= pos < PyList_GET_SIZE(parents):
+ parent_node = _get_list_node(parents, pos)
+ # TODO: GraphCycle detection
+ parent_node.seen = parent_node.seen + 1
+ if (parent_node.seen
+ == PyList_GET_SIZE(parent_node.children)):
+ # All children have been processed, queue up this
+ # parent
+ pending.append(parent_node)
+ parent_node.seen = 0
+ return result
def merge_sort(self, tip_key):
"""Compute the merge sorted graph output."""
=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py 2009-08-24 20:45:24 +0000
+++ b/bzrlib/tests/test__known_graph.py 2009-08-25 18:42:19 +0000
@@ -798,4 +798,18 @@
def test_stable_sorting(self):
# the sort order should be stable even when extra nodes are added
- pass
+ self.assertSorted(['b', 'c', 'a'],
+ {'a':(), 'b':('a',), 'c':('a',)})
+ self.assertSorted(['b', 'c', 'd', 'a'],
+ {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
+ self.assertSorted(['b', 'c', 'd', 'a'],
+ {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
+ self.assertSorted(['Z', 'b', 'c', 'd', 'a'],
+ {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
+ 'Z':('a',)})
+ self.assertSorted(['e', 'b', 'c', 'f', 'Z', 'd', 'a'],
+ {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
+ 'Z':('a',),
+ 'e':('b', 'c', 'd'),
+ 'f':('d', 'Z'),
+ })
More information about the bazaar-commits
mailing list