Rev 4667: (jam) Merge 2.0 4649 into bzr.dev in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Wed Sep 2 19:08:03 BST 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4667 [merge]
revision-id: pqm at pqm.ubuntu.com-20090902180758-uuuxc8dd62vq67i8
parent: pqm at pqm.ubuntu.com-20090902090616-8h9ung293q3zp4m0
parent: pqm at pqm.ubuntu.com-20090902154814-ter903a552n68ahb
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2009-09-02 19:07:58 +0100
message:
(jam) Merge 2.0 4649 into bzr.dev
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/_known_graph_py.py _known_graph_py.py-20090610185421-vw8vfda2cgnckgb1-1
bzrlib/_known_graph_pyx.pyx _known_graph_pyx.pyx-20090610194911-yjk73td9hpjilas0-1
bzrlib/tests/test__known_graph.py test__known_graph.py-20090610185421-vw8vfda2cgnckgb1-2
=== modified file 'NEWS'
--- a/NEWS 2009-09-02 08:29:07 +0000
+++ b/NEWS 2009-09-02 18:07:58 +0000
@@ -73,6 +73,10 @@
that has a ghost in the mainline ancestry.
(John Arbash Meinel, #419241)
+* ``groupcompress`` sort order is now more stable, rather than relying on
+ ``topo_sort`` ordering. The implementation is now
+ ``KnownGraph.gc_sort``. (John Arbash Meinel)
+
* Local data conversion will generate correct deltas. This is a critical
bugfix vs 2.0rc1, and all 2.0rc1 users should upgrade to 2.0rc2 before
converting repositories. (Robert Collins, #422849)
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py 2009-08-17 20:41:26 +0000
+++ b/bzrlib/_known_graph_py.py 2009-08-25 18:45:40 +0000
@@ -97,6 +97,10 @@
return [node for node in self._nodes.itervalues()
if not node.parent_keys]
+ def _find_tips(self):
+ return [node for node in self._nodes.itervalues()
+ if not node.child_keys]
+
def _find_gdfo(self):
nodes = self._nodes
known_parent_gdfos = {}
@@ -218,6 +222,51 @@
# We started from the parents, so we don't need to do anymore work
return topo_order
+ def gc_sort(self):
+ """Return a reverse topological ordering which is 'stable'.
+
+ There are a few constraints:
+ 1) Reverse topological (all children before all parents)
+ 2) Grouped by prefix
+ 3) 'stable' sorting, so that we get the same result, independent of
+ machine, or extra data.
+ To do this, we use the same basic algorithm as topo_sort, but when we
+ aren't sure what node to access next, we sort them lexicographically.
+ """
+ tips = self._find_tips()
+ # Split the tips based on prefix
+ prefix_tips = {}
+ for node in tips:
+ if node.key.__class__ is str or len(node.key) == 1:
+ prefix = ''
+ else:
+ prefix = node.key[0]
+ prefix_tips.setdefault(prefix, []).append(node)
+
+ num_seen_children = dict.fromkeys(self._nodes, 0)
+
+ result = []
+ for prefix in sorted(prefix_tips):
+ pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
+ reverse=True)
+ while pending:
+ node = pending.pop()
+ if node.parent_keys is None:
+ # Ghost node, skip it
+ continue
+ result.append(node.key)
+ for parent_key in sorted(node.parent_keys, reverse=True):
+ parent_node = self._nodes[parent_key]
+ seen_children = num_seen_children[parent_key] + 1
+ if seen_children == len(parent_node.child_keys):
+ # All children have been processed, enqueue this parent
+ pending.append(parent_node)
+ # This has been queued up, stop tracking it
+ del num_seen_children[parent_key]
+ else:
+ num_seen_children[parent_key] = seen_children
+ return result
+
def merge_sort(self, tip_key):
"""Compute the merge sorted graph output."""
from bzrlib import tsort
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx 2009-08-26 16:03:59 +0000
+++ b/bzrlib/_known_graph_pyx.pyx 2009-09-02 13:32:52 +0000
@@ -25,11 +25,18 @@
ctypedef struct PyObject:
pass
+ int PyString_CheckExact(object)
+
+ int PyObject_RichCompareBool(object, object, int)
+ int Py_LT
+
+ int PyTuple_CheckExact(object)
object PyTuple_New(Py_ssize_t n)
Py_ssize_t PyTuple_GET_SIZE(object t)
PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
+ int PyList_CheckExact(object)
Py_ssize_t PyList_GET_SIZE(object l)
PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
@@ -108,14 +115,65 @@
return <_KnownGraphNode>temp_node
-cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
+cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
cdef PyObject *temp_node
- cdef _KnownGraphNode node
- temp_node = PyTuple_GET_ITEM(parents, pos)
+ temp_node = PyTuple_GET_ITEM(tpl, pos)
return <_KnownGraphNode>temp_node
+def get_key(node):
+ cdef _KnownGraphNode real_node
+ real_node = node
+ return real_node.key
+
+
+cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
+ """Sort a list of _KnownGraphNode objects.
+
+ If lst_or_tpl is a list, it is allowed to mutate in place. It may also
+ just return the input list if everything is already sorted.
+ """
+ cdef _KnownGraphNode node1, node2
+ cdef int do_swap, is_tuple
+ cdef Py_ssize_t length
+
+ is_tuple = PyTuple_CheckExact(lst_or_tpl)
+ if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
+ raise TypeError('lst_or_tpl must be a list or tuple.')
+ length = len(lst_or_tpl)
+ if length == 0 or length == 1:
+ return lst_or_tpl
+ if length == 2:
+ if is_tuple:
+ node1 = _get_tuple_node(lst_or_tpl, 0)
+ node2 = _get_tuple_node(lst_or_tpl, 1)
+ else:
+ node1 = _get_list_node(lst_or_tpl, 0)
+ node2 = _get_list_node(lst_or_tpl, 1)
+ if reverse:
+ do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
+ else:
+ do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
+ if not do_swap:
+ return lst_or_tpl
+ if is_tuple:
+ return (node2, node1)
+ else:
+ # Swap 'in-place', since lists are mutable
+ Py_INCREF(node1)
+ PyList_SetItem(lst_or_tpl, 1, node1)
+ Py_INCREF(node2)
+ PyList_SetItem(lst_or_tpl, 0, node2)
+ return lst_or_tpl
+ # For all other sizes, we just use 'sorted()'
+ if is_tuple:
+ # Note that sorted() is just list(iterable).sort()
+ lst_or_tpl = list(lst_or_tpl)
+ lst_or_tpl.sort(key=get_key, reverse=reverse)
+ return lst_or_tpl
+
+
cdef class _MergeSorter
cdef class KnownGraph:
@@ -216,6 +274,19 @@
PyList_Append(tails, node)
return tails
+ def _find_tips(self):
+ cdef PyObject *temp_node
+ cdef _KnownGraphNode node
+ cdef Py_ssize_t pos
+
+ tips = []
+ pos = 0
+ while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+ node = <_KnownGraphNode>temp_node
+ if PyList_GET_SIZE(node.children) == 0:
+ PyList_Append(tips, node)
+ return tips
+
def _find_gdfo(self):
cdef _KnownGraphNode node
cdef _KnownGraphNode child
@@ -322,7 +393,7 @@
continue
if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
- parent_node = _get_parent(node.parents, pos)
+ parent_node = _get_tuple_node(node.parents, pos)
last_item = last_item + 1
if last_item < PyList_GET_SIZE(pending):
Py_INCREF(parent_node) # SetItem steals a ref
@@ -397,6 +468,77 @@
# We started from the parents, so we don't need to do anymore work
return topo_order
+ def gc_sort(self):
+ """Return a reverse topological ordering which is 'stable'.
+
+ There are a few constraints:
+ 1) Reverse topological (all children before all parents)
+ 2) Grouped by prefix
+ 3) 'stable' sorting, so that we get the same result, independent of
+ machine, or extra data.
+ To do this, we use the same basic algorithm as topo_sort, but when we
+ aren't sure what node to access next, we sort them lexicographically.
+ """
+ cdef PyObject *temp
+ cdef Py_ssize_t pos, last_item
+ cdef _KnownGraphNode node, node2, parent_node
+
+ tips = self._find_tips()
+ # Split the tips based on prefix
+ prefix_tips = {}
+ for pos from 0 <= pos < PyList_GET_SIZE(tips):
+ node = _get_list_node(tips, pos)
+ if PyString_CheckExact(node.key) or len(node.key) == 1:
+ prefix = ''
+ else:
+ prefix = node.key[0]
+ temp = PyDict_GetItem(prefix_tips, prefix)
+ if temp == NULL:
+ prefix_tips[prefix] = [node]
+ else:
+ tip_nodes = <object>temp
+ PyList_Append(tip_nodes, node)
+
+ result = []
+ for prefix in sorted(prefix_tips):
+ temp = PyDict_GetItem(prefix_tips, prefix)
+ assert temp != NULL
+ tip_nodes = <object>temp
+ pending = _sort_list_nodes(tip_nodes, 1)
+ last_item = PyList_GET_SIZE(pending) - 1
+ while last_item >= 0:
+ node = _get_list_node(pending, last_item)
+ last_item = last_item - 1
+ if node.parents is None:
+ # Ghost
+ continue
+ PyList_Append(result, node.key)
+ # Sorting the parent keys isn't strictly necessary for stable
+ # sorting of a given graph. But it does help minimize the
+ # differences between graphs
+ # For bzr.dev ancestry:
+ # 4.73ms no sort
+ # 7.73ms RichCompareBool sort
+ parents = _sort_list_nodes(node.parents, 1)
+ for pos from 0 <= pos < len(parents):
+ if PyTuple_CheckExact(parents):
+ parent_node = _get_tuple_node(parents, pos)
+ else:
+ parent_node = _get_list_node(parents, pos)
+ # TODO: GraphCycle detection
+ parent_node.seen = parent_node.seen + 1
+ if (parent_node.seen
+ == PyList_GET_SIZE(parent_node.children)):
+ # All children have been processed, queue up this
+ # parent
+ last_item = last_item + 1
+ if last_item < PyList_GET_SIZE(pending):
+ Py_INCREF(parent_node) # SetItem steals a ref
+ PyList_SetItem(pending, last_item, parent_node)
+ else:
+ PyList_Append(pending, parent_node)
+ parent_node.seen = 0
+ return result
def merge_sort(self, tip_key):
"""Compute the merge sorted graph output."""
@@ -522,7 +664,7 @@
raise RuntimeError('ghost nodes should not be pushed'
' onto the stack: %s' % (node,))
if PyTuple_GET_SIZE(node.parents) > 0:
- parent_node = _get_parent(node.parents, 0)
+ parent_node = _get_tuple_node(node.parents, 0)
ms_node.left_parent = parent_node
if parent_node.parents is None: # left-hand ghost
ms_node.left_pending_parent = None
@@ -532,7 +674,7 @@
if PyTuple_GET_SIZE(node.parents) > 1:
ms_node.pending_parents = []
for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
- parent_node = _get_parent(node.parents, pos)
+ parent_node = _get_tuple_node(node.parents, pos)
if parent_node.parents is None: # ghost
continue
PyList_Append(ms_node.pending_parents, parent_node)
=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py 2009-08-26 16:03:59 +0000
+++ b/bzrlib/tests/test__known_graph.py 2009-09-02 13:32:52 +0000
@@ -768,3 +768,70 @@
},
'E',
[])
+
+
+class TestKnownGraphStableReverseTopoSort(TestCaseWithKnownGraph):
+ """Test the sort order returned by gc_sort."""
+
+ def assertSorted(self, expected, parent_map):
+ graph = self.make_known_graph(parent_map)
+ value = graph.gc_sort()
+ if expected != value:
+ self.assertEqualDiff(pprint.pformat(expected),
+ pprint.pformat(value))
+
+ def test_empty(self):
+ self.assertSorted([], {})
+
+ def test_single(self):
+ self.assertSorted(['a'], {'a':()})
+ self.assertSorted([('a',)], {('a',):()})
+ self.assertSorted([('F', 'a')], {('F', 'a'):()})
+
+ def test_linear(self):
+ self.assertSorted(['c', 'b', 'a'], {'a':(), 'b':('a',), 'c':('b',)})
+ self.assertSorted([('c',), ('b',), ('a',)],
+ {('a',):(), ('b',): (('a',),), ('c',): (('b',),)})
+ self.assertSorted([('F', 'c'), ('F', 'b'), ('F', 'a')],
+ {('F', 'a'):(), ('F', 'b'): (('F', 'a'),),
+ ('F', 'c'): (('F', 'b'),)})
+
+ def test_mixed_ancestries(self):
+ # Each prefix should be sorted separately
+ self.assertSorted([('F', 'c'), ('F', 'b'), ('F', 'a'),
+ ('G', 'c'), ('G', 'b'), ('G', 'a'),
+ ('Q', 'c'), ('Q', 'b'), ('Q', 'a'),
+ ],
+ {('F', 'a'):(), ('F', 'b'): (('F', 'a'),),
+ ('F', 'c'): (('F', 'b'),),
+ ('G', 'a'):(), ('G', 'b'): (('G', 'a'),),
+ ('G', 'c'): (('G', 'b'),),
+ ('Q', 'a'):(), ('Q', 'b'): (('Q', 'a'),),
+ ('Q', 'c'): (('Q', 'b'),),
+ })
+
+ def test_stable_sorting(self):
+ # the sort order should be stable even when extra nodes are added
+ self.assertSorted(['b', 'c', 'a'],
+ {'a':(), 'b':('a',), 'c':('a',)})
+ self.assertSorted(['b', 'c', 'd', 'a'],
+ {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
+ self.assertSorted(['b', 'c', 'd', 'a'],
+ {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
+ self.assertSorted(['Z', 'b', 'c', 'd', 'a'],
+ {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
+ 'Z':('a',)})
+ self.assertSorted(['e', 'b', 'c', 'f', 'Z', 'd', 'a'],
+ {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
+ 'Z':('a',),
+ 'e':('b', 'c', 'd'),
+ 'f':('d', 'Z'),
+ })
+
+ def test_skip_ghost(self):
+ self.assertSorted(['b', 'c', 'a'],
+ {'a':(), 'b':('a', 'ghost'), 'c':('a',)})
+
+ def test_skip_mainline_ghost(self):
+ self.assertSorted(['b', 'c', 'a'],
+ {'a':(), 'b':('ghost', 'a'), 'c':('a',)})
More information about the bazaar-commits
mailing list