Rev 4667: (jam) Merge 2.0 4649 into bzr.dev in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Wed Sep 2 19:08:03 BST 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4667 [merge]
revision-id: pqm at pqm.ubuntu.com-20090902180758-uuuxc8dd62vq67i8
parent: pqm at pqm.ubuntu.com-20090902090616-8h9ung293q3zp4m0
parent: pqm at pqm.ubuntu.com-20090902154814-ter903a552n68ahb
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2009-09-02 19:07:58 +0100
message:
  (jam) Merge 2.0 4649 into bzr.dev
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/_known_graph_py.py      _known_graph_py.py-20090610185421-vw8vfda2cgnckgb1-1
  bzrlib/_known_graph_pyx.pyx    _known_graph_pyx.pyx-20090610194911-yjk73td9hpjilas0-1
  bzrlib/tests/test__known_graph.py test__known_graph.py-20090610185421-vw8vfda2cgnckgb1-2
=== modified file 'NEWS'
--- a/NEWS	2009-09-02 08:29:07 +0000
+++ b/NEWS	2009-09-02 18:07:58 +0000
@@ -73,6 +73,10 @@
   that has a ghost in the mainline ancestry.
   (John Arbash Meinel, #419241)
 
+* ``groupcompress`` sort order is now more stable, rather than relying on
+  ``topo_sort`` ordering. The implementation is now
+  ``KnownGraph.gc_sort``. (John Arbash Meinel)
+
 * Local data conversion will generate correct deltas. This is a critical
   bugfix vs 2.0rc1, and all 2.0rc1 users should upgrade to 2.0rc2 before
   converting repositories. (Robert Collins, #422849)

=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py	2009-08-17 20:41:26 +0000
+++ b/bzrlib/_known_graph_py.py	2009-08-25 18:45:40 +0000
@@ -97,6 +97,10 @@
         return [node for node in self._nodes.itervalues()
                 if not node.parent_keys]
 
+    def _find_tips(self):
+        return [node for node in self._nodes.itervalues()
+                      if not node.child_keys]
+
     def _find_gdfo(self):
         nodes = self._nodes
         known_parent_gdfos = {}
@@ -218,6 +222,51 @@
         # We started from the parents, so we don't need to do anymore work
         return topo_order
 
+    def gc_sort(self):
+        """Return a reverse topological ordering which is 'stable'.
+
+        There are a few constraints:
+          1) Reverse topological (all children before all parents)
+          2) Grouped by prefix
+          3) 'stable' sorting, so that we get the same result, independent of
+             machine, or extra data.
+        To do this, we use the same basic algorithm as topo_sort, but when we
+        aren't sure what node to access next, we sort them lexicographically.
+        """
+        tips = self._find_tips()
+        # Split the tips based on prefix
+        prefix_tips = {}
+        for node in tips:
+            if node.key.__class__ is str or len(node.key) == 1:
+                prefix = ''
+            else:
+                prefix = node.key[0]
+            prefix_tips.setdefault(prefix, []).append(node)
+
+        num_seen_children = dict.fromkeys(self._nodes, 0)
+
+        result = []
+        for prefix in sorted(prefix_tips):
+            pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
+                             reverse=True)
+            while pending:
+                node = pending.pop()
+                if node.parent_keys is None:
+                    # Ghost node, skip it
+                    continue
+                result.append(node.key)
+                for parent_key in sorted(node.parent_keys, reverse=True):
+                    parent_node = self._nodes[parent_key]
+                    seen_children = num_seen_children[parent_key] + 1
+                    if seen_children == len(parent_node.child_keys):
+                        # All children have been processed, enqueue this parent
+                        pending.append(parent_node)
+                        # This has been queued up, stop tracking it
+                        del num_seen_children[parent_key]
+                    else:
+                        num_seen_children[parent_key] = seen_children
+        return result
+
     def merge_sort(self, tip_key):
         """Compute the merge sorted graph output."""
         from bzrlib import tsort

=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-08-26 16:03:59 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-09-02 13:32:52 +0000
@@ -25,11 +25,18 @@
     ctypedef struct PyObject:
         pass
 
+    int PyString_CheckExact(object)
+
+    int PyObject_RichCompareBool(object, object, int)
+    int Py_LT
+
+    int PyTuple_CheckExact(object)
     object PyTuple_New(Py_ssize_t n)
     Py_ssize_t PyTuple_GET_SIZE(object t)
     PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
     void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
 
+    int PyList_CheckExact(object)
     Py_ssize_t PyList_GET_SIZE(object l)
     PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
     int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
@@ -108,14 +115,65 @@
     return <_KnownGraphNode>temp_node
 
 
-cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
+cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
     cdef PyObject *temp_node
-    cdef _KnownGraphNode node
 
-    temp_node = PyTuple_GET_ITEM(parents, pos)
+    temp_node = PyTuple_GET_ITEM(tpl, pos)
     return <_KnownGraphNode>temp_node
 
 
+def get_key(node):
+    cdef _KnownGraphNode real_node
+    real_node = node
+    return real_node.key
+
+
+cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
+    """Sort a list of _KnownGraphNode objects.
+
+    If lst_or_tpl is a list, it is allowed to mutate in place. It may also
+    just return the input list if everything is already sorted.
+    """
+    cdef _KnownGraphNode node1, node2
+    cdef int do_swap, is_tuple
+    cdef Py_ssize_t length
+
+    is_tuple = PyTuple_CheckExact(lst_or_tpl)
+    if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
+        raise TypeError('lst_or_tpl must be a list or tuple.')
+    length = len(lst_or_tpl)
+    if length == 0 or length == 1:
+        return lst_or_tpl
+    if length == 2:
+        if is_tuple:
+            node1 = _get_tuple_node(lst_or_tpl, 0)
+            node2 = _get_tuple_node(lst_or_tpl, 1)
+        else:
+            node1 = _get_list_node(lst_or_tpl, 0)
+            node2 = _get_list_node(lst_or_tpl, 1)
+        if reverse:
+            do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
+        else:
+            do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
+        if not do_swap:
+            return lst_or_tpl
+        if is_tuple:
+            return (node2, node1)
+        else:
+            # Swap 'in-place', since lists are mutable
+            Py_INCREF(node1)
+            PyList_SetItem(lst_or_tpl, 1, node1)
+            Py_INCREF(node2)
+            PyList_SetItem(lst_or_tpl, 0, node2)
+            return lst_or_tpl
+    # For all other sizes, we just use 'sorted()'
+    if is_tuple:
+        # Note that sorted() is just list(iterable).sort()
+        lst_or_tpl = list(lst_or_tpl)
+    lst_or_tpl.sort(key=get_key, reverse=reverse)
+    return lst_or_tpl
+
+
 cdef class _MergeSorter
 
 cdef class KnownGraph:
@@ -216,6 +274,19 @@
                 PyList_Append(tails, node)
         return tails
 
+    def _find_tips(self):
+        cdef PyObject *temp_node
+        cdef _KnownGraphNode node
+        cdef Py_ssize_t pos
+
+        tips = []
+        pos = 0
+        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+            node = <_KnownGraphNode>temp_node
+            if PyList_GET_SIZE(node.children) == 0:
+                PyList_Append(tips, node)
+        return tips
+
     def _find_gdfo(self):
         cdef _KnownGraphNode node
         cdef _KnownGraphNode child
@@ -322,7 +393,7 @@
                 continue
             if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
                 for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
-                    parent_node = _get_parent(node.parents, pos)
+                    parent_node = _get_tuple_node(node.parents, pos)
                     last_item = last_item + 1
                     if last_item < PyList_GET_SIZE(pending):
                         Py_INCREF(parent_node) # SetItem steals a ref
@@ -397,6 +468,77 @@
         # We started from the parents, so we don't need to do anymore work
         return topo_order
 
+    def gc_sort(self):
+        """Return a reverse topological ordering which is 'stable'.
+
+        There are a few constraints:
+          1) Reverse topological (all children before all parents)
+          2) Grouped by prefix
+          3) 'stable' sorting, so that we get the same result, independent of
+             machine, or extra data.
+        To do this, we use the same basic algorithm as topo_sort, but when we
+        aren't sure what node to access next, we sort them lexicographically.
+        """
+        cdef PyObject *temp
+        cdef Py_ssize_t pos, last_item
+        cdef _KnownGraphNode node, node2, parent_node
+
+        tips = self._find_tips()
+        # Split the tips based on prefix
+        prefix_tips = {}
+        for pos from 0 <= pos < PyList_GET_SIZE(tips):
+            node = _get_list_node(tips, pos)
+            if PyString_CheckExact(node.key) or len(node.key) == 1:
+                prefix = ''
+            else:
+                prefix = node.key[0]
+            temp = PyDict_GetItem(prefix_tips, prefix)
+            if temp == NULL:
+                prefix_tips[prefix] = [node]
+            else:
+                tip_nodes = <object>temp
+                PyList_Append(tip_nodes, node)
+
+        result = []
+        for prefix in sorted(prefix_tips):
+            temp = PyDict_GetItem(prefix_tips, prefix)
+            assert temp != NULL
+            tip_nodes = <object>temp
+            pending = _sort_list_nodes(tip_nodes, 1)
+            last_item = PyList_GET_SIZE(pending) - 1
+            while last_item >= 0:
+                node = _get_list_node(pending, last_item)
+                last_item = last_item - 1
+                if node.parents is None:
+                    # Ghost
+                    continue
+                PyList_Append(result, node.key)
+                # Sorting the parent keys isn't strictly necessary for stable
+                # sorting of a given graph. But it does help minimize the
+                # differences between graphs
+                # For bzr.dev ancestry:
+                #   4.73ms  no sort
+                #   7.73ms  RichCompareBool sort
+                parents = _sort_list_nodes(node.parents, 1)
+                for pos from 0 <= pos < len(parents):
+                    if PyTuple_CheckExact(parents):
+                        parent_node = _get_tuple_node(parents, pos)
+                    else:
+                        parent_node = _get_list_node(parents, pos)
+                    # TODO: GraphCycle detection
+                    parent_node.seen = parent_node.seen + 1
+                    if (parent_node.seen
+                        == PyList_GET_SIZE(parent_node.children)):
+                        # All children have been processed, queue up this
+                        # parent
+                        last_item = last_item + 1
+                        if last_item < PyList_GET_SIZE(pending):
+                            Py_INCREF(parent_node) # SetItem steals a ref
+                            PyList_SetItem(pending, last_item, parent_node)
+                        else:
+                            PyList_Append(pending, parent_node)
+                        parent_node.seen = 0
+        return result
 
     def merge_sort(self, tip_key):
         """Compute the merge sorted graph output."""
@@ -522,7 +664,7 @@
             raise RuntimeError('ghost nodes should not be pushed'
                                ' onto the stack: %s' % (node,))
         if PyTuple_GET_SIZE(node.parents) > 0:
-            parent_node = _get_parent(node.parents, 0)
+            parent_node = _get_tuple_node(node.parents, 0)
             ms_node.left_parent = parent_node
             if parent_node.parents is None: # left-hand ghost
                 ms_node.left_pending_parent = None
@@ -532,7 +674,7 @@
         if PyTuple_GET_SIZE(node.parents) > 1:
             ms_node.pending_parents = []
             for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
-                parent_node = _get_parent(node.parents, pos)
+                parent_node = _get_tuple_node(node.parents, pos)
                 if parent_node.parents is None: # ghost
                     continue
                 PyList_Append(ms_node.pending_parents, parent_node)

=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py	2009-08-26 16:03:59 +0000
+++ b/bzrlib/tests/test__known_graph.py	2009-09-02 13:32:52 +0000
@@ -768,3 +768,70 @@
                 },
                 'E',
                 [])
+
+
+class TestKnownGraphStableReverseTopoSort(TestCaseWithKnownGraph):
+    """Test the sort order returned by gc_sort."""
+
+    def assertSorted(self, expected, parent_map):
+        graph = self.make_known_graph(parent_map)
+        value = graph.gc_sort()
+        if expected != value:
+            self.assertEqualDiff(pprint.pformat(expected),
+                                 pprint.pformat(value))
+
+    def test_empty(self):
+        self.assertSorted([], {})
+
+    def test_single(self):
+        self.assertSorted(['a'], {'a':()})
+        self.assertSorted([('a',)], {('a',):()})
+        self.assertSorted([('F', 'a')], {('F', 'a'):()})
+
+    def test_linear(self):
+        self.assertSorted(['c', 'b', 'a'], {'a':(), 'b':('a',), 'c':('b',)})
+        self.assertSorted([('c',), ('b',), ('a',)],
+                          {('a',):(), ('b',): (('a',),), ('c',): (('b',),)})
+        self.assertSorted([('F', 'c'), ('F', 'b'), ('F', 'a')],
+                          {('F', 'a'):(), ('F', 'b'): (('F', 'a'),),
+                           ('F', 'c'): (('F', 'b'),)})
+
+    def test_mixed_ancestries(self):
+        # Each prefix should be sorted separately
+        self.assertSorted([('F', 'c'), ('F', 'b'), ('F', 'a'),
+                           ('G', 'c'), ('G', 'b'), ('G', 'a'),
+                           ('Q', 'c'), ('Q', 'b'), ('Q', 'a'),
+                          ],
+                          {('F', 'a'):(), ('F', 'b'): (('F', 'a'),),
+                           ('F', 'c'): (('F', 'b'),),
+                           ('G', 'a'):(), ('G', 'b'): (('G', 'a'),),
+                           ('G', 'c'): (('G', 'b'),),
+                           ('Q', 'a'):(), ('Q', 'b'): (('Q', 'a'),),
+                           ('Q', 'c'): (('Q', 'b'),),
+                          })
+
+    def test_stable_sorting(self):
+        # the sort order should be stable even when extra nodes are added
+        self.assertSorted(['b', 'c', 'a'],
+                          {'a':(), 'b':('a',), 'c':('a',)})
+        self.assertSorted(['b', 'c', 'd', 'a'],
+                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
+        self.assertSorted(['b', 'c', 'd', 'a'],
+                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
+        self.assertSorted(['Z', 'b', 'c', 'd', 'a'],
+                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
+                           'Z':('a',)})
+        self.assertSorted(['e', 'b', 'c', 'f', 'Z', 'd', 'a'],
+                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
+                           'Z':('a',),
+                           'e':('b', 'c', 'd'),
+                           'f':('d', 'Z'),
+                           })
+
+    def test_skip_ghost(self):
+        self.assertSorted(['b', 'c', 'a'],
+                          {'a':(), 'b':('a', 'ghost'), 'c':('a',)})
+
+    def test_skip_mainline_ghost(self):
+        self.assertSorted(['b', 'c', 'a'],
+                          {'a':(), 'b':('ghost', 'a'), 'c':('a',)})




More information about the bazaar-commits mailing list