Rev 2542: Avoid topological sorting in Repository.get_ancestry where possible in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Thu Jun 21 06:22:39 BST 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2542
revision-id: pqm at pqm.ubuntu.com-20070621052237-2phm1z5dg4arrwnk
parent: pqm at pqm.ubuntu.com-20070620205507-uuolokc2e0eckrb7
parent: aaron.bentley at utoronto.ca-20070621032939-kcnl1dmxsixygskb
committer: Canonical.com Patch Queue Manager<pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2007-06-21 06:22:37 +0100
message:
  Avoid topological sorting in Repository.get_ancestry where possible
modified:
  bzrlib/branch.py               branch.py-20050309040759-e4baf4e0d046576e
  bzrlib/bundle/serializer/__init__.py __init__.py-20051118175413-86b97db0b618feef
  bzrlib/deprecated_graph.py     graph.py-20050905070950-b47dce53236c5e48
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/merge.py                merge.py-20050513021216-953b65a438527106
  bzrlib/missing.py              missing.py-20050812153334-097f7097e2a8bcd1
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/repofmt/knitrepo.py     knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/revision.py             revision.py-20050309040759-e77802c08f3999d5
  bzrlib/status.py               status.py-20050505062338-431bfa63ec9b19e6
  bzrlib/tests/repository_implementations/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
  bzrlib/tests/test_graph.py     test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
  bzrlib/tests/test_tsort.py     testtsort.py-20051025073946-27da871c394d5be4
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/tsort.py                tsort.py-20051025073946-7808f6aaf7d07208
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
    ------------------------------------------------------------
    revno: 2490.2.33
    merged: aaron.bentley at utoronto.ca-20070621032939-kcnl1dmxsixygskb
    parent: aaron.bentley at utoronto.ca-20070621023343-3f3oy8iszatpjcui
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: graphwalker
    timestamp: Wed 2007-06-20 23:29:39 -0400
    message:
      Disable topological sorting of get_ancestry where sensible
    ------------------------------------------------------------
    revno: 2490.2.32
    merged: aaron.bentley at utoronto.ca-20070621023343-3f3oy8iszatpjcui
    parent: aaron.bentley at utoronto.ca-20070621015829-b62l2d1ehuvgnr3x
    parent: aaron.bentley at utoronto.ca-20070617170704-z3xbz0t5nqddnyeo
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: graphwalker
    timestamp: Wed 2007-06-20 22:33:43 -0400
    message:
      Merge of not-sorting-ancestry branch
        ------------------------------------------------------------
        revno: 2530.1.1
        merged: aaron.bentley at utoronto.ca-20070617170704-z3xbz0t5nqddnyeo
        parent: pqm at pqm.ubuntu.com-20070615082222-98j9j9nbl6p2dx0m
        committer: Aaron Bentley <aaron.bentley at utoronto.ca>
        branch nick: get_ancestry_no_topo
        timestamp: Sun 2007-06-17 13:07:04 -0400
        message:
          Make topological sorting optional for get_ancestry
    ------------------------------------------------------------
    revno: 2490.2.31
    merged: aaron.bentley at utoronto.ca-20070621015829-b62l2d1ehuvgnr3x
    parent: aaron.bentley at utoronto.ca-20070621011219-a967wm19vxxc8dx5
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: graphwalker
    timestamp: Wed 2007-06-20 21:58:29 -0400
    message:
      Fix iter_topo_order to permit un-included parents
    ------------------------------------------------------------
    revno: 2490.2.30
    merged: aaron.bentley at utoronto.ca-20070621011219-a967wm19vxxc8dx5
    parent: abentley at panoramicfeedback.com-20070619145132-xc0rgyfzebvraxb0
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: graphwalker
    timestamp: Wed 2007-06-20 21:12:19 -0400
    message:
      Add functionality for tsorting graphs
=== modified file 'bzrlib/branch.py'
--- a/bzrlib/branch.py	2007-06-14 04:36:50 +0000
+++ b/bzrlib/branch.py	2007-06-21 03:29:39 +0000
@@ -1454,7 +1454,8 @@
             # we fetch here regardless of whether we need to so that we pickup
             # filled in ghosts.
             self.fetch(other, stop_revision)
-            my_ancestry = self.repository.get_ancestry(last_rev)
+            my_ancestry = self.repository.get_ancestry(last_rev,
+                                                       topo_sorted=False)
             if stop_revision in my_ancestry:
                 # last_revision is a descendant of stop_revision
                 return
@@ -1813,7 +1814,8 @@
         if master is not None:
             old_tip = self.last_revision()
             self.pull(master, overwrite=True)
-            if old_tip in self.repository.get_ancestry(self.last_revision()):
+            if old_tip in self.repository.get_ancestry(self.last_revision(),
+                                                       topo_sorted=False):
                 return None
             return old_tip
         return None

=== modified file 'bzrlib/bundle/serializer/__init__.py'
--- a/bzrlib/bundle/serializer/__init__.py	2007-03-13 06:55:43 +0000
+++ b/bzrlib/bundle/serializer/__init__.py	2007-06-21 03:29:39 +0000
@@ -120,10 +120,11 @@
     """
     if base_revision_id is NULL_REVISION:
         base_revision_id = None
-    base_ancestry = set(repository.get_ancestry(base_revision_id))
-    revision_ids = [r for r in repository.get_ancestry(revision_id) if r
-                    not in base_ancestry]
-    revision_ids = list(reversed(revision_ids))
+    revision_ids = set(repository.get_ancestry(revision_id, topo_sorted=False))
+    revision_ids.difference_update(repository.get_ancestry(base_revision_id,
+                                   topo_sorted=False))
+    revision_ids = list(repository.get_graph().iter_topo_order(revision_ids))
+    revision_ids.reverse()
     write(repository, revision_ids, out, format,
           forced_bases = {revision_id:base_revision_id})
     return revision_ids

=== modified file 'bzrlib/deprecated_graph.py'
--- a/bzrlib/deprecated_graph.py	2007-06-08 21:48:42 +0000
+++ b/bzrlib/deprecated_graph.py	2007-06-21 02:33:43 +0000
@@ -142,7 +142,7 @@
         """Return a dictionary of graph node:ancestor_list entries."""
         return dict(self._graph_ancestors.items())
 
-    def get_ancestry(self, node_id):
+    def get_ancestry(self, node_id, topo_sorted=True):
         """Return the inclusive ancestors of node_id in topological order."""
         # maybe optimise this ?
         result = {}
@@ -155,6 +155,8 @@
             for parent in parents:
                 if parent not in result and parent not in pending:
                     pending.add(parent)
+        if not topo_sorted:
+            return result.keys()
         return topo_sort(result.items())
 
     def get_descendants(self):

=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2007-06-19 14:51:32 +0000
+++ b/bzrlib/graph.py	2007-06-21 01:58:29 +0000
@@ -14,7 +14,10 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-from bzrlib import errors
+from bzrlib import (
+    errors,
+    tsort,
+    )
 from bzrlib.deprecated_graph import (node_distances, select_farthest)
 from bzrlib.revision import NULL_REVISION
 
@@ -275,6 +278,15 @@
                 return lca.pop()
             revisions = lca
 
+    def iter_topo_order(self, revisions):
+        """Iterate through the input revisions in topological order.
+
+        This sorting only ensures that parents come before their children.
+        An ancestor may sort after a descendant if the relationship is not
+        visible in the supplied list of revisions.
+        """
+        return tsort.topo_sort(zip(revisions, self.get_parents(revisions)))
+
 
 class _BreadthFirstSearcher(object):
     """Parallel search the breadth-first the ancestry of revisions.

=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2007-04-04 02:31:25 +0000
+++ b/bzrlib/inventory.py	2007-06-21 03:29:39 +0000
@@ -181,7 +181,7 @@
         :param entry_vf: The entry versioned file, if its already available.
         """
         def get_ancestors(weave, entry):
-            return set(weave.get_ancestry(entry.revision))
+            return set(weave.get_ancestry(entry.revision, topo_sorted=False))
         # revision:ie mapping for each ie found in previous_inventories.
         candidates = {}
         # revision:ie mapping with one revision for each head.

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-03-09 22:17:39 +0000
+++ b/bzrlib/knit.py	2007-06-21 03:29:39 +0000
@@ -939,14 +939,14 @@
         except KeyError:
             raise RevisionNotPresent(version_id, self.filename)
 
-    def get_ancestry(self, versions):
+    def get_ancestry(self, versions, topo_sorted=True):
         """See VersionedFile.get_ancestry."""
         if isinstance(versions, basestring):
             versions = [versions]
         if not versions:
             return []
         versions = [osutils.safe_revision_id(v) for v in versions]
-        return self._index.get_ancestry(versions)
+        return self._index.get_ancestry(versions, topo_sorted)
 
     def get_ancestry_with_ghosts(self, versions):
         """See VersionedFile.get_ancestry_with_ghosts."""
@@ -967,7 +967,7 @@
         from bzrlib.weave import Weave
 
         w = Weave(self.filename)
-        ancestry = self.get_ancestry(version_ids)
+        ancestry = set(self.get_ancestry(version_ids, topo_sorted=False))
         sorted_graph = topo_sort(self._index.get_graph())
         version_list = [vid for vid in sorted_graph if vid in ancestry]
         
@@ -982,14 +982,14 @@
         """See VersionedFile.plan_merge."""
         ver_a = osutils.safe_revision_id(ver_a)
         ver_b = osutils.safe_revision_id(ver_b)
-        ancestors_b = set(self.get_ancestry(ver_b))
+        ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))
         def status_a(revision, text):
             if revision in ancestors_b:
                 return 'killed-b', text
             else:
                 return 'new-a', text
         
-        ancestors_a = set(self.get_ancestry(ver_a))
+        ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
         def status_b(revision, text):
             if revision in ancestors_a:
                 return 'killed-a', text
@@ -1217,7 +1217,7 @@
     def get_graph(self):
         return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]
 
-    def get_ancestry(self, versions):
+    def get_ancestry(self, versions, topo_sorted=True):
         """See VersionedFile.get_ancestry."""
         # get a graph of all the mentioned versions:
         graph = {}
@@ -1233,6 +1233,8 @@
             # if not completed and not a ghost
             pending.update([p for p in parents if p not in graph])
             graph[version] = parents
+        if not topo_sorted:
+            return graph.keys()
         return topo_sort(graph.items())
 
     def get_ancestry_with_ghosts(self, versions):

=== modified file 'bzrlib/merge.py'
--- a/bzrlib/merge.py	2007-06-08 21:48:42 +0000
+++ b/bzrlib/merge.py	2007-06-21 03:29:39 +0000
@@ -201,7 +201,8 @@
             return
         if self.other_rev_id is None:
             return
-        ancestry = self.this_branch.repository.get_ancestry(self.this_basis)
+        ancestry = set(self.this_branch.repository.get_ancestry(
+            self.this_basis, topo_sorted=False))
         if self.other_rev_id in ancestry:
             return
         self.this_tree.add_parent_tree((self.other_rev_id, self.other_tree))

=== modified file 'bzrlib/missing.py'
--- a/bzrlib/missing.py	2007-05-24 12:13:06 +0000
+++ b/bzrlib/missing.py	2007-06-21 03:29:39 +0000
@@ -119,7 +119,8 @@
 def _get_ancestry(repository, progress, label, step, rev_history):
     progress.update('%s ancestry' % label, step, 5)
     if len(rev_history) > 0:
-        ancestry = set(repository.get_ancestry(rev_history[-1]))
+        ancestry = set(repository.get_ancestry(rev_history[-1],
+                       topo_sorted=False))
     else:
         ancestry = set()
     return ancestry

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2007-06-08 21:48:42 +0000
+++ b/bzrlib/remote.py	2007-06-21 02:33:43 +0000
@@ -533,9 +533,9 @@
         return self._real_repository.control_weaves
 
     @needs_read_lock
-    def get_ancestry(self, revision_id):
+    def get_ancestry(self, revision_id, topo_sorted=True):
         self._ensure_real()
-        return self._real_repository.get_ancestry(revision_id)
+        return self._real_repository.get_ancestry(revision_id, topo_sorted)
 
     @needs_read_lock
     def get_inventory_weave(self):

=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2007-06-19 14:49:06 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2007-06-21 02:33:43 +0000
@@ -111,17 +111,18 @@
         return self._fileid_involved_by_set(changed)
 
     @needs_read_lock
-    def get_ancestry(self, revision_id):
+    def get_ancestry(self, revision_id, topo_sorted=True):
         """Return a list of revision-ids integrated by a revision.
         
-        This is topologically sorted.
+        This is topologically sorted, unless 'topo_sorted' is specified as
+        False.
         """
         if revision_id is None:
             return [None]
         revision_id = osutils.safe_revision_id(revision_id)
         vf = self._get_revision_vf()
         try:
-            return [None] + vf.get_ancestry(revision_id)
+            return [None] + vf.get_ancestry(revision_id, topo_sorted)
         except errors.RevisionNotPresent:
             raise errors.NoSuchRevision(self, revision_id)
 

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-06-09 01:13:31 +0000
+++ b/bzrlib/repository.py	2007-06-21 03:29:39 +0000
@@ -819,7 +819,7 @@
             yield RevisionTree(self, inv, revision_id)
 
     @needs_read_lock
-    def get_ancestry(self, revision_id):
+    def get_ancestry(self, revision_id, topo_sorted=True):
         """Return a list of revision-ids integrated by a revision.
 
         The first element of the list is always None, indicating the origin 
@@ -834,7 +834,7 @@
         if not self.has_revision(revision_id):
             raise errors.NoSuchRevision(self, revision_id)
         w = self.get_inventory_weave()
-        candidates = w.get_ancestry(revision_id)
+        candidates = w.get_ancestry(revision_id, topo_sorted)
         return [None] + candidates # self._eliminate_revisions_not_present(candidates)
 
     @needs_read_lock

=== modified file 'bzrlib/revision.py'
--- a/bzrlib/revision.py	2007-06-08 21:48:42 +0000
+++ b/bzrlib/revision.py	2007-06-21 03:29:39 +0000
@@ -124,7 +124,8 @@
     revisions_source is an object supporting a get_revision operation that
     behaves like Branch's.
     """
-    return (candidate_id in branch.repository.get_ancestry(revision_id))
+    return (candidate_id in branch.repository.get_ancestry(revision_id,
+            topo_sorted=False))
 
 
 def iter_ancestors(revision_id, revision_source, only_present=False):
@@ -258,10 +259,10 @@
                 [revision_a, revision_b])
             # Shortcut the case where one of the tips is already included in
             # the other graphs ancestry.
-            ancestry_a = graph.get_ancestry(revision_a)
+            ancestry_a = graph.get_ancestry(revision_a, topo_sorted=False)
             if revision_b in ancestry_a:
                 return revision_b
-            ancestry_b = graph.get_ancestry(revision_b)
+            ancestry_b = graph.get_ancestry(revision_b, topo_sorted=False)
             if revision_a in ancestry_b:
                 return revision_a
             # convert to a NULL_REVISION based graph.

=== modified file 'bzrlib/status.py'
--- a/bzrlib/status.py	2007-03-11 13:44:18 +0000
+++ b/bzrlib/status.py	2007-06-21 03:29:39 +0000
@@ -194,7 +194,8 @@
         print >>to_file, 'pending merges:'
     if last_revision is not None:
         try:
-            ignore = set(branch.repository.get_ancestry(last_revision))
+            ignore = set(branch.repository.get_ancestry(last_revision,
+                                                        topo_sorted=False))
         except errors.NoSuchRevision:
             # the last revision is a ghost : assume everything is new 
             # except for it

=== modified file 'bzrlib/tests/repository_implementations/test_repository.py'
--- a/bzrlib/tests/repository_implementations/test_repository.py	2007-04-13 06:23:59 +0000
+++ b/bzrlib/tests/repository_implementations/test_repository.py	2007-06-17 17:07:04 +0000
@@ -538,6 +538,11 @@
         self.assertRaises(errors.NoSuchRevision,
                           self.bzrdir.open_repository().get_ancestry, 'orphan')
 
+    def test_get_unsorted_ancestry(self):
+        repo = self.bzrdir.open_repository()
+        self.assertEqual(set(repo.get_ancestry('rev3')),
+                         set(repo.get_ancestry('rev3', topo_sorted=False)))
+
     def test_get_revision_graph(self):
         # we can get a mapping of id->parents for the entire revision graph or bits thereof.
         self.assertEqual({'rev1':[],

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2007-06-19 14:49:06 +0000
+++ b/bzrlib/tests/test_graph.py	2007-06-21 01:58:29 +0000
@@ -121,7 +121,7 @@
                     'rev3a': ['rev2a', 'rev2b'], 'rev3b': ['rev2b', 'rev2c']}
 
 
-class TestGraphWalker(TestCaseWithMemoryTransport):
+class TestGraph(TestCaseWithMemoryTransport):
 
     def make_graph(self, ancestors):
         tree = self.prepare_memory_tree('.')
@@ -280,3 +280,11 @@
                          stacked.get_parents(['rev2', 'rev2']))
         self.assertEqual([['rev4',], ['rev4']],
                          stacked.get_parents(['rev1', 'rev1']))
+
+    def test_iter_topo_order(self):
+        graph = self.make_graph(ancestry_1)
+        args = ['rev2a', 'rev3', 'rev1']
+        topo_args = list(graph.iter_topo_order(args))
+        self.assertEqual(set(args), set(topo_args))
+        self.assertTrue(topo_args.index('rev2a') > topo_args.index('rev1'))
+        self.assertTrue(topo_args.index('rev2a') < topo_args.index('rev3'))

=== modified file 'bzrlib/tests/test_tsort.py'
--- a/bzrlib/tests/test_tsort.py	2006-10-16 10:03:21 +0000
+++ b/bzrlib/tests/test_tsort.py	2007-06-21 01:58:29 +0000
@@ -85,6 +85,12 @@
                                    (8, [0, 1, 4, 5, 6])]),
                                   [0, 1, 2, 3, 4, 5, 6, 7, 8])
 
+    def test_tsort_unincluded_parent(self):
+        """Sort nodes, but don't include some parents in the output"""
+        self.assertSortAndIterate([(0, [1]),
+                                   (1, [2])],
+                                   [1, 0])
+
 
 class MergeSortTests(TestCase):
 

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2007-03-02 17:09:40 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2007-06-17 17:07:04 +0000
@@ -384,6 +384,9 @@
         self.assertRaises(RevisionNotPresent,
             f.get_ancestry, ['rM', 'rX'])
 
+        self.assertEqual(set(f.get_ancestry('rM')),
+            set(f.get_ancestry('rM', topo_sorted=False)))
+
     def test_mutate_after_finish(self):
         f = self.get_file()
         f.transaction_finished()

=== modified file 'bzrlib/tsort.py'
--- a/bzrlib/tsort.py	2007-04-19 00:03:01 +0000
+++ b/bzrlib/tsort.py	2007-06-21 01:58:29 +0000
@@ -61,6 +61,7 @@
         """
         # a dict of the graph.
         self._graph = dict(graph)
+        self._visitable = set(self._graph)
         ### if debugging:
         # self._original_graph = dict(graph)
         
@@ -120,6 +121,8 @@
                             # this parent was completed by a child on the
                             # call stack. skip it.
                             continue
+                        if next_node_name not in self._visitable:
+                            continue
                         # otherwise transfer it from the source graph into the
                         # top of the current depth first search stack.
                         try:

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2007-02-10 02:48:43 +0000
+++ b/bzrlib/versionedfile.py	2007-06-21 02:33:43 +0000
@@ -302,10 +302,13 @@
         """
         raise NotImplementedError(self.get_lines)
 
-    def get_ancestry(self, version_ids):
+    def get_ancestry(self, version_ids, topo_sorted=True):
         """Return a list of all ancestors of given version(s). This
         will not include the null revision.
 
+        This list will not be topologically sorted if topo_sorted=False is
+        passed.
+
         Must raise RevisionNotPresent if any of the given versions are
         not present in file history."""
         if isinstance(version_ids, basestring):

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2007-03-21 01:34:41 +0000
+++ b/bzrlib/weave.py	2007-06-17 17:07:04 +0000
@@ -607,7 +607,7 @@
         else:
             return self.get_ancestry(version_ids)
 
-    def get_ancestry(self, version_ids):
+    def get_ancestry(self, version_ids, topo_sorted=True):
         """See VersionedFile.get_ancestry."""
         if isinstance(version_ids, basestring):
             version_ids = [version_ids]




More information about the bazaar-commits mailing list