Rev 2542: Avoid topological sorting in Repository.get_ancestry where possible in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Thu Jun 21 06:22:39 BST 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 2542
revision-id: pqm at pqm.ubuntu.com-20070621052237-2phm1z5dg4arrwnk
parent: pqm at pqm.ubuntu.com-20070620205507-uuolokc2e0eckrb7
parent: aaron.bentley at utoronto.ca-20070621032939-kcnl1dmxsixygskb
committer: Canonical.com Patch Queue Manager<pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2007-06-21 06:22:37 +0100
message:
Avoid topological sorting in Repository.get_ancestry where possible
modified:
bzrlib/branch.py branch.py-20050309040759-e4baf4e0d046576e
bzrlib/bundle/serializer/__init__.py __init__.py-20051118175413-86b97db0b618feef
bzrlib/deprecated_graph.py graph.py-20050905070950-b47dce53236c5e48
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/merge.py merge.py-20050513021216-953b65a438527106
bzrlib/missing.py missing.py-20050812153334-097f7097e2a8bcd1
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repofmt/knitrepo.py knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/revision.py revision.py-20050309040759-e77802c08f3999d5
bzrlib/status.py status.py-20050505062338-431bfa63ec9b19e6
bzrlib/tests/repository_implementations/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
bzrlib/tests/test_tsort.py testtsort.py-20051025073946-27da871c394d5be4
bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
bzrlib/tsort.py tsort.py-20051025073946-7808f6aaf7d07208
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
bzrlib/weave.py knit.py-20050627021749-759c29984154256b
------------------------------------------------------------
revno: 2490.2.33
merged: aaron.bentley at utoronto.ca-20070621032939-kcnl1dmxsixygskb
parent: aaron.bentley at utoronto.ca-20070621023343-3f3oy8iszatpjcui
committer: Aaron Bentley <aaron.bentley at utoronto.ca>
branch nick: graphwalker
timestamp: Wed 2007-06-20 23:29:39 -0400
message:
Disable topological sorting of get_ancestry where sensible
------------------------------------------------------------
revno: 2490.2.32
merged: aaron.bentley at utoronto.ca-20070621023343-3f3oy8iszatpjcui
parent: aaron.bentley at utoronto.ca-20070621015829-b62l2d1ehuvgnr3x
parent: aaron.bentley at utoronto.ca-20070617170704-z3xbz0t5nqddnyeo
committer: Aaron Bentley <aaron.bentley at utoronto.ca>
branch nick: graphwalker
timestamp: Wed 2007-06-20 22:33:43 -0400
message:
Merge of not-sorting-ancestry branch
------------------------------------------------------------
revno: 2530.1.1
merged: aaron.bentley at utoronto.ca-20070617170704-z3xbz0t5nqddnyeo
parent: pqm at pqm.ubuntu.com-20070615082222-98j9j9nbl6p2dx0m
committer: Aaron Bentley <aaron.bentley at utoronto.ca>
branch nick: get_ancestry_no_topo
timestamp: Sun 2007-06-17 13:07:04 -0400
message:
Make topological sorting optional for get_ancestry
------------------------------------------------------------
revno: 2490.2.31
merged: aaron.bentley at utoronto.ca-20070621015829-b62l2d1ehuvgnr3x
parent: aaron.bentley at utoronto.ca-20070621011219-a967wm19vxxc8dx5
committer: Aaron Bentley <aaron.bentley at utoronto.ca>
branch nick: graphwalker
timestamp: Wed 2007-06-20 21:58:29 -0400
message:
Fix iter_topo_order to permit un-included parents
------------------------------------------------------------
revno: 2490.2.30
merged: aaron.bentley at utoronto.ca-20070621011219-a967wm19vxxc8dx5
parent: abentley at panoramicfeedback.com-20070619145132-xc0rgyfzebvraxb0
committer: Aaron Bentley <aaron.bentley at utoronto.ca>
branch nick: graphwalker
timestamp: Wed 2007-06-20 21:12:19 -0400
message:
Add functionality for tsorting graphs
=== modified file 'bzrlib/branch.py'
--- a/bzrlib/branch.py 2007-06-14 04:36:50 +0000
+++ b/bzrlib/branch.py 2007-06-21 03:29:39 +0000
@@ -1454,7 +1454,8 @@
# we fetch here regardless of whether we need to so that we pickup
# filled in ghosts.
self.fetch(other, stop_revision)
- my_ancestry = self.repository.get_ancestry(last_rev)
+ my_ancestry = self.repository.get_ancestry(last_rev,
+ topo_sorted=False)
if stop_revision in my_ancestry:
# last_revision is a descendant of stop_revision
return
@@ -1813,7 +1814,8 @@
if master is not None:
old_tip = self.last_revision()
self.pull(master, overwrite=True)
- if old_tip in self.repository.get_ancestry(self.last_revision()):
+ if old_tip in self.repository.get_ancestry(self.last_revision(),
+ topo_sorted=False):
return None
return old_tip
return None
=== modified file 'bzrlib/bundle/serializer/__init__.py'
--- a/bzrlib/bundle/serializer/__init__.py 2007-03-13 06:55:43 +0000
+++ b/bzrlib/bundle/serializer/__init__.py 2007-06-21 03:29:39 +0000
@@ -120,10 +120,11 @@
"""
if base_revision_id is NULL_REVISION:
base_revision_id = None
- base_ancestry = set(repository.get_ancestry(base_revision_id))
- revision_ids = [r for r in repository.get_ancestry(revision_id) if r
- not in base_ancestry]
- revision_ids = list(reversed(revision_ids))
+ revision_ids = set(repository.get_ancestry(revision_id, topo_sorted=False))
+ revision_ids.difference_update(repository.get_ancestry(base_revision_id,
+ topo_sorted=False))
+ revision_ids = list(repository.get_graph().iter_topo_order(revision_ids))
+ revision_ids.reverse()
write(repository, revision_ids, out, format,
forced_bases = {revision_id:base_revision_id})
return revision_ids
=== modified file 'bzrlib/deprecated_graph.py'
--- a/bzrlib/deprecated_graph.py 2007-06-08 21:48:42 +0000
+++ b/bzrlib/deprecated_graph.py 2007-06-21 02:33:43 +0000
@@ -142,7 +142,7 @@
"""Return a dictionary of graph node:ancestor_list entries."""
return dict(self._graph_ancestors.items())
- def get_ancestry(self, node_id):
+ def get_ancestry(self, node_id, topo_sorted=True):
"""Return the inclusive ancestors of node_id in topological order."""
# maybe optimise this ?
result = {}
@@ -155,6 +155,8 @@
for parent in parents:
if parent not in result and parent not in pending:
pending.add(parent)
+ if not topo_sorted:
+ return result.keys()
return topo_sort(result.items())
def get_descendants(self):
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2007-06-19 14:51:32 +0000
+++ b/bzrlib/graph.py 2007-06-21 01:58:29 +0000
@@ -14,7 +14,10 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-from bzrlib import errors
+from bzrlib import (
+ errors,
+ tsort,
+ )
from bzrlib.deprecated_graph import (node_distances, select_farthest)
from bzrlib.revision import NULL_REVISION
@@ -275,6 +278,15 @@
return lca.pop()
revisions = lca
+ def iter_topo_order(self, revisions):
+ """Iterate through the input revisions in topological order.
+
+ This sorting only ensures that parents come before their children.
+ An ancestor may sort after a descendant if the relationship is not
+ visible in the supplied list of revisions.
+ """
+ return tsort.topo_sort(zip(revisions, self.get_parents(revisions)))
+
class _BreadthFirstSearcher(object):
"""Parallel search the breadth-first the ancestry of revisions.
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2007-04-04 02:31:25 +0000
+++ b/bzrlib/inventory.py 2007-06-21 03:29:39 +0000
@@ -181,7 +181,7 @@
:param entry_vf: The entry versioned file, if its already available.
"""
def get_ancestors(weave, entry):
- return set(weave.get_ancestry(entry.revision))
+ return set(weave.get_ancestry(entry.revision, topo_sorted=False))
# revision:ie mapping for each ie found in previous_inventories.
candidates = {}
# revision:ie mapping with one revision for each head.
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-03-09 22:17:39 +0000
+++ b/bzrlib/knit.py 2007-06-21 03:29:39 +0000
@@ -939,14 +939,14 @@
except KeyError:
raise RevisionNotPresent(version_id, self.filename)
- def get_ancestry(self, versions):
+ def get_ancestry(self, versions, topo_sorted=True):
"""See VersionedFile.get_ancestry."""
if isinstance(versions, basestring):
versions = [versions]
if not versions:
return []
versions = [osutils.safe_revision_id(v) for v in versions]
- return self._index.get_ancestry(versions)
+ return self._index.get_ancestry(versions, topo_sorted)
def get_ancestry_with_ghosts(self, versions):
"""See VersionedFile.get_ancestry_with_ghosts."""
@@ -967,7 +967,7 @@
from bzrlib.weave import Weave
w = Weave(self.filename)
- ancestry = self.get_ancestry(version_ids)
+ ancestry = set(self.get_ancestry(version_ids, topo_sorted=False))
sorted_graph = topo_sort(self._index.get_graph())
version_list = [vid for vid in sorted_graph if vid in ancestry]
@@ -982,14 +982,14 @@
"""See VersionedFile.plan_merge."""
ver_a = osutils.safe_revision_id(ver_a)
ver_b = osutils.safe_revision_id(ver_b)
- ancestors_b = set(self.get_ancestry(ver_b))
+ ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))
def status_a(revision, text):
if revision in ancestors_b:
return 'killed-b', text
else:
return 'new-a', text
- ancestors_a = set(self.get_ancestry(ver_a))
+ ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
def status_b(revision, text):
if revision in ancestors_a:
return 'killed-a', text
@@ -1217,7 +1217,7 @@
def get_graph(self):
return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]
- def get_ancestry(self, versions):
+ def get_ancestry(self, versions, topo_sorted=True):
"""See VersionedFile.get_ancestry."""
# get a graph of all the mentioned versions:
graph = {}
@@ -1233,6 +1233,8 @@
# if not completed and not a ghost
pending.update([p for p in parents if p not in graph])
graph[version] = parents
+ if not topo_sorted:
+ return graph.keys()
return topo_sort(graph.items())
def get_ancestry_with_ghosts(self, versions):
=== modified file 'bzrlib/merge.py'
--- a/bzrlib/merge.py 2007-06-08 21:48:42 +0000
+++ b/bzrlib/merge.py 2007-06-21 03:29:39 +0000
@@ -201,7 +201,8 @@
return
if self.other_rev_id is None:
return
- ancestry = self.this_branch.repository.get_ancestry(self.this_basis)
+ ancestry = set(self.this_branch.repository.get_ancestry(
+ self.this_basis, topo_sorted=False))
if self.other_rev_id in ancestry:
return
self.this_tree.add_parent_tree((self.other_rev_id, self.other_tree))
=== modified file 'bzrlib/missing.py'
--- a/bzrlib/missing.py 2007-05-24 12:13:06 +0000
+++ b/bzrlib/missing.py 2007-06-21 03:29:39 +0000
@@ -119,7 +119,8 @@
def _get_ancestry(repository, progress, label, step, rev_history):
progress.update('%s ancestry' % label, step, 5)
if len(rev_history) > 0:
- ancestry = set(repository.get_ancestry(rev_history[-1]))
+ ancestry = set(repository.get_ancestry(rev_history[-1],
+ topo_sorted=False))
else:
ancestry = set()
return ancestry
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2007-06-08 21:48:42 +0000
+++ b/bzrlib/remote.py 2007-06-21 02:33:43 +0000
@@ -533,9 +533,9 @@
return self._real_repository.control_weaves
@needs_read_lock
- def get_ancestry(self, revision_id):
+ def get_ancestry(self, revision_id, topo_sorted=True):
self._ensure_real()
- return self._real_repository.get_ancestry(revision_id)
+ return self._real_repository.get_ancestry(revision_id, topo_sorted)
@needs_read_lock
def get_inventory_weave(self):
=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py 2007-06-19 14:49:06 +0000
+++ b/bzrlib/repofmt/knitrepo.py 2007-06-21 02:33:43 +0000
@@ -111,17 +111,18 @@
return self._fileid_involved_by_set(changed)
@needs_read_lock
- def get_ancestry(self, revision_id):
+ def get_ancestry(self, revision_id, topo_sorted=True):
"""Return a list of revision-ids integrated by a revision.
- This is topologically sorted.
+ This is topologically sorted, unless 'topo_sorted' is specified as
+ False.
"""
if revision_id is None:
return [None]
revision_id = osutils.safe_revision_id(revision_id)
vf = self._get_revision_vf()
try:
- return [None] + vf.get_ancestry(revision_id)
+ return [None] + vf.get_ancestry(revision_id, topo_sorted)
except errors.RevisionNotPresent:
raise errors.NoSuchRevision(self, revision_id)
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-06-09 01:13:31 +0000
+++ b/bzrlib/repository.py 2007-06-21 03:29:39 +0000
@@ -819,7 +819,7 @@
yield RevisionTree(self, inv, revision_id)
@needs_read_lock
- def get_ancestry(self, revision_id):
+ def get_ancestry(self, revision_id, topo_sorted=True):
"""Return a list of revision-ids integrated by a revision.
The first element of the list is always None, indicating the origin
@@ -834,7 +834,7 @@
if not self.has_revision(revision_id):
raise errors.NoSuchRevision(self, revision_id)
w = self.get_inventory_weave()
- candidates = w.get_ancestry(revision_id)
+ candidates = w.get_ancestry(revision_id, topo_sorted)
return [None] + candidates # self._eliminate_revisions_not_present(candidates)
@needs_read_lock
=== modified file 'bzrlib/revision.py'
--- a/bzrlib/revision.py 2007-06-08 21:48:42 +0000
+++ b/bzrlib/revision.py 2007-06-21 03:29:39 +0000
@@ -124,7 +124,8 @@
revisions_source is an object supporting a get_revision operation that
behaves like Branch's.
"""
- return (candidate_id in branch.repository.get_ancestry(revision_id))
+ return (candidate_id in branch.repository.get_ancestry(revision_id,
+ topo_sorted=False))
def iter_ancestors(revision_id, revision_source, only_present=False):
@@ -258,10 +259,10 @@
[revision_a, revision_b])
# Shortcut the case where one of the tips is already included in
# the other graphs ancestry.
- ancestry_a = graph.get_ancestry(revision_a)
+ ancestry_a = graph.get_ancestry(revision_a, topo_sorted=False)
if revision_b in ancestry_a:
return revision_b
- ancestry_b = graph.get_ancestry(revision_b)
+ ancestry_b = graph.get_ancestry(revision_b, topo_sorted=False)
if revision_a in ancestry_b:
return revision_a
# convert to a NULL_REVISION based graph.
=== modified file 'bzrlib/status.py'
--- a/bzrlib/status.py 2007-03-11 13:44:18 +0000
+++ b/bzrlib/status.py 2007-06-21 03:29:39 +0000
@@ -194,7 +194,8 @@
print >>to_file, 'pending merges:'
if last_revision is not None:
try:
- ignore = set(branch.repository.get_ancestry(last_revision))
+ ignore = set(branch.repository.get_ancestry(last_revision,
+ topo_sorted=False))
except errors.NoSuchRevision:
# the last revision is a ghost : assume everything is new
# except for it
=== modified file 'bzrlib/tests/repository_implementations/test_repository.py'
--- a/bzrlib/tests/repository_implementations/test_repository.py 2007-04-13 06:23:59 +0000
+++ b/bzrlib/tests/repository_implementations/test_repository.py 2007-06-17 17:07:04 +0000
@@ -538,6 +538,11 @@
self.assertRaises(errors.NoSuchRevision,
self.bzrdir.open_repository().get_ancestry, 'orphan')
+ def test_get_unsorted_ancestry(self):
+ repo = self.bzrdir.open_repository()
+ self.assertEqual(set(repo.get_ancestry('rev3')),
+ set(repo.get_ancestry('rev3', topo_sorted=False)))
+
def test_get_revision_graph(self):
# we can get a mapping of id->parents for the entire revision graph or bits thereof.
self.assertEqual({'rev1':[],
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2007-06-19 14:49:06 +0000
+++ b/bzrlib/tests/test_graph.py 2007-06-21 01:58:29 +0000
@@ -121,7 +121,7 @@
'rev3a': ['rev2a', 'rev2b'], 'rev3b': ['rev2b', 'rev2c']}
-class TestGraphWalker(TestCaseWithMemoryTransport):
+class TestGraph(TestCaseWithMemoryTransport):
def make_graph(self, ancestors):
tree = self.prepare_memory_tree('.')
@@ -280,3 +280,11 @@
stacked.get_parents(['rev2', 'rev2']))
self.assertEqual([['rev4',], ['rev4']],
stacked.get_parents(['rev1', 'rev1']))
+
+ def test_iter_topo_order(self):
+ graph = self.make_graph(ancestry_1)
+ args = ['rev2a', 'rev3', 'rev1']
+ topo_args = list(graph.iter_topo_order(args))
+ self.assertEqual(set(args), set(topo_args))
+ self.assertTrue(topo_args.index('rev2a') > topo_args.index('rev1'))
+ self.assertTrue(topo_args.index('rev2a') < topo_args.index('rev3'))
=== modified file 'bzrlib/tests/test_tsort.py'
--- a/bzrlib/tests/test_tsort.py 2006-10-16 10:03:21 +0000
+++ b/bzrlib/tests/test_tsort.py 2007-06-21 01:58:29 +0000
@@ -85,6 +85,12 @@
(8, [0, 1, 4, 5, 6])]),
[0, 1, 2, 3, 4, 5, 6, 7, 8])
+ def test_tsort_unincluded_parent(self):
+ """Sort nodes, but don't include some parents in the output"""
+ self.assertSortAndIterate([(0, [1]),
+ (1, [2])],
+ [1, 0])
+
class MergeSortTests(TestCase):
=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py 2007-03-02 17:09:40 +0000
+++ b/bzrlib/tests/test_versionedfile.py 2007-06-17 17:07:04 +0000
@@ -384,6 +384,9 @@
self.assertRaises(RevisionNotPresent,
f.get_ancestry, ['rM', 'rX'])
+ self.assertEqual(set(f.get_ancestry('rM')),
+ set(f.get_ancestry('rM', topo_sorted=False)))
+
def test_mutate_after_finish(self):
f = self.get_file()
f.transaction_finished()
=== modified file 'bzrlib/tsort.py'
--- a/bzrlib/tsort.py 2007-04-19 00:03:01 +0000
+++ b/bzrlib/tsort.py 2007-06-21 01:58:29 +0000
@@ -61,6 +61,7 @@
"""
# a dict of the graph.
self._graph = dict(graph)
+ self._visitable = set(self._graph)
### if debugging:
# self._original_graph = dict(graph)
@@ -120,6 +121,8 @@
# this parent was completed by a child on the
# call stack. skip it.
continue
+ if next_node_name not in self._visitable:
+ continue
# otherwise transfer it from the source graph into the
# top of the current depth first search stack.
try:
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2007-02-10 02:48:43 +0000
+++ b/bzrlib/versionedfile.py 2007-06-21 02:33:43 +0000
@@ -302,10 +302,13 @@
"""
raise NotImplementedError(self.get_lines)
- def get_ancestry(self, version_ids):
+ def get_ancestry(self, version_ids, topo_sorted=True):
"""Return a list of all ancestors of given version(s). This
will not include the null revision.
+ This list will not be topologically sorted if topo_sorted=False is
+ passed.
+
Must raise RevisionNotPresent if any of the given versions are
not present in file history."""
if isinstance(version_ids, basestring):
=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py 2007-03-21 01:34:41 +0000
+++ b/bzrlib/weave.py 2007-06-17 17:07:04 +0000
@@ -607,7 +607,7 @@
else:
return self.get_ancestry(version_ids)
- def get_ancestry(self, version_ids):
+ def get_ancestry(self, version_ids, topo_sorted=True):
"""See VersionedFile.get_ancestry."""
if isinstance(version_ids, basestring):
version_ids = [version_ids]
More information about the bazaar-commits
mailing list