[MERGE] Common dominator ancestor picker
John A Meinel
john at arbash-meinel.com
Sat Feb 18 04:30:38 GMT 2006
Aaron Bentley wrote:
> Hi all,
>
> I've implemented an ancestor picker based on the closest common
> dominator. This speeds up find-merge-base by a factor of 10 on the bzr
> source tree, and is likely to do even better as the bzr ancestry gets
> bigger.
>
> The old algorithm scaled linearly with the number of nodes in the
> ancestry. This algorithm scales linearly with the number of nodes
> between each revision and their closest common dominator. I invented it
> myself, so let me know if there's anything wrong with the theory.
>
> Not only is this algorithm faster, but it's also more correct, as it
> will handle criss-cross merges better.
>
> Monotone's is slightly fancier; they find the common merge that is a
> dominator of one. I don't think the difference will be significant, and
> we can always implement it later.
>
> Could someone give me a review? If approved, I'll stick it in my
> integration.
>
> Aaron
Hopefully this will be a more complete review...
------------------------------------------------------------------------
=== modified file 'bzrlib/revision.py'
--- bzrlib/revision.py
+++ bzrlib/revision.py
@@ -279,20 +279,6 @@
return root, ancestors, descendants, common
-def common_ancestor(revision_a, revision_b, revision_source):
- try:
- root, ancestors, descendants, common = \
- combined_graph(revision_a, revision_b, revision_source)
- except bzrlib.errors.NoCommonRoot:
- raise bzrlib.errors.NoCommonAncestor(revision_a, revision_b)
-
- distances = node_distances (descendants, ancestors, root)
- farthest = select_farthest(distances, common)
- if farthest is None or farthest == NULL_REVISION:
- raise bzrlib.errors.NoCommonAncestor(revision_a, revision_b)
- return farthest
-
-
class MultipleRevisionSources(object):
"""Proxy that looks in multiple branches for revisions."""
def __init__(self, *args):
@@ -350,3 +336,94 @@
next = best_ancestor(next)
path.reverse()
return path
+
+
+def dominators(revision_id, revision_source):
+ """Return the dominators of a revision.
+ This is based on the observation that in a DAG, the dominators of x
+ are x + common_dominators(parents(x)). This applies recursively,
of course.
+ """
+ while True:
+ yield revision_id
+ if revision_id is NULL_REVISION:
+ return
+ try:
+ revision = revision_source.get_revision(revision_id)
+ except NoSuchRevision:
+ return
+
+ if len(revision.parent_ids) == 0:
+ revision_id = NULL_REVISION
+ elif len(revision.parent_ids) == 1:
+ (revision_id,) = revision.parent_ids
+ else:
+ revision_id = common_dominator(*revision.parent_ids)
+
So, as mentioned you need to pass the revision source here. Also, all
the common_dominator code takes the revision source as the first
argument, to be consistent, it might be good to have 'dominator' do the
same.
+
+def common_dominator(revision_source, *revision_ids):
+ """Find the common dominator of a number of revision ids.
+ This will walk a subgraph, and ends early when it finds the common
+ dominator. It should visit each node no more than twice.
+ """
+ if len(revision_ids) == 1:
+ return revision_ids[0]
+ elif len(revision_ids) == 2:
+ return common_dominator_pair(revision_source, *revision_ids)
+ else:
+ left_revision = common_dominator(revision_source, revision_ids[:2])
+ right_revision = common_dominator(revision_source,
revision_ids[2:])
+ return common_dominator_pair(revision_source, left_revision,
+ right_revision)
Here, I think you should do:
left_revision = common_dominator_pair(revision_source,
revision_ids[0], revision_ids[1])
right_revision = common_dominator(revision_source,
*revision_ids[2:])
return common_dominator_pair(revision_source,
left_revision, right_revision)
You need to pass revisions as a list of arguments to common_dominator
for the right revision, and you probably should just call
common_dominator_pair for 'left_revision' since you know you are passing
2 entries, and it will just call _pair for you.
+
+
+def common_dominator_pair(revision_source, left, right):
+ """Returns the common dominator of a pair of revisions, or
NULL_REVISION
+ if they have no common dominator.
+ """
+ seen = set()
+ iter_left = dominators(left, revision_source)
+ iter_right = dominators(right, revision_source)
+ first_left = None
+ first_right = None
+ last_revision_id = None
+ while True:
+ if iter_left is None:
+ if iter_right is None:
+ raise AssertionError("Unreachable code")
+ else:
+ try:
+ left_revision_id = iter_left.next()
+ if first_left is None:
+ first_left = left_revision_id
+ except StopIteration:
+ # Handle ghosts by backtracking
+ return first_right
+ if left_revision_id is NULL_REVISION:
+ left_iter = None
+ if left_revision_id in seen:
+ return left_revision_id
+ else:
+ seen.add(left_revision_id)
+ if iter_right is not None:
+ try:
+ right_revision_id = iter_right.next()
+ except StopIteration:
+ # Handle ghosts by backtracking
+ return first_left
+ if right_revision_id is NULL_REVISION:
+ iter_right = None
+ if right_revision_id in seen:
+ return right_revision_id
+ else:
+ seen.add(right_revision_id)
+
You probably don't need the else for:
if foo:
return X
else:
do something
You really just need:
if foo:
return X
do something
+
+def common_ancestor(revision_a, revision_b, revision_source):
+ """Find the closest unambiguous common ancestor of two revisions.
+ In the case of criss-cross merges, this algorithm will ignore the
criss-
+ crossed ancestors.
+ """
+ cd = common_dominator(revision_source, revision_a, revision_b)
+ if cd is NULL_REVISION:
+ raise bzrlib.errors.NoCommonAncestor(revision_a, revision_b)
+ return cd
=== modified file 'bzrlib/tests/test_revision.py'
--- bzrlib/tests/test_revision.py
+++ bzrlib/tests/test_revision.py
@@ -48,6 +48,23 @@
so A is missing b6 at the start
and B is missing a3, a4, a5
+
+ or graphically
+ a0
+ |
+ a1
+ |
+ a2
+ | \
+ | b3
+ | |
+ | b4
+ | /|
+ a3 b5
+ | |
+ a4 |
+ | X|
+ a5 b6
"""
tree1 = self.make_branch_and_tree("branch1")
br1 = tree1.branch
@@ -212,6 +229,14 @@
class TestCommonAncestor(TestCaseWithTransport):
"""Test checking whether a revision is an ancestor of another
revision"""
+ @staticmethod
+ def assertCommonAncestor(ancestor, rev_a, rev_b, revision_source):
+ from bzrlib.revision import common_ancestor
+ anc = common_ancestor(rev_a, rev_b, revision_source)
+ if anc != ancestor:
+ raise AssertionError("Common ancestor of %s and %s should
be %s, not %s" %
+ (rev_a, rev_b, ancestor, anc))
+
Wouldn't it be better to write this as:
def assertCommonAncestor(self, ancestor, rev_a, rev_b, source):
...
if anc != ancestor:
self.fail('Common ancestor of %s and %s should be %s, not %s'
% (rev_a, rev_b, ancestor, anc))
I realize it is about the same thing, but I prefer 'self.fail' to raise
AssertionError.
def test_old_common_ancestor(self):
"""Pick a resonable merge base using the old functionality"""
from bzrlib.revision import old_common_ancestor as common_ancestor
@@ -278,14 +303,16 @@
revisions[1])
self.assertEqual(common_ancestor(revisions[2], revisions_2[4],
sources),
revisions[2])
+ self.assertCommonAncestor(revisions_2[4], revisions[3],
revisions_2[4],
+ sources)
self.assertEqual(common_ancestor(revisions[3], revisions_2[4],
sources),
revisions_2[4])
self.assertEqual(common_ancestor(revisions[4], revisions_2[5],
sources),
revisions_2[4])
- self.assertEqual(common_ancestor(revisions[5], revisions_2[6],
sources),
- revisions[4]) # revisions_2[5] is equally valid
- self.assertEqual(common_ancestor(revisions_2[6], revisions[5],
sources),
- revisions[4]) # revisions_2[5] is equally valid
+ self.assertCommonAncestor(revisions_2[4], revisions[5],
revisions_2[6],
+ sources)
+ self.assertCommonAncestor(revisions_2[4], revisions_2[6],
revisions[5],
+ sources)
def test_combined(self):
"""combined_graph
You also have some lines which are longer than 80 characters. But the
basic concept looks good.
John
=:->
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 249 bytes
Desc: OpenPGP digital signature
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20060217/3ca8950f/attachment.pgp
More information about the bazaar
mailing list