[MERGE] RevisionSpec.as_revision_id()

Wed Mar 26 17:44:33 GMT 2008

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

John Arbash Meinel wrote:
> ~   I think it is both clearer, *and* we should probably try to deprecate
> ~   .in_history() at some point. Our internals pretty much universally
> work in
> ~   terms of revision ids. And generally where they don't, they should
> be updated
> ~   so that they do.

Sure.  I'd add that with log, it really helps to know that the start and
end revisions are in the lefthand ancestry.

Also, I think we will probably want to support getting trees from
revision specs at some point-- the 'current:' tree is an obvious one,
but also perhaps merge preview trees.

> === modified file 'bzrlib/branch.py'
> --- bzrlib/branch.py	2008-03-14 10:55:37 +0000
> +++ bzrlib/branch.py	2008-03-20 17:28:10 +0000
> @@ -1375,8 +1375,8 @@
>          """See Branch.set_revision_history."""
>          if 'evil' in debug.debug_flags:
>              mutter_callsite(3, "set_revision_history scales with history.")
> -        self._clear_cached_state()
>          self._write_revision_history(rev_history)
> +        self._clear_cached_state()

^^^ what's the purpose of moving this?  The previous position gave
write_revision_history the opportunity to populate the cache.

>          self._cache_revision_history(rev_history)
>          for hook in Branch.hooks['set_rh']:
>              hook(self, rev_history)
> @@ -1809,13 +1809,29 @@
>  
>  class BzrBranch6(BzrBranch5):
>  
> +    def __init__(self, *args, **kwargs):
> +        super(BzrBranch6, self).__init__(*args, **kwargs)
> +        self._last_revision_info_cache = None
> +        self._partial_revision_history_cache = None

Hmm.  What happened to the identity map we were using for caching?

>      @needs_read_lock
>      def last_revision_info(self):
> -        revision_string = self.control_files.get('last-revision').read()
> -        revno, revision_id = revision_string.rstrip('\n').split(' ', 1)
> -        revision_id = cache_utf8.get_cached_utf8(revision_id)
> -        revno = int(revno)
> -        return revno, revision_id
> +        """Return information about the last revision.
> +
> +        :return: A tuple (revno, revision_id).
> +        """
> +        if self._last_revision_info_cache is None:
> +            revision_string = self.control_files.get('last-revision').read()
> +            revno, revision_id = revision_string.rstrip('\n').split(' ', 1)
> +            revision_id = cache_utf8.get_cached_utf8(revision_id)
> +            revno = int(revno)
> +            self._last_revision_info_cache = revno, revision_id
> +        return self._last_revision_info_cache

It seems like the actual retrieval of last_revision_info should be split
out into a private method.  That separates the concerns of caching and
parsing a revision_info.

    def _last_revision_info(self):
        revision_string = self.control_files.get('last-revision').read()
        revno, revision_id = revision_string.rstrip('\n').split(' ', 1)
        revision_id = cache_utf8.get_cached_utf8(revision_id)
        revno = int(revno)
        return revno, revision_id

    @needs_read_lock
    def last_revision_info(self):
        """Return information about the last revision.

        :return: A tuple (revno, revision_id).
        """
        if self._last_revision_info_cache is None:
            self._last_revision_info_cache = self._last_revision_info()
        return self._last_revision_info_cache

> @@ -1848,10 +1865,18 @@
>      def _gen_revision_history(self):
>          """Generate the revision history from last revision
>          """
> +        if self._partial_revision_history_cache:
> +            last_revision = self._partial_revision_history_cache.pop(-1)
> +            partial_history = self._partial_revision_history_cache
> +            partial_history.reverse()
> +            self._partial_revision_history_cache = None

^^^ I don't see why _gen_revision_history should clear the partial
history.  It seems like a wasted opportunity to cache the whole history.

It doesn't seem particularly useful for the partial history to be stored
in forward order, either.

> @@ -1969,6 +1994,47 @@
>          revno = len(history)
>          self.set_last_revision_info(revno, revision_id)
>  
> +    @needs_read_lock
> +    def get_rev_id(self, revno, history=None):
> +        """Find the revision id of the specified revno."""
> +        if revno == 0:
> +            return _mod_revision.NULL_REVISION
> +
> +        last_revno, last_revision_id = self.last_revision_info()
> +        if revno == last_revno:
> +            return last_revision_id
> +
> +        if revno <= 0 or revno > last_revno:
> +            raise errors.NoSuchRevision(self, revno)
> +
> +        if history is None:
> +            history = self._revision_history_cache
> +        if history is not None:
> +            return history[revno - 1]
> +
> +        distance = last_revno - revno
> +        if self._partial_revision_history_cache:
> +            if len(self._partial_revision_history_cache) > distance:
> +                return self._partial_revision_history_cache[distance]
> +            distance -= len(self._partial_revision_history_cache) - 1
> +            revision_id = self._partial_revision_history_cache[-1]
> +        else:
> +            self._partial_revision_history_cache = [last_revision_id]
> +            revision_id = last_revision_id
> +
> +        history_iter = self.repository.iter_reverse_revision_history(
> +            revision_id)
> +        # iter_reverse_revision_history returns the revision_id we passed in as
> +        # the first node, so skip past it.
> +        history_iter.next()
> +        for i in xrange(distance):
> +            try:
> +                revision_id = history_iter.next()
> +            except StopIteration:
> +                raise errors.NoSuchRevision(self, revno)
> +            self._partial_revision_history_cache.append(revision_id)
> +        return revision_id

^^^ I think this could be broken up a lot more cleanly; introduce an
_iter_reverse_revision_history:

def _iter_reverse_revision_history(self):
    if self._partial_history_cache is None:
        self._partial_history_cache = []
    for revision_id in self._partial_history_cache:
        yield revision_id
    for revision_id in self.repository._iter_reverse_revision_history(
        revision_id):
        self._partial_history_cache.append()
        yield revision_id

And then get_rev_id is something like:
def get_rev_id(revno)
    cur_revno, last_revision = self.get_revision_info()
    if last_revno < revno:
        raise NoSuchRevision()
    for revision_id in self._iter_reverse_revision_history():
        if cur_revno == revno:
            return revision_id
        revno -= 1
    else:
        raise NoSuchRevision()

> === renamed file 'bzrlib/tests/test_revisionnamespaces.py' => 'bzrlib/tests/test_revisionspec.py'
> --- bzrlib/tests/test_revisionnamespaces.py	2007-11-27 00:44:20 +0000
> +++ bzrlib/tests/test_revisionspec.py	2008-03-20 22:45:35 +0000
> @@ -49,16 +49,18 @@
>  
>          self.tree = self.make_branch_and_tree('tree')
>          self.build_tree(['tree/a'])
> -        self.tree.add(['a'])
> -        self.tree.commit('a', rev_id='r1')
> -
> -        self.tree2 = self.tree.bzrdir.sprout('tree2').open_workingtree()
> -        self.tree2.commit('alt', rev_id='alt_r2')
> -
> -        self.tree.branch.repository.fetch(self.tree2.branch.repository,
> -                                          revision_id='alt_r2')
> -        self.tree.set_pending_merges(['alt_r2'])
> -        self.tree.commit('second', rev_id='r2')
> +        self.tree.lock_write()
> +        try:
> +            self.tree.add(['a'])
> +            self.tree.commit('a', rev_id='r1')
> +
> +            self.tree2 = self.tree.bzrdir.sprout('tree2').open_workingtree()
> +            self.tree2.commit('alt', rev_id='alt_r2')
> +
> +            self.tree.merge_from_branch(self.tree2.branch)
> +            self.tree.commit('second', rev_id='r2')
> +        finally:
> +            self.tree.unlock()

^^^ Usually, we do addCleanup(tree.unlock) instead of a try/finally in
tests.

> +    def test_as_revision_id(self):
> +        self.assertAsRevisionId('r1', '1')
> +        self.assertAsRevisionId('r2', '2')
> +        self.assertAsRevisionId('r1', '-2')
> +        self.assertAsRevisionId('r2', '-1')
> +        self.assertAsRevisionId('alt_r2', '1.1.1')

Since you've stripped ensure_null out of the callsites, I'd like a test
that each revision spec emits NULL_REVISION for the null revision, not None.

Aaron
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFH6ouA0F+nu1YWqI0RApIyAJwIqtS17ILl+l6TZvCyDB1H1PcHPQCfbiHc
bc9CdN87w1A5QnF7+GNGqHc=
=uvoV
-----END PGP SIGNATURE-----