Rev 3299: Merge in Lukáš's work to only access partial history for most revision specs. in http://bzr.arbash-meinel.com/branches/bzr/1.4-dev/get_rev_id
John Arbash Meinel
john at arbash-meinel.com
Thu Mar 20 17:22:49 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.4-dev/get_rev_id
------------------------------------------------------------
revno: 3299
revision-id: john at arbash-meinel.com-20080320171926-0eyc8dohs0on591o
parent: pqm at pqm.ubuntu.com-20080320074642-46bf1vcpyubnaptz
parent: lalinsky at gmail.com-20071203230359-lhcqc1peusply2cc
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: get_rev_id
timestamp: Thu 2008-03-20 12:19:26 -0500
message:
Merge in Lukáš's work to only access partial history for most revision specs.
modified:
bzrlib/branch.py branch.py-20050309040759-e4baf4e0d046576e
bzrlib/builtins.py builtins.py-20050830033751-fc01482b9ca23183
bzrlib/diff.py diff.py-20050309040759-26944fbbf2ebbf36
bzrlib/revisionspec.py revisionspec.py-20050907152633-17567659fd5c0ddb
bzrlib/status.py status.py-20050505062338-431bfa63ec9b19e6
------------------------------------------------------------
revno: 3060.3.6
revision-id: lalinsky at gmail.com-20071203230359-lhcqc1peusply2cc
parent: lalinsky at gmail.com-20071203213554-8x1dsmlqdtm79ala
committer: Lukáš Lalinský <lalinsky at gmail.com>
branch nick: pack-revno
timestamp: Tue 2007-12-04 00:03:59 +0100
message:
Implement partial history cache in BzrBranch6.
modified:
bzrlib/branch.py branch.py-20050309040759-e4baf4e0d046576e
bzrlib/revisionspec.py revisionspec.py-20050907152633-17567659fd5c0ddb
------------------------------------------------------------
revno: 3060.3.5
revision-id: lalinsky at gmail.com-20071203213554-8x1dsmlqdtm79ala
parent: lalinsky at gmail.com-20071203212805-ehlgkz9aielhi8gb
committer: Lukáš Lalinský <lalinsky at gmail.com>
branch nick: pack-revno
timestamp: Mon 2007-12-03 22:35:54 +0100
message:
Add support for in_branch for the remaining RevisionSpec subclasses.
modified:
bzrlib/revisionspec.py revisionspec.py-20050907152633-17567659fd5c0ddb
bzrlib/status.py status.py-20050505062338-431bfa63ec9b19e6
------------------------------------------------------------
revno: 3060.3.4
revision-id: lalinsky at gmail.com-20071203212805-ehlgkz9aielhi8gb
parent: lalinsky at gmail.com-20071203211103-40soo8ldds0lyu2l
committer: Lukáš Lalinský <lalinsky at gmail.com>
branch nick: pack-revno
timestamp: Mon 2007-12-03 22:28:05 +0100
message:
Replace all RevisionSpec.in_history(...).rev_id calls with RevisionSpec.in_branch(..., need_revno=False).rev_id.
modified:
bzrlib/builtins.py builtins.py-20050830033751-fc01482b9ca23183
bzrlib/diff.py diff.py-20050309040759-26944fbbf2ebbf36
------------------------------------------------------------
revno: 3060.3.3
revision-id: lalinsky at gmail.com-20071203211103-40soo8ldds0lyu2l
parent: lalinsky at gmail.com-20071203204808-gv1m751xdb8m9ltc
committer: Lukáš Lalinský <lalinsky at gmail.com>
branch nick: pack-revno
timestamp: Mon 2007-12-03 22:11:03 +0100
message:
Make RevisionSpec_revid and RevisionSpec_revno not load the whole revision history.
modified:
bzrlib/branch.py branch.py-20050309040759-e4baf4e0d046576e
bzrlib/revisionspec.py revisionspec.py-20050907152633-17567659fd5c0ddb
------------------------------------------------------------
revno: 3060.3.2
revision-id: lalinsky at gmail.com-20071203204808-gv1m751xdb8m9ltc
parent: lalinsky at gmail.com-20071203200431-qwd9ju3y4ha12n6r
committer: Lukáš Lalinský <lalinsky at gmail.com>
branch nick: pack-revno
timestamp: Mon 2007-12-03 21:48:08 +0100
message:
Make RevisionSpec_last not load the whole history.
Implement BzrBranch6.get_rev_id, which uses last_revision_info and repository.iter_reverse_revision_history to find the revision_id incrementally without loading the whole history.
modified:
bzrlib/branch.py branch.py-20050309040759-e4baf4e0d046576e
bzrlib/revisionspec.py revisionspec.py-20050907152633-17567659fd5c0ddb
bzrlib/status.py status.py-20050505062338-431bfa63ec9b19e6
------------------------------------------------------------
revno: 3060.3.1
revision-id: lalinsky at gmail.com-20071203200431-qwd9ju3y4ha12n6r
parent: pqm at pqm.ubuntu.com-20071130182102-i0t564k01anm7uk2
committer: Lukáš Lalinský <lalinsky at gmail.com>
branch nick: pack-revno
timestamp: Mon 2007-12-03 21:04:31 +0100
message:
Cache last_revision_info in BzrBranch6, since this is often called multiple times within one lock and we don't want to read the file over and over again.
modified:
bzrlib/branch.py branch.py-20050309040759-e4baf4e0d046576e
-------------- next part --------------
=== modified file 'bzrlib/branch.py'
--- a/bzrlib/branch.py 2008-03-14 10:55:37 +0000
+++ b/bzrlib/branch.py 2008-03-20 17:19:26 +0000
@@ -1375,8 +1375,8 @@
"""See Branch.set_revision_history."""
if 'evil' in debug.debug_flags:
mutter_callsite(3, "set_revision_history scales with history.")
- self._clear_cached_state()
self._write_revision_history(rev_history)
+ self._clear_cached_state()
self._cache_revision_history(rev_history)
for hook in Branch.hooks['set_rh']:
hook(self, rev_history)
@@ -1809,13 +1809,29 @@
class BzrBranch6(BzrBranch5):
+ def __init__(self, *ignored, **ignored_too):
+ super(BzrBranch6, self).__init__(*ignored, **ignored_too)
+ self._last_revision_info_cache = None
+ self._partial_revision_history_cache = None
+
+ def _clear_cached_state(self):
+ super(BzrBranch6, self)._clear_cached_state()
+ self._last_revision_info_cache = None
+ self._partial_revision_history_cache = None
+
@needs_read_lock
def last_revision_info(self):
- revision_string = self.control_files.get('last-revision').read()
- revno, revision_id = revision_string.rstrip('\n').split(' ', 1)
- revision_id = cache_utf8.get_cached_utf8(revision_id)
- revno = int(revno)
- return revno, revision_id
+ """Return information about the last revision.
+
+ :return: A tuple (revno, revision_id).
+ """
+ if self._last_revision_info_cache is None:
+ revision_string = self.control_files.get('last-revision').read()
+ revno, revision_id = revision_string.rstrip('\n').split(' ', 1)
+ revision_id = cache_utf8.get_cached_utf8(revision_id)
+ revno = int(revno)
+ self._last_revision_info_cache = revno, revision_id
+ return self._last_revision_info_cache
def _write_last_revision_info(self, revno, revision_id):
"""Simply write out the revision id, with no checks.
@@ -1837,6 +1853,7 @@
self._check_history_violation(revision_id)
self._write_last_revision_info(revno, revision_id)
self._clear_cached_state()
+ self._last_revision_info_cache = revno, revision_id
def _check_history_violation(self, revision_id):
last_revision = _mod_revision.ensure_null(self.last_revision())
@@ -1848,10 +1865,18 @@
def _gen_revision_history(self):
"""Generate the revision history from last revision
"""
+ if self._partial_revision_history_cache:
+ last_revision = self._partial_revision_history_cache.pop(-1)
+ partial_history = self._partial_revision_history_cache
+ partial_history.reverse()
+ self._partial_revision_history_cache = None
+ else:
+ last_revision = self.last_revision()
+ partial_history = []
history = list(self.repository.iter_reverse_revision_history(
- self.last_revision()))
+ last_revision))
history.reverse()
- return history
+ return history + partial_history
def _write_revision_history(self, history):
"""Factored out of set_revision_history.
@@ -1969,6 +1994,47 @@
revno = len(history)
self.set_last_revision_info(revno, revision_id)
+ @needs_read_lock
+ def get_rev_id(self, revno, history=None):
+ """Find the revision id of the specified revno."""
+ if revno == 0:
+ return _mod_revision.NULL_REVISION
+
+ last_revno, last_revision_id = self.last_revision_info()
+ if revno == last_revno:
+ return last_revision_id
+
+ if revno <= 0 or revno > last_revno:
+ raise errors.NoSuchRevision(self, revno)
+
+ if history is None:
+ history = self._revision_history_cache
+ if history is not None:
+ return history[revno - 1]
+
+ distance = last_revno - revno
+ if self._partial_revision_history_cache:
+ try:
+ return self._partial_revision_history_cache[distance]
+ except IndexError:
+ pass
+ distance -= len(self._partial_revision_history_cache) - 1
+ revision_id = self._partial_revision_history_cache[-1]
+ else:
+ self._partial_revision_history_cache = [last_revision_id]
+ revision_id = last_revision_id
+
+ history_iter = self.repository.iter_reverse_revision_history(
+ revision_id)
+ history_iter.next()
+ for i in xrange(distance):
+ try:
+ revision_id = history_iter.next()
+ except StopIteration:
+ raise errors.NoSuchRevision(self, revno)
+ self._partial_revision_history_cache.append(revision_id)
+ return revision_id
+
######################################################################
# results of operations
=== modified file 'bzrlib/builtins.py'
--- a/bzrlib/builtins.py 2008-03-19 01:43:43 +0000
+++ b/bzrlib/builtins.py 2008-03-20 17:19:26 +0000
@@ -466,7 +466,7 @@
raise errors.BzrCommandError(
'bzr inventory --revision takes exactly one revision'
' identifier')
- revision_id = revision[0].in_history(work_tree.branch).rev_id
+ revision_id = revision[0].in_branch(work_tree.branch, need_revno=False).rev_id
tree = work_tree.branch.repository.revision_tree(revision_id)
extra_trees = [work_tree]
@@ -655,7 +655,7 @@
if revision is not None:
if len(revision) == 1:
- revision_id = revision[0].in_history(branch_from).rev_id
+ revision_id = revision[0].in_branch(branch_from, need_revno=False).rev_id
else:
raise errors.BzrCommandError(
'bzr pull --revision takes one value.')
@@ -916,7 +916,8 @@
br_from.lock_read()
try:
if len(revision) == 1 and revision[0] is not None:
- revision_id = revision[0].in_history(br_from)[1]
+ revision_id = revision[0].in_branch(br_from,
+ need_revno=False).rev_id
else:
# FIXME - wt.last_revision, fallback to branch, fall back to
# None or perhaps NULL_REVISION to mean copy nothing
@@ -1011,7 +1012,7 @@
accelerator_tree = WorkingTree.open(files_from)
if len(revision) == 1 and revision[0] is not None:
revision_id = _mod_revision.ensure_null(
- revision[0].in_history(source)[1])
+ revision[0].in_branch(source, need_revno=False).rev_id)
else:
revision_id = None
if to_location is None:
@@ -1871,7 +1872,7 @@
relpath += '/'
if revision is not None:
tree = branch.repository.revision_tree(
- revision[0].in_history(branch).rev_id)
+ revision[0].in_branch(branch, need_revno=False).rev_id)
elif tree is None:
tree = branch.basis_tree()
@@ -2136,7 +2137,7 @@
else:
if len(revision) != 1:
raise errors.BzrCommandError('bzr export --revision takes exactly 1 argument')
- rev_id = revision[0].in_history(b).rev_id
+ rev_id = revision[0].in_branch(b, need_revno=False).rev_id
t = b.repository.revision_tree(rev_id)
try:
export(t, dest, format, root)
@@ -2181,7 +2182,7 @@
if revision is None:
revision_id = b.last_revision()
else:
- revision_id = revision[0].in_history(b).rev_id
+ revision_id = revision[0].in_branch(b, need_revno=False).rev_id
cur_file_id = tree.path2id(relpath)
rev_tree = b.repository.revision_tree(revision_id)
@@ -2979,12 +2980,12 @@
else:
other_revision_id = \
_mod_revision.ensure_null(
- revision[-1].in_history(other_branch).rev_id)
+ revision[-1].in_branch(other_branch, need_revno=False).rev_id)
if (revision is not None and len(revision) == 2
and revision[0] is not None):
base_revision_id = \
_mod_revision.ensure_null(
- revision[0].in_history(base_branch).rev_id)
+ revision[0].in_branch(base_branch, need_revno=False).rev_id)
else:
base_revision_id = None
# Remember where we merge from
@@ -3203,7 +3204,7 @@
elif len(revision) != 1:
raise errors.BzrCommandError('bzr revert --revision takes exactly 1 argument')
else:
- rev_id = revision[0].in_history(tree.branch).rev_id
+ rev_id = revision[0].in_branch(tree.branch, need_revno=False).rev_id
pb = ui.ui_factory.nested_progress_bar()
try:
tree.revert(file_list,
@@ -3454,7 +3455,7 @@
if revision is None:
rev_id = b.last_revision()
else:
- rev_id = revision[0].in_history(b).rev_id
+ rev_id = revision[0].in_branch(b, need_revno=False).rev_id
t = testament_class.from_revision(b.repository, rev_id)
if long:
sys.stdout.writelines(t.as_text_lines())
@@ -3501,7 +3502,7 @@
elif len(revision) != 1:
raise errors.BzrCommandError('bzr annotate --revision takes exactly 1 argument')
else:
- revision_id = revision[0].in_history(branch).rev_id
+ revision_id = revision[0].in_branch(branch, need_revno=False).rev_id
tree = branch.repository.revision_tree(revision_id)
if wt is not None:
file_id = wt.path2id(relpath)
@@ -3996,9 +3997,11 @@
if len(revision) > 2:
raise errors.BzrCommandError('bzr merge-directive takes '
'at most two one revision identifiers')
- revision_id = revision[-1].in_history(branch).rev_id
+ revision_id = revision[-1].in_branch(branch,
+ need_revno=False).rev_id
if len(revision) == 2:
- base_revision_id = revision[0].in_history(branch).rev_id
+ base_revision_id = revision[0].in_branch(
+ branch, need_revno=False).rev_id
base_revision_id = ensure_null(base_revision_id)
else:
revision_id = branch.last_revision()
@@ -4163,9 +4166,9 @@
if len(revision) > 2:
raise errors.BzrCommandError('bzr send takes '
'at most two one revision identifiers')
- revision_id = revision[-1].in_history(branch).rev_id
+ revision_id = revision[-1].in_branch(branch, need_revno=False).rev_id
if len(revision) == 2:
- base_revision_id = revision[0].in_history(branch).rev_id
+ base_revision_id = revision[0].in_branch(branch, need_revno=False).rev_id
if revision_id is None:
revision_id = branch.last_revision()
if revision_id == NULL_REVISION:
@@ -4331,7 +4334,7 @@
raise errors.BzrCommandError(
"Tags can only be placed on a single revision, "
"not on a range")
- revision_id = revision[0].in_history(branch).rev_id
+ revision_id = revision[0].in_branch(branch, need_revno=False).rev_id
else:
revision_id = branch.last_revision()
if (not force) and branch.tags.has_tag(tag_name):
=== modified file 'bzrlib/diff.py'
--- a/bzrlib/diff.py 2008-03-07 14:15:10 +0000
+++ b/bzrlib/diff.py 2008-03-20 17:19:26 +0000
@@ -314,9 +314,9 @@
def spec_tree(spec):
if tree:
- revision = spec.in_store(tree.branch)
+ revision = spec.in_branch(tree.branch, need_revno=False)
else:
- revision = spec.in_store(None)
+ revision = spec.in_branch(None, need_revno=False)
revision_id = revision.rev_id
branch = revision.branch
return branch.repository.revision_tree(revision_id)
=== modified file 'bzrlib/revisionspec.py'
--- a/bzrlib/revisionspec.py 2008-03-10 15:39:56 +0000
+++ b/bzrlib/revisionspec.py 2008-03-20 17:19:26 +0000
@@ -232,8 +232,18 @@
# aliases for now, when we fix the core logic, then they
# will do what you expect.
in_store = in_history
- in_branch = in_store
-
+
+ def in_branch(self, branch, need_revno=True):
+ """Evaluate this revision spec and return a RevisionInfo object.
+
+ If need_revno is False, the returned RevisionInfo object might
+ have the revno attribute set as None (for performance reasons),
+ even if the revno exists in the specified branch.
+
+ The default implementation is an alias for RevisionSpec.in_history.
+ """
+ return self.in_history(branch)
+
def __repr__(self):
# this is mostly for helping with testing
return '<%s %s>' % (self.__class__.__name__,
@@ -315,9 +325,7 @@
# the branch object.
from bzrlib.branch import Branch
branch = Branch.open(branch_spec)
- # Need to use a new revision history
- # because we are using a specific branch
- revs = branch.revision_history()
+ revs = None
if dotted:
branch.lock_read()
@@ -335,19 +343,23 @@
# so for API compatability we return None.
return RevisionInfo(branch, None, revisions[0])
else:
+ last_revno, last_revision_id = branch.last_revision_info()
if revno < 0:
# if get_rev_id supported negative revnos, there would not be a
# need for this special case.
- if (-revno) >= len(revs):
+ if (-revno) >= last_revno:
revno = 1
else:
- revno = len(revs) + revno + 1
+ revno = last_revno + revno + 1
try:
revision_id = branch.get_rev_id(revno, revs)
except errors.NoSuchRevision:
raise errors.InvalidRevisionSpec(self.user_spec, branch)
return RevisionInfo(branch, revno, revision_id)
-
+
+ def in_branch(self, branch, need_revno=True):
+ return self._match_on(branch, None)
+
def needs_branch(self):
return self.spec.find(':') == -1
@@ -374,15 +386,27 @@
Examples::
revid:aaaa at bbbb-123456789 -> Select revision 'aaaa at bbbb-123456789'
- """
+ """
+
prefix = 'revid:'
- def _match_on(self, branch, revs):
+ def _match_on(self, branch, revs, need_revno=True):
# self.spec comes straight from parsing the command line arguments,
# so we expect it to be a Unicode string. Switch it to the internal
# representation.
revision_id = osutils.safe_revision_id(self.spec, warn=False)
- return RevisionInfo.from_revision_id(branch, revision_id, revs)
+ if need_revno:
+ try:
+ revno = branch.revision_id_to_revno(revision_id)
+ except errors.NoSuchRevision:
+ revno = None
+ else:
+ revno = None
+ return RevisionInfo(branch, revno, revision_id)
+
+ def in_branch(self, branch, need_revno=True):
+ # Same as RevisionSpec.in_history, but without history loading.
+ return self._match_on(branch, None, need_revno)
SPEC_TYPES.append(RevisionSpec_revid)
@@ -398,15 +422,17 @@
last:1 -> return the last revision
last:3 -> return the revision 2 before the end.
- """
+ """
prefix = 'last:'
def _match_on(self, branch, revs):
+ last_revno, last_revision_id = branch.last_revision_info()
+
if self.spec == '':
- if not revs:
+ if not last_revno:
raise errors.NoCommits(branch)
- return RevisionInfo(branch, len(revs), revs[-1])
+ return RevisionInfo(branch, last_revno, last_revision_id)
try:
offset = int(self.spec)
@@ -416,13 +442,19 @@
if offset <= 0:
raise errors.InvalidRevisionSpec(self.user_spec, branch,
'you must supply a positive value')
- revno = len(revs) - offset + 1
+
+ revno = last_revno - offset + 1
try:
revision_id = branch.get_rev_id(revno, revs)
except errors.NoSuchRevision:
raise errors.InvalidRevisionSpec(self.user_spec, branch)
return RevisionInfo(branch, revno, revision_id)
+ def in_branch(self, branch, need_revno=True):
+ # Same as RevisionSpec.in_history, but without history loading.
+ return self._match_on(branch, None)
+
+
SPEC_TYPES.append(RevisionSpec_last)
@@ -448,8 +480,12 @@
prefix = 'before:'
- def _match_on(self, branch, revs):
- r = RevisionSpec.from_string(self.spec)._match_on(branch, revs)
+ def _match_on(self, branch, revs, need_revno=True, in_branch=False):
+ revspec = RevisionSpec.from_string(self.spec)
+ if in_branch:
+ r = revspec.in_branch(branch, need_revno)
+ else:
+ r = revspec._match_on(branch, revs)
if r.revno == 0:
raise errors.InvalidRevisionSpec(self.user_spec, branch,
'cannot go before the null: revision')
@@ -461,9 +497,12 @@
revision_id = revision.NULL_REVISION
else:
revision_id = rev.parent_ids[0]
- try:
- revno = revs.index(revision_id) + 1
- except ValueError:
+ if need_revno:
+ try:
+ revno = branch.revision_id_to_revno(revision_id)
+ except errors.NoSuchRevision:
+ revno = None
+ else:
revno = None
else:
revno = r.revno - 1
@@ -474,6 +513,9 @@
branch)
return RevisionInfo(branch, revno, revision_id)
+ def in_branch(self, branch, need_revno=True):
+ return self._match_on(branch, None, need_revno, True)
+
SPEC_TYPES.append(RevisionSpec_before)
@@ -487,11 +529,20 @@
prefix = 'tag:'
- def _match_on(self, branch, revs):
+ def _match_on(self, branch, revs, need_revno=True):
# Can raise tags not supported, NoSuchTag, etc
- return RevisionInfo.from_revision_id(branch,
- branch.tags.lookup_tag(self.spec),
- revs)
+ revision_id = branch.tags.lookup_tag(self.spec)
+ if need_revno:
+ try:
+ revno = branch.revision_id_to_revno(revision_id)
+ except errors.NoSuchRevision:
+ revno = None
+ else:
+ revno = None
+ return RevisionInfo(branch, revno, revision_id)
+
+ def in_branch(self, branch, need_revno=True):
+ return self._match_on(branch, None, need_revno)
SPEC_TYPES.append(RevisionSpec_tag)
=== modified file 'bzrlib/status.py'
--- a/bzrlib/status.py 2008-03-07 14:15:10 +0000
+++ b/bzrlib/status.py 2008-03-20 17:19:26 +0000
@@ -84,13 +84,15 @@
old = new.basis_tree()
elif len(revision) > 0:
try:
- rev_id = revision[0].in_history(wt.branch).rev_id
+ rev_id = revision[0].in_branch(wt.branch,
+ need_revno=False).rev_id
old = wt.branch.repository.revision_tree(rev_id)
except errors.NoSuchRevision, e:
raise errors.BzrCommandError(str(e))
if (len(revision) > 1) and (revision[1].spec is not None):
try:
- rev_id = revision[1].in_history(wt.branch).rev_id
+ rev_id = revision[1].in_branch(wt.branch,
+ need_revno=False).rev_id
new = wt.branch.repository.revision_tree(rev_id)
new_is_working_tree = False
except errors.NoSuchRevision, e:
More information about the bazaar-commits mailing list