Rev 4348: Refactor Repository._find_inconsistent_revision_parents and Repository.get_revisions to a new Repository._iter_revisions which is kinder on memory without needing code duplication. in http://people.ubuntu.com/~robertc/baz2.0/check
Robert Collins
robertc at robertcollins.net
Tue May 12 05:24:59 BST 2009
At http://people.ubuntu.com/~robertc/baz2.0/check
------------------------------------------------------------
revno: 4348
revision-id: robertc at robertcollins.net-20090512042457-y91nn2suuc7syinj
parent: robertc at robertcollins.net-20090512042117-nnqulvunpdh3ecd7
committer: Robert Collins <robertc at robertcollins.net>
branch nick: check
timestamp: Tue 2009-05-12 14:24:57 +1000
message:
Refactor Repository._find_inconsistent_revision_parents and Repository.get_revisions to a new Repository._iter_revisions which is kinder on memory without needing code duplication.
=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py 2009-04-09 20:23:07 +0000
+++ b/bzrlib/repofmt/knitrepo.py 2009-05-12 04:24:57 +0000
@@ -229,24 +229,29 @@
def _make_parents_provider(self):
return _KnitsParentsProvider(self.revisions)
- def _find_inconsistent_revision_parents(self):
+ def _find_inconsistent_revision_parents(self, revisions_iterator=None):
"""Find revisions with different parent lists in the revision object
and in the index graph.
+ :param revisions_iterator: None, or an iterator of (revid,
+ Revision-or-None). This iterator controls the revisions checked.
:returns: an iterator yielding tuples of (revison-id, parents-in-index,
parents-in-revision).
"""
if not self.is_locked():
raise AssertionError()
vf = self.revisions
- for index_version in vf.keys():
- parent_map = vf.get_parent_map([index_version])
+ if revisions_iterator is None:
+ revisions_iterator = self._iter_revisions(None)
+ for revid, revision in revisions_iterator:
+ if revision is None:
+ pass
+ parent_map = vf.get_parent_map([(revid,)])
parents_according_to_index = tuple(parent[-1] for parent in
- parent_map[index_version])
- revision = self.get_revision(index_version[-1])
+ parent_map[(revid,)])
parents_according_to_revision = tuple(revision.parent_ids)
if parents_according_to_index != parents_according_to_revision:
- yield (index_version[-1], parents_according_to_index,
+ yield (revid, parents_according_to_index,
parents_according_to_revision)
def _check_for_inconsistent_revision_parents(self):
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-05-12 04:21:17 +0000
+++ b/bzrlib/repository.py 2009-05-12 04:24:57 +0000
@@ -1669,25 +1669,49 @@
@needs_read_lock
def get_revisions(self, revision_ids):
- """Get many revisions at once."""
+ """Get many revisions at once.
+
+ Repositories that need to check data on every revision read should
+ subclass this method.
+ """
return self._get_revisions(revision_ids)
@needs_read_lock
def _get_revisions(self, revision_ids):
"""Core work logic to get many revisions without sanity checks."""
- for rev_id in revision_ids:
- if not rev_id or not isinstance(rev_id, basestring):
- raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
+ revs = {}
+ for revid, rev in self._iter_revisions(revision_ids):
+ if rev is None:
+ raise errors.NoSuchRevision(self, revid)
+ revs[revid] = rev
+ return [revs[revid] for revid in revision_ids]
+
+ def _iter_revisions(self, revision_ids):
+ """Iterate over revision objects.
+
+ :param revision_ids: An iterable of revisions to examine. None may be
+ passed to request all revisions known to the repository. Note that
+ not all repositories can find unreferenced revisions; for those
+ repositories only referenced ones will be returned.
+ :return: An iterator of (revid, revision) tuples. Absent revisions (
+ those asked for but not available) are returned as (revid, None).
+ """
+ if revision_ids is None:
+ revision_ids = self.all_revision_ids()
+ else:
+ for rev_id in revision_ids:
+ if not rev_id or not isinstance(rev_id, basestring):
+ raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
keys = [(key,) for key in revision_ids]
stream = self.revisions.get_record_stream(keys, 'unordered', True)
- revs = {}
for record in stream:
+ revid = record.key[0]
if record.storage_kind == 'absent':
- raise errors.NoSuchRevision(self, record.key[0])
- text = record.get_bytes_as('fulltext')
- rev = self._serializer.read_revision_from_string(text)
- revs[record.key[0]] = rev
- return [revs[revid] for revid in revision_ids]
+ yield (revid, None)
+ else:
+ text = record.get_bytes_as('fulltext')
+ rev = self._serializer.read_revision_from_string(text)
+ yield (revid, rev)
@needs_read_lock
def get_revision_xml(self, revision_id):
@@ -2470,31 +2494,6 @@
:param check_repo: If False do not check the repository contents, just
calculate the data callback_refs requires and call them back.
"""
- # TODO: Reinstate or confirm its obsolescence.
- # from Branch.check - a cross check that the parents index
- # (iter_reverse_revision_history uses that) and the revision objects
- # match up.
- #real_rev_history = list(self.repository.iter_reverse_revision_history(
- # last_revision_id))
- #real_rev_history.reverse()
- #
- #mainline_parent_id = None
- #for revision_id in real_rev_history:
- # try:
- # revision = self.repository.get_revision(revision_id)
- # except errors.NoSuchRevision, e:
- # result.errors.append(errors.BzrCheckError(
- # "mainline revision {%s} not in repository" % revision_id))
- # break
- # # In general the first entry on the revision history has no parents.
- # # But it's not illegal for it to have parents listed; this can happen
- # # in imports from Arch when the parents weren't reachable.
- # if mainline_parent_id is not None:
- # if mainline_parent_id not in revision.parent_ids:
- # raise errors.BzrCheckError("previous revision {%s} not listed among "
- # "parents of {%s}"
- # % (mainline_parent_id, revision_id))
- # mainline_parent_id = revision_id
return self._check(revision_ids, callback_refs=callback_refs,
check_repo=check_repo)
More information about the bazaar-commits
mailing list