Rev 4348: Refactor Repository._find_inconsistent_revision_parents and Repository.get_revisions to a new Repository._iter_revisions which is kinder on memory without needing code duplication. in http://people.ubuntu.com/~robertc/baz2.0/check

Robert Collins robertc at robertcollins.net
Tue May 12 05:24:59 BST 2009


At http://people.ubuntu.com/~robertc/baz2.0/check

------------------------------------------------------------
revno: 4348
revision-id: robertc at robertcollins.net-20090512042457-y91nn2suuc7syinj
parent: robertc at robertcollins.net-20090512042117-nnqulvunpdh3ecd7
committer: Robert Collins <robertc at robertcollins.net>
branch nick: check
timestamp: Tue 2009-05-12 14:24:57 +1000
message:
  Refactor Repository._find_inconsistent_revision_parents and Repository.get_revisions to a new Repository._iter_revisions which is kinder on memory without needing code duplication.
=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2009-04-09 20:23:07 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2009-05-12 04:24:57 +0000
@@ -229,24 +229,29 @@
     def _make_parents_provider(self):
         return _KnitsParentsProvider(self.revisions)
 
-    def _find_inconsistent_revision_parents(self):
+    def _find_inconsistent_revision_parents(self, revisions_iterator=None):
         """Find revisions with different parent lists in the revision object
         and in the index graph.
 
+        :param revisions_iterator: None, or an iterator of (revid,
+            Revision-or-None). This iterator controls the revisions checked.
         :returns: an iterator yielding tuples of (revison-id, parents-in-index,
             parents-in-revision).
         """
         if not self.is_locked():
             raise AssertionError()
         vf = self.revisions
-        for index_version in vf.keys():
-            parent_map = vf.get_parent_map([index_version])
+        if revisions_iterator is None:
+            revisions_iterator = self._iter_revisions(None)
+        for revid, revision in revisions_iterator:
+            if revision is None:
+                pass
+            parent_map = vf.get_parent_map([(revid,)])
             parents_according_to_index = tuple(parent[-1] for parent in
-                parent_map[index_version])
-            revision = self.get_revision(index_version[-1])
+                parent_map[(revid,)])
             parents_according_to_revision = tuple(revision.parent_ids)
             if parents_according_to_index != parents_according_to_revision:
-                yield (index_version[-1], parents_according_to_index,
+                yield (revid, parents_according_to_index,
                     parents_according_to_revision)
 
     def _check_for_inconsistent_revision_parents(self):

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-05-12 04:21:17 +0000
+++ b/bzrlib/repository.py	2009-05-12 04:24:57 +0000
@@ -1669,25 +1669,49 @@
 
     @needs_read_lock
     def get_revisions(self, revision_ids):
-        """Get many revisions at once."""
+        """Get many revisions at once.
+        
+        Repositories that need to check data on every revision read should 
+        subclass this method.
+        """
         return self._get_revisions(revision_ids)
 
     @needs_read_lock
     def _get_revisions(self, revision_ids):
         """Core work logic to get many revisions without sanity checks."""
-        for rev_id in revision_ids:
-            if not rev_id or not isinstance(rev_id, basestring):
-                raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
+        revs = {}
+        for revid, rev in self._iter_revisions(revision_ids):
+            if rev is None:
+                raise errors.NoSuchRevision(self, revid)
+            revs[revid] = rev
+        return [revs[revid] for revid in revision_ids]
+
+    def _iter_revisions(self, revision_ids):
+        """Iterate over revision objects.
+
+        :param revision_ids: An iterable of revisions to examine. None may be
+            passed to request all revisions known to the repository. Note that
+            not all repositories can find unreferenced revisions; for those
+            repositories only referenced ones will be returned.
+        :return: An iterator of (revid, revision) tuples. Absent revisions (
+            those asked for but not available) are returned as (revid, None).
+        """
+        if revision_ids is None:
+            revision_ids = self.all_revision_ids()
+        else:
+            for rev_id in revision_ids:
+                if not rev_id or not isinstance(rev_id, basestring):
+                    raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
         keys = [(key,) for key in revision_ids]
         stream = self.revisions.get_record_stream(keys, 'unordered', True)
-        revs = {}
         for record in stream:
+            revid = record.key[0]
             if record.storage_kind == 'absent':
-                raise errors.NoSuchRevision(self, record.key[0])
-            text = record.get_bytes_as('fulltext')
-            rev = self._serializer.read_revision_from_string(text)
-            revs[record.key[0]] = rev
-        return [revs[revid] for revid in revision_ids]
+                yield (revid, None)
+            else:
+                text = record.get_bytes_as('fulltext')
+                rev = self._serializer.read_revision_from_string(text)
+                yield (revid, rev)
 
     @needs_read_lock
     def get_revision_xml(self, revision_id):
@@ -2470,31 +2494,6 @@
         :param check_repo: If False do not check the repository contents, just 
             calculate the data callback_refs requires and call them back.
         """
-        # TODO: Reinstate or confirm its obsolescence.
-        # from Branch.check - a cross check that the parents index
-        # (iter_reverse_revision_history uses that) and the revision objects
-        # match up.
-        #real_rev_history = list(self.repository.iter_reverse_revision_history(
-        #                        last_revision_id))
-        #real_rev_history.reverse()
-        #
-        #mainline_parent_id = None
-        #for revision_id in real_rev_history:
-        #    try:
-        #        revision = self.repository.get_revision(revision_id)
-        #    except errors.NoSuchRevision, e:
-        #        result.errors.append(errors.BzrCheckError(
-        #            "mainline revision {%s} not in repository" % revision_id))
-        #        break
-        #    # In general the first entry on the revision history has no parents.
-        #    # But it's not illegal for it to have parents listed; this can happen
-        #    # in imports from Arch when the parents weren't reachable.
-        #    if mainline_parent_id is not None:
-        #        if mainline_parent_id not in revision.parent_ids:
-        #            raise errors.BzrCheckError("previous revision {%s} not listed among "
-        #                                "parents of {%s}"
-        #                                % (mainline_parent_id, revision_id))
-        #            mainline_parent_id = revision_id
         return self._check(revision_ids, callback_refs=callback_refs,
             check_repo=check_repo)
 




More information about the bazaar-commits mailing list