Rev 4357: Checkpointing refactoring of inventory/file checks. in http://people.ubuntu.com/~robertc/baz2.0/check

Robert Collins robertc at robertcollins.net
Mon Jun 1 04:33:21 BST 2009


At http://people.ubuntu.com/~robertc/baz2.0/check

------------------------------------------------------------
revno: 4357
revision-id: robertc at robertcollins.net-20090601033315-1qj9cuq5e69vokao
parent: robertc at robertcollins.net-20090601015336-yjjkkneufeteugxk
committer: Robert Collins <robertc at robertcollins.net>
branch nick: check
timestamp: Mon 2009-06-01 13:33:15 +1000
message:
  Checkpointing refactoring of inventory/file checks.
=== modified file 'bzrlib/check.py'
--- a/bzrlib/check.py	2009-06-01 01:53:36 +0000
+++ b/bzrlib/check.py	2009-06-01 03:33:15 +0000
@@ -82,9 +82,9 @@
         self.other_results = []
         # Plain text lines to include in the report
         self._report_items = []
-        # Sha1 expectations; may be large and need spilling to disk.
-        # key->(sha1, first-referer)
-        self.expected_sha1 = {}
+        # Keys we are looking for; may be large and need spilling to disk.
+        # key->(type(revision/inventory/text/signature/map), sha1, first-referer)
+        self.pending_keys = {}
         # Ancestors map for all of revisions being checked; while large helper
         # functions we call would create it anyway, so better to have once and
         # keep.
@@ -98,20 +98,10 @@
         try:
             self.progress.update('check', 0, 4)
             if self.check_repo:
-                self.progress.update('checking revision graph', 0)
-                self.check_revision_graph()
-                self.progress.update('checking revisions', 1)
-                revbar = bzrlib.ui.ui_factory.nested_progress_bar()
-                revno = 0
-                try:
-                    while revno < len(self.planned_revisions):
-                        rev_id = self.planned_revisions[revno]
-                        revbar.update('checking revision', revno,
-                            len(self.planned_revisions))
-                        revno += 1
-                        self._check_revision_tree(rev_id)
-                finally:
-                    revbar.finished()
+                self.progress.update('checking revisions', 0)
+                self.check_revisions()
+                self.progress.update('checking inventories', 1)
+                self.repository._check_inventories(self)
                 self.progress.update('checking revision contents', 2)
                 # check_weaves is done after the revision scan so that
                 # revision index is known to be valid.
@@ -162,7 +152,7 @@
             self.progress.finished()
             self.repository.unlock()
 
-    def check_revisions(self, revisions_iterator):
+    def _check_revisions(self, revisions_iterator):
         """Check revision objects by decorating a generator.
 
         :param revisions_iterator: An iterator of(revid, Revision-or-None).
@@ -172,11 +162,15 @@
         for revid, revision in revisions_iterator:
             yield revid, revision
             self._check_one_rev(revid, revision)
+        # Flatten the revisions we found to guarantee consistent later
+        # iteration.
         self.planned_revisions = list(self.planned_revisions)
+        # TODO: extract digital signatures as items to callback on too.
 
-    def check_revision_graph(self):
+    def check_revisions(self):
+        """Scan revisions, checking data directly available as we go."""
         revision_iterator = self.repository._iter_revisions(None)
-        revision_iterator = self.check_revisions(revision_iterator)
+        revision_iterator = self._check_revisions(revision_iterator)
         # We read the all revisions here:
         # - doing this allows later code to depend on the revision index.
         # - we can fill out existence flags at this point
@@ -274,27 +268,26 @@
                 self.ghosts.add(parent)
         
         self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
-        # If the revision has an inventory sha, we want to cross check it later.
-        if rev.inventory_sha1:
-            self.add_sha_check(rev_id, ('inventories', rev_id),
+        self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
             rev.inventory_sha1)
         self.checked_rev_cnt += 1
 
-    def add_sha_check(self, referer, key, sha1):
+    def add_pending_item(self, referer, key, kind, sha1):
         """Add a reference to a sha1 to be cross checked against a key.
 
         :param referer: The referer that expects key to have sha1.
         :param key: A storage key e.g. ('texts', 'foo at bar-20040504-1234')
-        :param sha1: A hex sha1.
+        :param kind: revision/inventory/text/map/signature
+        :param sha1: A hex sha1 or None if no sha1 is known.
         """
-        existing = self.expected_sha1.get(key)
+        existing = self.pending_keys.get(key)
         if existing:
-            if sha1 != existing[0]:
+            if sha1 != existing[1]:
                 self._report_items.append('Multiple expected sha1s for %s. {%s}'
                     ' expects {%s}, {%s} expects {%s}', (
                     key, referer, sha1, existing[1], existing[0]))
         else:
-            self.expected_sha1[key] = (sha1, referer)
+            self.pending_keys[key] = (kind, sha1, referer)
 
     def check_weaves(self):
         """Check all the weaves we can get our hands on.
@@ -334,18 +327,6 @@
                 (revision_id, weave_id, weave_parents, correct_parents))
         self.unreferenced_versions.update(unused_versions)
 
-    def _check_revision_tree(self, rev_id):
-        try:
-            tree = self.repository.revision_tree(rev_id)
-        except errors.NoSuchRevision:
-            self._report_items.append(
-                "Missing inventory for revision {%s}" % rev_id)
-        inv = tree.inventory
-        for path, ie in inv.iter_entries():
-            self._add_entry_to_text_key_references(inv, ie)
-            file_id = ie.file_id
-            ie.check(self, rev_id, inv, tree)
-
     def _add_entry_to_text_key_references(self, inv, entry):
         if not self.rich_roots and entry == inv.root:
             return

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-05-12 06:32:23 +0000
+++ b/bzrlib/repository.py	2009-06-01 03:33:15 +0000
@@ -1150,6 +1150,41 @@
         # The old API returned a list, should this actually be a set?
         return parent_map.keys()
 
+    def _check_inventories(self, checker):
+        """Check the inventories found from the revision scan.
+        
+        This checks all data that is tree-shape and not file-content.
+        """
+        revbar = ui.ui_factory.nested_progress_bar()
+        revno = 0
+        count = len(checker.pending_keys)
+        current_keys = checker.pending_keys
+        checker.pending_keys = {}
+        keys = set()
+        for key in current_keys:
+            if key[0] != 'inventories':
+                checker._report_items.append('unknown key type %r' % key)
+            keys.add(key[1:])
+        # XXX: below is to-go code that accesses texts one at a time.
+        try:
+            while revno < len(checker.planned_revisions):
+                rev_id = checker.planned_revisions[revno]
+                revbar.update('checking revision', revno,
+                    len(checker.planned_revisions))
+                revno += 1
+                try:
+                    tree = self.revision_tree(rev_id)
+                except errors.NoSuchRevision:
+                    self._report_items.append(
+                        "Missing inventory for revision {%s}" % rev_id)
+                inv = tree.inventory
+                for path, ie in inv.iter_entries():
+                    checker._add_entry_to_text_key_references(inv, ie)
+                    file_id = ie.file_id
+                    ie.check(checker, rev_id, inv, tree)
+        finally:
+            revbar.finished()
+
     @staticmethod
     def create(a_bzrdir):
         """Construct the current default format repository in a_bzrdir."""




More information about the bazaar-commits mailing list