Rev 4151: faster check (Ian Clatworthy) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Mon Mar 16 23:24:41 GMT 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4151
revision-id: pqm at pqm.ubuntu.com-20090316232437-0hj1iw4cedo312l3
parent: pqm at pqm.ubuntu.com-20090316131816-p0a3ugbpmbqm3a04
parent: ian.clatworthy at canonical.com-20090316222206-xgiuko42s5bui64z
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2009-03-16 23:24:37 +0000
message:
  faster check (Ian Clatworthy)
modified:
  bzrlib/check.py                check.py-20050309040759-f3a679400c06bcc1
  bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
    ------------------------------------------------------------
    revno: 4150.1.1
    revision-id: ian.clatworthy at canonical.com-20090316222206-xgiuko42s5bui64z
    parent: pqm at pqm.ubuntu.com-20090316131816-p0a3ugbpmbqm3a04
    parent: ian.clatworthy at canonical.com-20090316131644-r7kevoidcxw30i8d
    committer: Ian Clatworthy <ian.clatworthy at canonical.com>
    branch nick: ianc-integration
    timestamp: Tue 2009-03-17 08:22:06 +1000
    message:
      faster check (Ian Clatworthy)
    modified:
      bzrlib/check.py                check.py-20050309040759-f3a679400c06bcc1
      bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
    ------------------------------------------------------------
    revno: 4145.2.1
    revision-id: ian.clatworthy at canonical.com-20090316131644-r7kevoidcxw30i8d
    parent: pqm at pqm.ubuntu.com-20090316024046-58qc87pfdgu2ugok
    committer: Ian Clatworthy <ian.clatworthy at canonical.com>
    branch nick: bzr.faster-check
    timestamp: Mon 2009-03-16 23:16:44 +1000
    message:
      faster check
    modified:
      bzrlib/check.py                check.py-20050309040759-f3a679400c06bcc1
      bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
=== modified file 'bzrlib/check.py'
--- a/bzrlib/check.py	2009-03-08 05:54:50 +0000
+++ b/bzrlib/check.py	2009-03-16 13:16:44 +0000
@@ -63,6 +63,8 @@
         self.checked_weaves = set()
         self.unreferenced_versions = set()
         self.inconsistent_parents = []
+        self.rich_roots = repository.supports_rich_root()
+        self.text_key_references = {}
 
     def check(self):
         self.repository.lock_read()
@@ -209,7 +211,8 @@
         self.inventory_weave.check(progress_bar=self.progress)
         self.progress.update('checking text storage', 1, 2)
         self.repository.texts.check(progress_bar=self.progress)
-        weave_checker = self.repository._get_versioned_file_checker()
+        weave_checker = self.repository._get_versioned_file_checker(
+            text_key_references=self.text_key_references)
         result = weave_checker.check_file_version_parents(
             self.repository.texts, progress_bar=self.progress)
         self.checked_weaves = weave_checker.file_ids
@@ -228,23 +231,30 @@
     def _check_revision_tree(self, rev_id):
         tree = self.repository.revision_tree(rev_id)
         inv = tree.inventory
-        seen_ids = {}
-        for file_id in inv:
+        seen_ids = set()
+        seen_names = set()
+        for path, ie in inv.iter_entries():
+            self._add_entry_to_text_key_references(inv, ie)
+            file_id = ie.file_id
             if file_id in seen_ids:
                 raise BzrCheckError('duplicated file_id {%s} '
                                     'in inventory for revision {%s}'
                                     % (file_id, rev_id))
-            seen_ids[file_id] = True
-        for file_id in inv:
-            ie = inv[file_id]
+            seen_ids.add(file_id)
             ie.check(self, rev_id, inv, tree)
-        seen_names = {}
-        for path, ie in inv.iter_entries():
             if path in seen_names:
                 raise BzrCheckError('duplicated path %s '
                                     'in inventory for revision {%s}'
                                     % (path, rev_id))
-            seen_names[path] = True
+            seen_names.add(path)
+
+    def _add_entry_to_text_key_references(self, inv, entry):
+        if not self.rich_roots and entry == inv.root:
+            return
+        key = (entry.file_id, entry.revision)
+        self.text_key_references.setdefault(key, False)
+        if entry.revision == inv.revision_id:
+            self.text_key_references[key] = True
 
 
 @deprecated_function(deprecated_in((1,6,0)))

=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2009-03-12 10:04:53 +0000
+++ b/bzrlib/inventory.py	2009-03-16 22:22:06 +0000
@@ -489,7 +489,6 @@
                 checker.repeated_text_cnt += 1
                 return
 
-        mutter('check version {%s} of {%s}', tree_revision_id, self.file_id)
         checker.checked_text_cnt += 1
         # We can't check the length, because Weave doesn't store that
         # information, and the whole point of looking at the weave's

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-03-16 05:08:19 +0000
+++ b/bzrlib/repository.py	2009-03-16 22:22:06 +0000
@@ -1380,9 +1380,6 @@
     def find_text_key_references(self):
         """Find the text key references within the repository.
 
-        :return: a dictionary mapping (file_id, revision_id) tuples to altered file-ids to an iterable of
-        revision_ids. Each altered file-ids has the exact revision_ids that
-        altered it listed explicitly.
         :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
             to whether they were referred to by the inventory of the
             revision_id that they contain. The inventory texts from all present
@@ -1996,9 +1993,17 @@
                 [parents_provider, other_repository._make_parents_provider()])
         return graph.Graph(parents_provider)
 
-    def _get_versioned_file_checker(self):
-        """Return an object suitable for checking versioned files."""
-        return _VersionedFileChecker(self)
+    def _get_versioned_file_checker(self, text_key_references=None):
+        """Return an object suitable for checking versioned files.
+        
+        :param text_key_references: if non-None, an already built
+            dictionary mapping text keys ((fileid, revision_id) tuples)
+            to whether they were referred to by the inventory of the
+            revision_id that they contain. If None, this will be
+            calculated.
+        """
+        return _VersionedFileChecker(self,
+            text_key_references=text_key_references)
 
     def revision_ids_to_search_result(self, result_set):
         """Convert a set of revision ids to a graph SearchResult."""
@@ -3502,9 +3507,10 @@
 
 class _VersionedFileChecker(object):
 
-    def __init__(self, repository):
+    def __init__(self, repository, text_key_references=None):
         self.repository = repository
-        self.text_index = self.repository._generate_text_key_index()
+        self.text_index = self.repository._generate_text_key_index(
+            text_key_references=text_key_references)
 
     def calculate_file_version_parents(self, text_key):
         """Calculate the correct parents for a file version according to




More information about the bazaar-commits mailing list