[MERGE] Speed up 'bzr reconcile' [was: Re: 'bzr reconcile' *really* slow]

Andrew Bennetts andrew at canonical.com
Mon Oct 15 11:48:19 BST 2007


Ahem.  Attached.

Andrew Bennetts wrote:
> [...]
> 
> Ok, with the attached bundle I can reconcile bzr.dev in under 40 minutes, using
> 310MB of memory, on my pretty average laptop.
> 
> This isn't stunning but it's adequate for fixing bzr.dev, at least.  So I think
> we should merge this bundle.  The changes are fairly straightforward.
> 
> -Andrew.
> 
> 
-------------- next part --------------
# Bazaar merge directive format 2 (Bazaar 0.90)
# revision_id: andrew.bennetts at canonical.com-20071015091030-\
#   t9f7qvqueo9lswgc
# target_branch: http://bazaar-vcs.org/bzr/bzr.dev
# testament_sha1: f2780a99d57585a80b2e1818682e76f61643449c
# timestamp: 2007-10-15 19:10:47 +1000
# source_branch: http://people.ubuntu.com/~andrew/bzr/reconcile-speed
# base_revision_id: pqm at pqm.ubuntu.com-20071012085726-lyq36i8bo7ew28ba
# 
# Begin patch
=== modified file 'bzrlib/reconcile.py'
--- bzrlib/reconcile.py	2007-10-05 02:19:09 +0000
+++ bzrlib/reconcile.py	2007-10-15 09:10:30 +0000
@@ -372,12 +372,14 @@
         """
         transaction = self.repo.get_transaction()
         revision_versions = repository._RevisionTextVersionCache(self.repo)
+        versions = self.revisions.versions()
+        revision_versions.prepopulate_revs(versions)
         for num, file_id in enumerate(self.repo.weave_store):
             self.pb.update('Fixing text parents', num,
                            len(self.repo.weave_store))
             vf = self.repo.weave_store.get_weave(file_id, transaction)
             vf_checker = self.repo.get_versioned_file_checker(
-                self.revisions.versions(), revision_versions)
+                versions, revision_versions)
             versions_with_bad_parents = vf_checker.check_file_version_parents(
                 vf, file_id)
             if len(versions_with_bad_parents) == 0:

=== modified file 'bzrlib/repository.py'
--- bzrlib/repository.py	2007-10-12 08:18:54 +0000
+++ bzrlib/repository.py	2007-10-15 09:10:30 +0000
@@ -2489,6 +2489,9 @@
     def __init__(self, repository):
         self.repository = repository
         self.revision_versions = {}
+        self.revision_parents = {}
+        self.repo_graph = self.repository.get_graph()
+        self.rev_heads = {}
 
     def add_revision_text_versions(self, tree):
         """Cache text version data from the supplied revision tree"""
@@ -2503,10 +2506,44 @@
         try:
             inv_revisions = self.revision_versions[revision_id]
         except KeyError:
-            tree = self.repository.revision_tree(revision_id)
-            inv_revisions = self.add_revision_text_versions(tree)
+            try:
+                tree = self.repository.revision_tree(revision_id)
+            except errors.RevisionNotPresent:
+                self.revision_versions[revision_id] = inv_revisions = {}
+            else:
+                inv_revisions = self.add_revision_text_versions(tree)
         return inv_revisions.get(file_id)
 
+    def prepopulate_revs(self, revision_ids):
+        # Filter out versions that we don't have an inventory for, so that the
+        # revision_trees() call won't fail.
+        inv_weave = self.repository.get_inventory_weave()
+        revs = [r for r in revision_ids if inv_weave.has_version(r)]
+        # XXX: this loop is very similar to
+        # bzrlib.fetch.Inter1and2Helper.iter_rev_trees.
+        while revs:
+            for tree in self.repository.revision_trees(revs[:100]):
+                if tree.inventory.revision_id is None:
+                    tree.inventory.revision_id = tree.get_revision_id()
+                self.add_revision_text_versions(tree)
+            revs = revs[100:]
+
+    def get_parents(self, revision_id):
+        try:
+            return self.revision_parents[revision_id]
+        except KeyError:
+            parents = self.repository.get_parents([revision_id])[0]
+            self.revision_parents[revision_id] = parents
+            return parents
+
+    def heads(self, revision_ids):
+        revision_ids = tuple(revision_ids)
+        try:
+            return self.rev_heads[revision_ids]
+        except KeyError:
+            heads = self.repo_graph.heads(revision_ids)
+            self.rev_heads[revision_ids] = heads
+            return heads
 
 class VersionedFileChecker(object):
 
@@ -2520,25 +2557,17 @@
             file_id, revision_id)
         if text_revision is None:
             return None
-        parents_of_text_revision = self.repository.get_parents(
-            [text_revision])[0]
+        parents_of_text_revision = self.revision_versions.get_parents(
+            text_revision)
         parents_from_inventories = []
         for parent in parents_of_text_revision:
             if parent == _mod_revision.NULL_REVISION:
                 continue
-            try:
-                inventory = self.repository.get_inventory(parent)
-            except errors.RevisionNotPresent:
-                pass
-            else:
-                try:
-                    introduced_in = inventory[file_id].revision
-                except errors.NoSuchId:
-                    pass
-                else:
-                    parents_from_inventories.append(introduced_in)
-        graph = self.repository.get_graph()
-        heads = set(graph.heads(parents_from_inventories))
+            introduced_in = self.revision_versions.get_text_version(file_id,
+                    parent)
+            if introduced_in is not None:
+                parents_from_inventories.append(introduced_in)
+        heads = set(self.revision_versions.heads(parents_from_inventories))
         new_parents = []
         for parent in parents_from_inventories:
             if parent in heads and parent not in new_parents:

# Begin bundle
IyBCYXphYXIgcmV2aXNpb24gYnVuZGxlIHY0CiMKQlpoOTFBWSZTWeRytC8AA9Z/gGRUQABZ5///
f///6r////pgCE+e3uy6etmAbjaKAAG2qUjhkKaIp7KeU8VP1Gk2GmomxNT2lPUNpDGppp6g0NNN
6iaAlFPVNT9U/U8kEP1T0g0aAAAAAAAAAAipsKemSNAA0aAAAA0AAAANAAlNEITKD0nkQmgMgbSa
A0B6gAaaGgADhoZNNDTI0NMjIMjI0MgMTRk0AZMjEMJFBAmmmmQJiaGkwCnpPSYmppk0bU09TRhG
j0aiYWjIChIA5u168l7exH/UQIB2dR2YKXTjBVJbtKH8FL7DK8jjKc5SHxhcMiVc90lJmPRDRn8t
CoTUX2a8LNc9ypSD30mHXlnZXbqgSpombXuEhBNJI1aJcVQbxpJhI5eQOX7zqr1XhtRVEtaCaL0Z
ZyoSIREV1vLa3yh/GFUjCjrQDwy4JR60cvOHtZfpa5VaaFoXQeNT70QeAQeRD7xRl6lMqQirZJBC
FSZMhMXg6hQEJADApzHM1TUZOXspsiXu61KSYVZZ11h5ZaJYTLivLkhm8K7Hnmu3p7uXLO6cYhhd
jEfAvFEAFf32BOkDJx0DyiThiZyOVmZwUihCrUsPFqy0EEDPZUUuyC+rC+UbSCBrNZr3h5hYcoag
CZ8+NAZNeO4R03wOauNDwH6UKlfsDEX3k1a6Vbl0ShCLBhxMhMa4S3G5aX1OfzzhupdVQtb6mg5o
3a6BE3DZs7RIZppIIbEROQroJCIEDaRZUlsjEZIuKKACk7/CksRRXlICMSsXCqQWdH4HzOY0tAY2
nFbnpCZVFRYytYMKJTIiOEJ6sC0bMBjJaieA686JlFNUtJf73RoUIjJiNkcrLDULvJCjo0rIqvyO
W1YDtBFReWNbtwMxpKkLJEi99N5JkgoMKFcu1FQHGUDQAw7+ghqPPepCT1ZoiFdRZXCyKRLVg/Ae
l6MUMMX0LFBVNbRDZOuyTwlk2zLChSnLLTKszmJMqpGVFSmvhVZtJ2BLKZMSkyDVJAR0qIcJEhmY
rgZjqgWbJhEpKLc5zFB698rCvQM87r45NXJWyQtN4yJVoIGBt3hbURxkcCYmGqRgSLQ/PbhLXeQA
HgV5byQXGYvykKwqir0TIFJ9IRLysRz3StqqLYVRegyqUonDxpoCZAuH9lZUHcGcLy0rMp8abIlo
xkDMOmWI5iJDBw3k2ZCDMhmpKS0pPYItNIj2rHuN5GkpjD0+eTL8bvmcmx7jkDlRENSwFO44+sta
X9Ed7pAxqZfM7lhmwMhJE04izg42z7UUcrTw7DcQPvTZwdQ7LLfbGISERBy/QZtd3HztFnw7XIkL
phD3XRRpsk1faeWUdx2OMGS90TNn5nHcEWbvj44RxJXti2JUDwx1sdjxz38tZklfAuaNEoqSGcXd
W7Pq6NvkEfvzZ7Pd7A+h86DpeXVSqhUUu4P71xRayQ4DBUK8iSYqZxTW2CoKSgo+v00BMzvYxD0R
A0TV3n9RYzYYwdTiNA+k07X9Lrih6OH7TwNYKhchrGLZUhfxexchmoZn/xzHmpfaECtCpWbk6nik
jf9UeiIzRQacHNh6p6FpkfUS4RGsfDn9pghY0Bq8gqUB/5CsExaD0FJYzn6PQRPeUlmhCf/WG1Q0
ttbtKnaGeNhgK8Wibu7ziE1p0zYnPOIIQzGmppDXeUmrp/BRkcVRdYJTCtjaYMZ4h8tfR41vp+tk
iUJzjFjcGQ5jngYlpwOg2HeSv4xJnEpPDQeI+1Zi/sOI+bBaCgUzUC3bEdBWbF32jVUQM9RAOCeg
xSR3/7o8OpC01UsxBMEdohpFyury7IqqnprP3aFUDwXBPXBZbjI8mxLUtwlQkH9WkJdAM/Q12JHw
MlxlGPgkjysHwOooPQcxnGI3DPV3bvbQus8pFxevzO0+mZuexo+Q/nJLwEfMItCbPTbUvZ+27sPA
fXBpV8TIyWaJf02xpHAovZVms5dAiXmEOrpWpCg8gvyaQbpBUZ83x4q7b14cx3wII+GQpsubY/tF
jWc+aiHyvjudbb4wN5ZKNoAEMjPIAVEhYM5N/L5V6js+72FwvXYdu9C5LkdcvoGHY0vJJHFVTbNc
jyEd9Np6OsLMUB+odzDlyyJq4PHntXCifipRY6ZRgxkRQYhGC5ZxTMXMW6CFJeAlIzuKFCgSmhVE
gCKPuYKgGCMwfhFAePhv4BSI9vKYBz/R0oCI8i6uEEJEV957taFIA6KtLC0wUDBIgEOyUkkxVFlO
v8Ya+Gyf7sPOhUOLEo9OjRwkXDf66Z7soFRmgAcgLN8QOm4ZdM0wS2bSQ3tC4DQMV+IY+z161iWZ
PAOLgTgb/YbHCzk0cQW+1JE8TAaoZ+XAILVKo11Q9xAnQS0SDSHB4EQTaug8kk1FAnyQSKtAzWW5
qZnRvsgwDEO4oxDuX4ldTAGxsY2kUpjtGcp3RjCCd9EFHJSd3h5DvOJDockhDorTscM4oJMM4pKg
INvcN7BsersuMlsu7olJ7t57Hbm4tJ1lbKNNgIbaQ3iKZmRPzrsCZYl0XiJ6yx/K1xjPbVsXDCyZ
JlVJiDjEgSbMPNEDtSevfffi4O1B2LdcdrgLGKwQITYSrxH0IhphnLnMrAaE0ZGiy8SgFWaITKAn
+nQG0PYkchzL39oZbNLRqt5++i9SGmZxMFpNM88TW9jRKoAXqxEQQLLm/WilTJCMnunznNQJaDhu
NUYcgmkbeqCTuJhwUltKj0in8n+LuSKcKEhyOVoXgA==


More information about the bazaar mailing list