Rev 111: Start looking at improving build performance. in http://bazaar.launchpad.net/~bzr/bzr-history-db/trunk

John Arbash Meinel john at arbash-meinel.com
Tue Apr 27 22:22:48 BST 2010


At http://bazaar.launchpad.net/~bzr/bzr-history-db/trunk

------------------------------------------------------------
revno: 111
revision-id: john at arbash-meinel.com-20100427212204-gzktjcesj9u9eakp
parent: john at arbash-meinel.com-20100422212721-0ztw9wz7ohdg8afz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Tue 2010-04-27 16:22:04 -0500
message:
  Start looking at improving build performance.
-------------- next part --------------
=== modified file 'schema.py'
--- a/schema.py	2010-04-15 17:12:00 +0000
+++ b/schema.py	2010-04-27 21:22:04 +0000
@@ -184,26 +184,28 @@
     #      more than 10% of the data in the table, it is faster to do an I/O
     #      friendly sequential scan, than to do a random order scan.
     remaining = [r for r in revision_ids if r not in rev_id_to_db_id]
-    cur = 0
-    missing = set()
     # res = cursor.execute('SELECT revision_id, db_id FROM revision')
     # for rev_id, db_id in res.fetchall():
     #     if rev_id in missing:
     #         result[rev_id] = db_id
     #         missing.discard(rev_id)
-    while cur < len(remaining):
-        next = remaining[cur:cur+_BATCH_SIZE]
-        cur += _BATCH_SIZE
-        res = cursor.execute('SELECT revision_id, db_id FROM revision'
-                             ' WHERE revision_id in (%s)'
-                             % (', '.join('?'*len(next))),
-                             tuple(next))
-        local_missing = set(next)
-        for rev_id, db_id in res.fetchall():
-            rev_id_to_db_id[rev_id] = db_id
-            db_id_to_rev_id[db_id] = rev_id
-            local_missing.discard(rev_id)
-        missing.update(local_missing)
+    def find_existing():
+        cur = 0
+        missing = set()
+        for cur in xrange(0, len(remaining), _BATCH_SIZE):
+            next = remaining[cur:cur+_BATCH_SIZE]
+            res = cursor.execute('SELECT revision_id, db_id FROM revision'
+                                 ' WHERE revision_id in (%s)'
+                                 % (', '.join('?'*len(next))),
+                                 tuple(next))
+            local_missing = set(next)
+            for rev_id, db_id in res.fetchall():
+                rev_id_to_db_id[rev_id] = db_id
+                db_id_to_rev_id[db_id] = rev_id
+                local_missing.discard(rev_id)
+            missing.update(local_missing)
+        return missing
+    missing = find_existing()
     if missing:
         ghosts = set()
         def get_gdfo(rev_id):
@@ -213,9 +215,11 @@
                 if node.parent_keys is None:
                     ghosts.add(rev_id)
             return node.gdfo
-        cursor.executemany('INSERT INTO revision (revision_id, gdfo)'
-                           ' VALUES (?, ?)',
-                           [(m, get_gdfo(m)) for m in missing])
+        def insert_new():
+            cursor.executemany('INSERT INTO revision (revision_id, gdfo)'
+                               ' VALUES (?, ?)',
+                               [(m, get_gdfo(m)) for m in missing])
+        insert_new()
         ensure_revisions(cursor, missing, rev_id_to_db_id,
                          db_id_to_rev_id, graph=graph)
         if ghosts:



More information about the bazaar-commits mailing list