Rev 30: Do similar work to optimize the dotted revno lookups. in http://bzr.arbash-meinel.com/plugins/history_db
John Arbash Meinel
john at arbash-meinel.com
Sun Apr 4 05:37:00 BST 2010
At http://bzr.arbash-meinel.com/plugins/history_db
------------------------------------------------------------
revno: 30
revision-id: john at arbash-meinel.com-20100404043654-hlxk78v319ldf5lc
parent: john at arbash-meinel.com-20100402224358-yk2z3ukcwo504v66
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Sat 2010-04-03 23:36:54 -0500
message:
Do similar work to optimize the dotted revno lookups.
1.487s bzr
0.587s db-rev-id
0.507s db-db-id
0.031s db-range
0.026s db-range-multi
It would be good to look at the multi-way query plan and how it makes it so fast.
What indices is it using, what order, what joins, etc.
-------------- next part --------------
=== modified file '__init__.py'
--- a/__init__.py 2010-04-02 22:43:58 +0000
+++ b/__init__.py 2010-04-04 04:36:54 +0000
@@ -50,6 +50,13 @@
trace.note('Stats:\n%s' % (pprint.pformat(dict(importer._stats)),))
+_dotted_revno_walk_types = registry.Registry()
+_dotted_revno_walk_types.register('db-rev-id', None)
+_dotted_revno_walk_types.register('db-db-id', None)
+_dotted_revno_walk_types.register('db-range', None)
+_dotted_revno_walk_types.register('db-range-multi', None)
+_dotted_revno_walk_types.register('bzr', None)
+
class cmd_get_dotted_revno(commands.Command):
"""Query the db for a dotted revno.
"""
@@ -59,11 +66,13 @@
option.Option('directory', type=unicode, short_name='d',
help='Import this location instead of "."'),
'revision',
- option.Option('use-db-ids',
- help='Do the queries using database ids'),
+ option.RegistryOption('method',
+ help='How do you want to do the walking.',
+ converter=str, registry=_dotted_revno_walk_types)
]
- def run(self, directory='.', db=None, revision=None, use_db_ids=False):
+ def run(self, directory='.', db=None, revision=None,
+ method=None):
import pprint
from bzrlib.plugins.history_db import history_db
from bzrlib import branch
@@ -73,13 +82,35 @@
b.lock_read()
try:
rev_ids = [rspec.as_revision_id(b) for rspec in revision]
- query = history_db.Querier(db, b)
- if use_db_ids:
- revnos = [(query.get_dotted_revno_db_ids(rev_id), rev_id)
- for rev_id in rev_ids]
+ t = time.time()
+ if method == 'bzr':
+ # Make sure to use a different branch, because the existing one
+ # has already cached the mainline, etc in decoding the supplied
+ # revision ids.
+ b2 = b.bzrdir.open_branch()
+ b2.lock_read()
+ try:
+ revnos = [(b2.revision_id_to_dotted_revno(r), r)
+ for r in rev_ids]
+ finally:
+ b2.unlock()
else:
- revnos = [(query.get_dotted_revno(rev_id), rev_id)
- for rev_id in rev_ids]
+ query = history_db.Querier(db, b)
+ if method == 'db-db-id':
+ revnos = [(query.get_dotted_revno_db_ids(rev_id), rev_id)
+ for rev_id in rev_ids]
+ elif method == 'db-rev-id':
+ revnos = [(query.get_dotted_revno(rev_id), rev_id)
+ for rev_id in rev_ids]
+ elif method == 'db-range':
+ revnos = [(query.get_dotted_revno_range(rev_id), rev_id)
+ for rev_id in rev_ids]
+ else:
+ assert method == 'db-range-multi'
+ revno_map = query.get_dotted_revno_range_multi(rev_ids)
+ revnos = [(revno_map[rev_id], rev_id) for rev_id in rev_ids]
+ trace.note('Stats:\n%s' % (pprint.pformat(dict(query._stats)),))
+ tdelta = time.time() - t
revno_strs = []
max_len = 0
for revno, rev_id in revnos:
@@ -94,7 +125,7 @@
for s, r in revno_strs]))
finally:
b.unlock()
- trace.note('Stats:\n%s' % (pprint.pformat(dict(query._stats)),))
+ trace.note('Time: %.3fs' % (tdelta,))
_mainline_walk_types = registry.Registry()
@@ -198,7 +229,7 @@
elif method == 'bzr-kg':
kg = b.repository.revisions.get_known_graph_ancestry(
[(b.last_revision(),)])
- ancestors = len(kg._nodes)
+ ancestors = kg._nodes
self.outf.write('Found %d ancestors\n' % (len(ancestors),))
trace.note('Time: %.3fs' % (time.time() - t,))
=== modified file 'history_db.py'
--- a/history_db.py 2010-04-02 22:43:58 +0000
+++ b/history_db.py 2010-04-04 04:36:54 +0000
@@ -333,6 +333,87 @@
self._stats['query_time'] += (time.time() - t)
return None
+ def get_dotted_revno_range(self, revision_id):
+ """Determine the dotted revno, using the range info, etc."""
+ t = time.time()
+ tip_db_id = self._get_db_id(self._branch_tip_rev_id)
+ rev_db_id = self._get_db_id(revision_id)
+ revno = None
+ while tip_db_id is not None:
+ self._stats['num_steps'] += 1
+ range_res = self._cursor.execute(
+ "SELECT pkey, tail"
+ " FROM mainline_parent_range"
+ " WHERE head = ?"
+ " ORDER BY count DESC LIMIT 1",
+ (tip_db_id,)).fetchone()
+ if range_res is None:
+ revno_res = self._cursor.execute(
+ "SELECT revno FROM dotted_revno"
+ " WHERE tip_revision = ? AND merged_revision = ?",
+ (tip_db_id, rev_db_id)).fetchone()
+ next_db_id = self._get_lh_parent_db_id(tip_db_id)
+ else:
+ pkey, next_db_id = range_res
+ revno_res = self._cursor.execute(
+ "SELECT revno FROM dotted_revno, mainline_parent"
+ " WHERE tip_revision = mainline_parent.revision"
+ " AND mainline_parent.range = ?"
+ " AND merged_revision = ?",
+ (pkey, rev_db_id)).fetchone()
+ tip_db_id = next_db_id
+ if revno_res is not None:
+ revno = tuple(map(int, revno_res[0].split('.')))
+ break
+ self._stats['query_time'] += (time.time() - t)
+ return revno
+
+ def get_dotted_revno_range_multi(self, revision_ids):
+ """Determine the dotted revno, using the range info, etc."""
+ t = time.time()
+ rev_id_to_db_id = {}
+ need_ids = [self._branch_tip_rev_id]
+ need_ids.extend(revision_ids)
+ schema.ensure_revisions(self._cursor, need_ids,
+ rev_id_to_db_id, graph=None)
+ tip_db_id = rev_id_to_db_id[self._branch_tip_rev_id]
+ db_id_to_rev_id = dict((d, r) for r, d in rev_id_to_db_id.iteritems())
+ db_ids = set([rev_id_to_db_id[r] for r in revision_ids])
+ revnos = {}
+ while tip_db_id is not None and db_ids:
+ self._stats['num_steps'] += 1
+ range_res = self._cursor.execute(
+ "SELECT pkey, tail"
+ " FROM mainline_parent_range"
+ " WHERE head = ?"
+ " ORDER BY count DESC LIMIT 1",
+ (tip_db_id,)).fetchone()
+ if range_res is None:
+ revno_res = self._cursor.execute(
+ "SELECT merged_revision, revno FROM dotted_revno"
+ " WHERE tip_revision = ?"
+ " AND merged_revision IN (%s)"
+ % (', '.join('?'*len(db_ids))),
+ [tip_db_id] + list(db_ids)).fetchall()
+ next_db_id = self._get_lh_parent_db_id(tip_db_id)
+ else:
+ pkey, next_db_id = range_res
+ revno_res = self._cursor.execute(
+ "SELECT merged_revision, revno"
+ " FROM dotted_revno, mainline_parent"
+ " WHERE tip_revision = mainline_parent.revision"
+ " AND mainline_parent.range = ?"
+ " AND merged_revision IN (%s)"
+ % (', '.join('?'*len(db_ids))),
+ [pkey] + list(db_ids)).fetchall()
+ tip_db_id = next_db_id
+ for db_id, revno in revno_res:
+ db_ids.discard(db_id)
+ revnos[db_id_to_rev_id[db_id]] = tuple(map(int,
+ revno.split('.')))
+ self._stats['query_time'] += (time.time() - t)
+ return revnos
+
def walk_mainline(self):
"""Walk the db, and grab all the mainline identifiers."""
t = time.time()
@@ -463,7 +544,7 @@
# capped at a maximum range.)
# 1.719s walk_ancestry
# 0.198s walk_ancestry_db_ids
- # 0.164s walk_mainline_using_ranges
+ # 0.164s walk_ancestry_range
return all_ancestors
def walk_ancestry_range_and_dotted(self):
More information about the bazaar-commits
mailing list