Rev 123: Add a new function for finding what mainline rev merged a given revision. in http://bazaar.launchpad.net/~bzr/bzr-history-db/trunk
John Arbash Meinel
john at arbash-meinel.com
Tue May 4 22:40:07 BST 2010
At http://bazaar.launchpad.net/~bzr/bzr-history-db/trunk
------------------------------------------------------------
revno: 123
revision-id: john at arbash-meinel.com-20100504213957-x1so1mt3f13dxo4b
parent: john at arbash-meinel.com-20100503200953-2jhuonnhalqj471h
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Tue 2010-05-04 16:39:57 -0500
message:
Add a new function for finding what mainline rev merged a given revision.
-------------- next part --------------
=== modified file '__init__.py'
--- a/__init__.py 2010-05-03 19:58:53 +0000
+++ b/__init__.py 2010-05-04 21:39:57 +0000
@@ -133,7 +133,7 @@
for rev_id in rev_ids]
else:
assert method == 'db-range-multi'
- revno_map = query.get_dotted_revno_range_multi(rev_ids)
+ revno_map = query.get_dotted_revnos(rev_ids)
revnos = [(revno_map[rev_id], rev_id) for rev_id in rev_ids]
trace.note('Stats:\n%s' % (pprint.pformat(dict(query._stats)),))
tdelta = time.time() - t
@@ -202,7 +202,7 @@
for r in revno_list]
else:
assert method == 'db-range-multi'
- revno_map = query.get_dotted_revno_range_multi(rev_ids)
+ revno_map = query.get_dotted_revnos(rev_ids)
revnos = [(revno_map[rev_id], rev_id) for rev_id in rev_ids]
trace.note('Stats:\n%s' % (pprint.pformat(dict(query._stats)),))
tdelta = time.time() - t
@@ -471,7 +471,7 @@
'revision_id => dotted_revno')
return _orig_do_rev_id_to_dotted(self, revision_id)
t1 = time.clock()
- revision_id_map = query.get_dotted_revno_range_multi([revision_id])
+ revision_id_map = query.get_dotted_revnos([revision_id])
t2 = time.clock()
if 'history_db' in debug.debug_flags:
trace.note('history_db rev=>dotted %s took %.3fs, %.3fs to init,'
=== modified file 'history_db.py'
--- a/history_db.py 2010-05-03 20:09:53 +0000
+++ b/history_db.py 2010-05-04 21:39:57 +0000
@@ -1377,44 +1377,25 @@
self._stats['query_time'] += (time.time() - t)
return None
- def get_dotted_revno_range(self, revision_id):
- """Determine the dotted revno, using the range info, etc."""
- t = time.time()
- tip_db_id = self._branch_tip_db_id
- if tip_db_id is not None:
- raise TipNotImported(self._branch, self._branch_tip_rev_id)
- revno = None
- while tip_db_id is not None:
- self._stats['num_steps'] += 1
- range_res = self._get_cursor().execute(
- "SELECT pkey, tail"
- " FROM mainline_parent_range"
- " WHERE head = ?"
- " ORDER BY count DESC LIMIT 1",
- (tip_db_id,)).fetchone()
- if range_res is None:
- revno_res = self._get_cursor().execute(
- "SELECT revno FROM dotted_revno"
- " WHERE tip_revision = ? AND merged_revision = ?",
- (tip_db_id, rev_db_id)).fetchone()
- next_db_id = self._get_lh_parent_db_id(tip_db_id)
- else:
- pkey, next_db_id = range_res
- revno_res = self._get_cursor().execute(
- "SELECT revno FROM dotted_revno, mainline_parent"
- " WHERE tip_revision = mainline_parent.revision"
- " AND mainline_parent.range = ?"
- " AND merged_revision = ?"
- " LIMIT 1 -- hint, should always be only 1",
- (pkey, rev_db_id)).fetchone()
- tip_db_id = next_db_id
- if revno_res is not None:
- revno = tuple(map(int, revno_res[0].split('.')))
- break
- self._stats['query_time'] += (time.time() - t)
- return revno
+ def _get_range_key_and_tail(self, tip_db_id):
+ """Return the best range w/ head = tip_db_id or None."""
+ range_res = self._get_cursor().execute(
+ "SELECT pkey, tail"
+ " FROM mainline_parent_range"
+ " WHERE head = ?"
+ " ORDER BY count DESC LIMIT 1",
+ (tip_db_id,)).fetchone()
+ if range_res is None:
+ tail = self._get_lh_parent_db_id(tip_db_id)
+ return None, tail
+ return range_res
+ # Compatibility thunk use get_dotted_revnos instead
def get_dotted_revno_range_multi(self, revision_ids):
+ """See get_dotted_revnos"""
+ return self.get_dotted_revnos(revision_ids)
+
+ def get_dotted_revnos(self, revision_ids):
"""Determine the dotted revno, using the range info, etc."""
self.ensure_branch_tip()
t = time.time()
@@ -1433,22 +1414,15 @@
revnos = {}
while tip_db_id is not None and db_ids:
self._stats['num_steps'] += 1
- range_res = cursor.execute(
- "SELECT pkey, tail"
- " FROM mainline_parent_range"
- " WHERE head = ?"
- " ORDER BY count DESC LIMIT 1",
- (tip_db_id,)).fetchone()
- if range_res is None:
+ range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
+ if range_key is None:
revno_res = cursor.execute(_add_n_params(
"SELECT merged_revision, revno FROM dotted_revno"
" WHERE tip_revision = ?"
" AND merged_revision IN (%s)",
len(db_ids)),
[tip_db_id] + list(db_ids)).fetchall()
- next_db_id = self._get_lh_parent_db_id(tip_db_id)
else:
- pkey, next_db_id = range_res
revno_res = cursor.execute(_add_n_params(
"SELECT merged_revision, revno"
" FROM dotted_revno, mainline_parent"
@@ -1456,7 +1430,7 @@
" AND mainline_parent.range = ?"
" AND merged_revision IN (%s)",
len(db_ids)),
- [pkey] + list(db_ids)).fetchall()
+ [range_key] + list(db_ids)).fetchall()
tip_db_id = next_db_id
for db_id, revno in revno_res:
db_ids.discard(db_id)
@@ -1477,13 +1451,8 @@
cursor = self._get_cursor()
while tip_db_id is not None and revno_strs:
self._stats['num_steps'] += 1
- range_res = cursor.execute(
- "SELECT pkey, tail"
- " FROM mainline_parent_range"
- " WHERE head = ?"
- " ORDER BY count DESC LIMIT 1",
- (tip_db_id,)).fetchone()
- if range_res is None:
+ range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
+ if range_key is None:
revision_res = cursor.execute(_add_n_params(
"SELECT revision_id, revno"
" FROM dotted_revno, revision"
@@ -1491,9 +1460,7 @@
" AND tip_revision = ?"
" AND revno IN (%s)", len(revno_strs)),
[tip_db_id] + list(revno_strs)).fetchall()
- next_db_id = self._get_lh_parent_db_id(tip_db_id)
else:
- pkey, next_db_id = range_res
revision_res = cursor.execute(_add_n_params(
"SELECT revision_id, revno"
" FROM dotted_revno, mainline_parent, revision"
@@ -1501,7 +1468,7 @@
" AND merged_revision = revision.db_id"
" AND mainline_parent.range = ?"
" AND revno IN (%s)", len(revno_strs)),
- [pkey] + list(revno_strs)).fetchall()
+ [range_key] + list(revno_strs)).fetchall()
tip_db_id = next_db_id
for revision_id, revno_str in revision_res:
dotted = tuple(map(int, revno_str.split('.')))
@@ -1510,6 +1477,54 @@
self._stats['query_time'] += (time.time() - t)
return revno_map
+ def get_merged_into(self, revision_ids):
+ """Determine what mainline revisions merged the given revisions."""
+ self.ensure_branch_tip()
+ t = time.time()
+ tip_db_id = self._branch_tip_db_id
+ if tip_db_id is None:
+ raise TipNotImported(self._branch, self._branch_tip_rev_id)
+ cursor = self._get_cursor()
+ db_ids = set()
+ db_id_to_rev_id = {}
+ for rev_id in revision_ids:
+ db_id = self._get_db_id(rev_id)
+ if db_id is None:
+ import pdb; pdb.set_trace()
+ db_ids.add(db_id)
+ db_id_to_rev_id[db_id] = rev_id
+ revision_to_mainline_map = {}
+ while tip_db_id is not None and db_ids:
+ self._stats['num_steps'] += 1
+ range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
+ if range_key is None:
+ mainline_res = cursor.execute(_add_n_params(
+ "SELECT revision_id, merged_revision"
+ " FROM dotted_revno, revision"
+ " WHERE tip_revision = ?"
+ " AND tip_revision = revision.db_id"
+ " AND merged_revision IN (%s)",
+ len(db_ids)),
+ [tip_db_id] + list(db_ids)).fetchall()
+ else:
+ mainline_res = cursor.execute(_add_n_params(
+ "SELECT revision_id, merged_revision"
+ " FROM dotted_revno, mainline_parent, revision"
+ " WHERE tip_revision = mainline_parent.revision"
+ " AND tip_revision = db_id"
+ " AND mainline_parent.range = ?"
+ " AND merged_revision IN (%s)",
+ len(db_ids)),
+ [range_key] + list(db_ids)).fetchall()
+ tip_db_id = next_db_id
+ for mainline_revision_id, merged_db_id in mainline_res:
+ db_ids.discard(merged_db_id)
+ revision_to_mainline_map[db_id_to_rev_id[merged_db_id]] = \
+ mainline_revision_id
+ self._stats['query_time'] += (time.time() - t)
+ return revision_to_mainline_map
+
+
def walk_mainline(self):
"""Walk the db, and grab all the mainline identifiers."""
t = time.time()
@@ -1539,16 +1554,9 @@
:return: (range_or_None, next_db_id)
"""
cursor = self._get_cursor()
- range_res = cursor.execute(
- "SELECT pkey, tail"
- " FROM mainline_parent_range"
- " WHERE head = ?"
- " ORDER BY count DESC LIMIT 1",
- (head_db_id,)).fetchone()
- if range_res is None:
- parent_db_id = self._get_lh_parent_db_id(head_db_id)
- return None, parent_db_id
- range_key, tail_db_id = range_res
+ range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
+ if range_key is None:
+ return None, next_db_id
# TODO: Is ORDER BY dist ASC expensive? We know a priori that the list
# is probably already in sorted order, but does sqlite know that?
range_db_ids = cursor.execute(
@@ -1556,7 +1564,7 @@
" WHERE range = ? ORDER BY dist ASC",
(range_key,)).fetchall()
db_ids = [r[0] for r in range_db_ids]
- return db_ids, tail_db_id
+ return db_ids, next_db_id
def walk_mainline_using_ranges(self):
t = time.time()
@@ -1656,26 +1664,19 @@
cursor = self._get_cursor()
while db_id is not None:
self._stats['num_steps'] += 1
- range_res = cursor.execute(
- "SELECT pkey, tail"
- " FROM mainline_parent_range"
- " WHERE head = ?"
- " ORDER BY count DESC LIMIT 1",
- (db_id,)).fetchone()
- if range_res is None:
- next_db_id = self._get_lh_parent_db_id(db_id)
+ range_key, next_db_id = self._get_range_key_and_tail(db_id)
+ if range_key is None:
merged_revs = cursor.execute(
"SELECT merged_revision FROM dotted_revno"
" WHERE tip_revision = ?",
(db_id,)).fetchall()
all_ancestors.update([r[0] for r in merged_revs])
else:
- pkey, next_db_id = range_res
merged_revs = cursor.execute(
"SELECT merged_revision FROM dotted_revno, mainline_parent"
" WHERE tip_revision = mainline_parent.revision"
" AND mainline_parent.range = ?",
- (pkey,)).fetchall()
+ [range_key]).fetchall()
all_ancestors.update([r[0] for r in merged_revs])
db_id = next_db_id
self._stats['query_time'] += (time.time() - t)
@@ -1691,28 +1692,21 @@
return tip_db_id
self._stats['num_steps'] += 1
self._stats['step_find_tip_containing'] += 1
- range_res = cursor.execute(
- "SELECT pkey, tail"
- " FROM mainline_parent_range"
- " WHERE head = ?"
- " ORDER BY count DESC LIMIT 1",
- (tip_db_id,)).fetchone()
- if range_res is None:
+ range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
+ if range_key is None:
present_res = cursor.execute(
"SELECT 1 FROM dotted_revno"
" WHERE tip_revision = ?"
" AND merged_revision = ?",
[tip_db_id, merged_db_id]).fetchone()
- next_db_id = self._get_lh_parent_db_id(tip_db_id)
else:
- pkey, next_db_id = range_res
present_res = cursor.execute(
"SELECT 1"
" FROM dotted_revno, mainline_parent"
" WHERE tip_revision = mainline_parent.revision"
" AND mainline_parent.range = ?"
" AND merged_revision = ?",
- [pkey, merged_db_id]).fetchone()
+ [range_key, merged_db_id]).fetchone()
if present_res is not None:
# We found a tip that contains merged_db_id
return tip_db_id
@@ -1729,13 +1723,8 @@
while tip_db_id is not None:
self._stats['num_steps'] += 1
self._stats['step_get_merge_sorted'] += 1
- range_res = cursor.execute(
- "SELECT pkey, tail"
- " FROM mainline_parent_range"
- " WHERE head = ?"
- " ORDER BY count DESC LIMIT 1",
- (tip_db_id,)).fetchone()
- if range_res is None:
+ range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)
+ if range_key is None:
merged_res = cursor.execute(
"SELECT db_id, revision_id, merge_depth, revno,"
" end_of_merge"
@@ -1744,9 +1733,7 @@
" AND db_id = merged_revision"
" ORDER BY dist",
(tip_db_id,)).fetchall()
- next_db_id = self._get_lh_parent_db_id(tip_db_id)
else:
- pkey, next_db_id = range_res
# NOTE: Adding the ORDER BY costs us 981ms - 844ms = 137ms when
# doing 'bzr log -n0 -r -10..-1' on bzr.dev.
# That seems like a lot. Extracting them without sorting
@@ -1765,7 +1752,7 @@
" AND mainline_parent.range = ?"
" AND db_id = merged_revision",
# " ORDER BY mainline_parent.dist, dotted_revno.dist",
- [pkey]).fetchall()
+ [range_key]).fetchall()
if found_start:
for db_id, r_id, depth, revno_str, eom in merged_res:
if stop_db_id is not None and db_id == stop_db_id:
=== modified file 'test_importer.py'
--- a/test_importer.py 2010-04-30 19:08:46 +0000
+++ b/test_importer.py 2010-05-04 21:39:57 +0000
@@ -776,10 +776,14 @@
class TestQuerier(TestCaseWithGraphs):
- def test_importer_lock(self):
+ def get_db_path(self):
fn, temp = tempfile.mkstemp(prefix='test-bzr-history-db-', suffix='.db')
os.close(fn)
self.addCleanup(os.remove, temp)
+ return temp
+
+ def test_importer_lock(self):
+ temp = self.get_db_path()
b = self.make_interesting_branch()
b._tip_revision = 'I'
importer = history_db.Importer(temp, b, incremental=False)
@@ -813,3 +817,14 @@
" AND revision_id = ?",
('O',)).fetchone()
self.assertIsNot(None, res)
+
+ def test_get_merged_into(self):
+ db_path = self.get_db_path()
+ b = self.make_interesting_branch()
+ importer = history_db.Importer(db_path, b, incremental=False)
+ importer.do_import()
+ del importer
+ query = history_db.Querier(db_path, b)
+ rev_to_mainline_map = query.get_merged_into(['E', 'F', 'H', 'L'])
+ self.assertEqual({'E': 'G', 'F': 'G', 'H': 'I', 'L': 'O'},
+ rev_to_mainline_map)
More information about the bazaar-commits
mailing list