Rev 106: Iron out some bugs. in http://bzr.arbash-meinel.com/branches/bzr/history_db/trunk
John Arbash Meinel
john at arbash-meinel.com
Wed Apr 21 23:23:47 BST 2010
At http://bzr.arbash-meinel.com/branches/bzr/history_db/trunk
------------------------------------------------------------
revno: 106
revision-id: john at arbash-meinel.com-20100421222335-1a127vu1g36jh1za
parent: john at arbash-meinel.com-20100421210738-121p9mv2tokku564
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Wed 2010-04-21 17:23:35 -0500
message:
Iron out some bugs.
It now auto-imports on demand for any of dotted=>rev, rev=>dotted, and iter_merge.
It can be a bit slow, but not terrible. And it means it is always correct,
and the next log -n0 should be faster.
It does slow down log -n1 -r-10..-1 slightly, 700ms=>750ms or so. I'm not
sure if that really matters.
-------------- next part --------------
=== modified file '__init__.py'
--- a/__init__.py 2010-04-21 21:07:38 +0000
+++ b/__init__.py 2010-04-21 22:23:35 +0000
@@ -386,7 +386,7 @@
query = getattr(a_branch, '_history_db_querier', _singleton)
if query is not _singleton:
if query is not None:
- query._db_conn.close()
+ query.close()
del a_branch._history_db_querier
return _orig_clear_cached_state(a_branch)
@@ -400,9 +400,7 @@
"""
t0 = time.clock()
query = _get_querier(self)
- if query is not None:
- query.ensure_branch_tip()
- if query is None or query._branch_tip_db_id is None:
+ if query is None:
# TODO: Consider other cases where we may want to fall back, like
# special arguments, etc that we don't handle well yet.
trace.mutter('history_db falling back to original'
@@ -456,9 +454,7 @@
return revno
t0 = time.clock()
query = _get_querier(self)
- if query is not None:
- query.ensure_branch_tip()
- if query is None or query._branch_tip_db_id is None:
+ if query is None:
trace.mutter('history_db falling back to original'
'revision_id => dotted_revno')
return _orig_do_rev_id_to_dotted(self, revision_id)
@@ -473,13 +469,14 @@
if revision_id not in revision_id_map:
trace.mutter('history_db failed to find a mapping for {%s},'
'falling back' % (revision_id,))
- return _orig_do_rev_id_to_dotted(self, revno)
+ return _orig_do_rev_id_to_dotted(self, revision_id)
return revision_id_map[revision_id]
def _history_db_dotted_revno_to_revision_id(self, revno):
"""See Branch._do_dotted_revno_to_revision_id."""
# revno should be a dotted revno, aka either 1-part or 3-part tuple
+ import pdb; pdb.set_trace()
t0 = time.clock()
query = _get_querier(self)
if query is None:
@@ -503,6 +500,7 @@
def _history_db_post_change_branch_tip_hook(params):
"""Run when the tip of a branch changes revision_id."""
+ import pdb; pdb.set_trace()
t0 = time.clock()
import pprint
# TODO: This requires a round-trip to the remote server to find out whether
=== modified file 'history_db.py'
--- a/history_db.py 2010-04-21 21:07:38 +0000
+++ b/history_db.py 2010-04-21 22:23:35 +0000
@@ -367,44 +367,49 @@
children = {}
parent_map = {}
known = {}
- while needed:
- rev_id = needed.pop()
- if rev_id in known:
- # We may add particular parents multiple times, just ignore
- # them once they've been found
- continue
- res = self._cursor.execute("SELECT gdfo"
- " FROM revision WHERE revision_id = ?",
- (rev_id,)).fetchone()
- if res is not None:
- known[rev_id] = res[0]
- continue
- # We don't have this entry recorded yet, add the parents to the
- # search
- pmap = self._branch.repository.get_parent_map([rev_id])
- parent_map.update(pmap)
- parent_ids = pmap.get(rev_id, None)
- if parent_ids is None or parent_ids == NULL_PARENTS:
- # We can insert this rev directly, because we know its gdfo,
- # as it has no parents.
- parent_map[rev_id] = ()
- self._cursor.execute("INSERT INTO revision (revision_id, gdfo)"
- " VALUES (?, ?)", (rev_id, 1))
- # Wrap around to populate known quickly
- needed.append(rev_id)
- if parent_ids is None:
- # This is a ghost, add it to the table
- self._cursor.execute("INSERT INTO ghost (db_id)"
- " SELECT db_id FROM revision"
- " WHERE revision_id = ?",
- (rev_id,))
- continue
- for parent_id in pmap[rev_id]:
- if parent_id not in known:
- if parent_id not in all_needed:
- needed.append(parent_id)
- all_needed.add(parent_id)
- children.setdefault(parent_id, []).append(rev_id)
+ pb = ui.ui_factory.nested_progress_bar()
+ try:
+ while needed:
+ pb.update('Finding ancestry', len(all_needed), len(all_needed))
+ rev_id = needed.pop()
+ if rev_id in known:
+ # We may add particular parents multiple times, just ignore
+ # them once they've been found
+ continue
+ res = self._cursor.execute(
+ "SELECT gdfo FROM revision WHERE revision_id = ?",
+ [rev_id]).fetchone()
+ if res is not None:
+ known[rev_id] = res[0]
+ continue
+ # We don't have this entry recorded yet, add the parents to the
+ # search
+ pmap = self._branch.repository.get_parent_map([rev_id])
+ parent_map.update(pmap)
+ parent_ids = pmap.get(rev_id, None)
+ if parent_ids is None or parent_ids == NULL_PARENTS:
+ # We can insert this rev directly, because we know its
+ # gdfo, as it has no parents.
+ parent_map[rev_id] = ()
+ self._cursor.execute("INSERT INTO revision (revision_id, gdfo)"
+ " VALUES (?, ?)", (rev_id, 1))
+ # Wrap around to populate known quickly
+ needed.append(rev_id)
+ if parent_ids is None:
+ # This is a ghost, add it to the table
+ self._cursor.execute("INSERT INTO ghost (db_id)"
+ " SELECT db_id FROM revision"
+ " WHERE revision_id = ?",
+ (rev_id,))
+ continue
+ for parent_id in pmap[rev_id]:
+ if parent_id not in known:
+ if parent_id not in all_needed:
+ needed.append(parent_id)
+ all_needed.add(parent_id)
+ children.setdefault(parent_id, []).append(rev_id)
+ finally:
+ pb.finished()
return known, parent_map, children
def _compute_gdfo_and_insert(self, known, children, parent_map):
@@ -1186,34 +1191,63 @@
self._branch = a_branch
self._branch_tip_rev_id = a_branch.last_revision()
self._branch_tip_db_id = self._get_db_id(self._branch_tip_rev_id)
+ self._tip_is_imported = False
self._stats = defaultdict(lambda: 0)
+ def _get_cursor(self):
+ if self._cursor is not None:
+ return self._cursor
+ db_conn = dbapi2.connect(self._db_path)
+ self._db_conn = db_conn
+ self._cursor = self._db_conn.cursor()
+ return self._cursor
+
def ensure_branch_tip(self):
"""Ensure that the branch tip has been imported.
This will run Importer if it has not.
"""
- if self._branch_tip_db_id is not None:
- # It has been imported
- return
+ if self._branch_tip_db_id is not None and self._tip_is_imported:
+ return
+ if self._branch_tip_db_id is None:
+ # This revision has not been seen by the DB, so we know it isn't
+ # imported
+ self._import_tip()
+ return
+ if self._is_imported_db_id(self._branch_tip_db_id):
+ # This revision was seen, and imported
+ self._tip_is_imported = True
+ return
+ self._import_tip()
+
+ def _import_tip(self):
if self._cursor is not None:
- self._db_conn.close()
- self._cursor = None
+ self.close()
+ t = time.time()
importer = Importer(self._db_path, self._branch,
tip_revision_id=self._branch_tip_rev_id,
incremental=True)
importer.do_import()
+ tdelta = time.time() - t
+ trace.note('imported %d nodes on-the-fly in %.3fs'
+ % (importer._stats.get('total_nodes_inserted', 0), tdelta))
self._db_conn = importer._db_conn
self._cursor = importer._cursor
self._branch_tip_db_id = self._get_db_id(self._branch_tip_rev_id)
-
- def _get_cursor(self):
- if self._cursor is not None:
- return self._cursor
- db_conn = dbapi2.connect(self._db_path)
- self._db_conn = db_conn
- self._cursor = self._db_conn.cursor()
- return self._cursor
+ self._tip_is_imported = True
+
+ def _is_imported_db_id(self, tip_db_id):
+ res = self._get_cursor().execute(
+ "SELECT count(*) FROM dotted_revno"
+ " WHERE tip_revision = ?"
+ " AND tip_revision = merged_revision",
+ (tip_db_id,)).fetchone()
+ return res[0] > 0
+
+ def close(self):
+ self._db_conn.close()
+ self._db_conn = None
+ self._cursor = None
def _get_db_id(self, revision_id):
try:
@@ -1356,8 +1390,9 @@
def get_dotted_revno_range_multi(self, revision_ids):
"""Determine the dotted revno, using the range info, etc."""
+ self.ensure_branch_tip()
+ t = time.time()
cursor = self._get_cursor()
- t = time.time()
tip_db_id = self._branch_tip_db_id
if tip_db_id is None:
raise TipNotImported(self._branch, self._branch_tip_rev_id)
@@ -1406,8 +1441,9 @@
def get_revision_ids(self, revnos):
"""Map from a dotted-revno back into a revision_id."""
+ self.ensure_branch_tip()
t = time.time()
- tip_db_id = self._get_db_id(self._branch_tip_rev_id)
+ tip_db_id = self._branch_tip_db_id
# TODO: If tip_db_id is None, maybe we want to raise an exception here?
# To indicate that the branch has not been imported yet
revno_strs = set(['.'.join(map(str, revno)) for revno in revnos])
@@ -1426,7 +1462,7 @@
"SELECT revision_id, revno"
" FROM dotted_revno, revision"
" WHERE merged_revision = revision.db_id"
- " tip_revision = ?"
+ " AND tip_revision = ?"
" AND revno IN (%s)", len(revno_strs)),
[tip_db_id] + list(revno_strs)).fetchall()
next_db_id = self._get_lh_parent_db_id(tip_db_id)
@@ -1730,10 +1766,11 @@
stop is *always* exclusive. You can simulate the rest by careful
selection of stop.
"""
+ self.ensure_branch_tip()
t = time.time()
- tip_db_id = self._get_db_id(self._branch_tip_rev_id)
+ tip_db_id = self._branch_tip_db_id
if tip_db_id is None:
- raise ValueError('tip not imported')
+ raise TipNotImported(self._branch, self._branch_tip_rev_id)
if start_revision_id is not None:
start_db_id = self._get_db_id(start_revision_id)
else:
=== modified file 'test_hooks.py'
--- a/test_hooks.py 2010-04-21 21:07:38 +0000
+++ b/test_hooks.py 2010-04-21 22:23:35 +0000
@@ -93,3 +93,10 @@
b.get_config().set_user_option('history_db_path', history_db_path)
self.assertEqual((2,),
history_db._history_db_revision_id_to_dotted_revno(b, 'B'))
+
+ def test_dotted_to_rev_not_imported(self):
+ history_db_path = self.get_history_db_path()
+ b, merge_sorted = self.make_simple_history_branch()
+ b.get_config().set_user_option('history_db_path', history_db_path)
+ self.assertEqual('B',
+ history_db._history_db_dotted_revno_to_revision_id(b, (2,)))
More information about the bazaar-commits
mailing list