Rev 88: iter_merge_sorted_revisions now basically works. in http://bzr.arbash-meinel.com/plugins/history_db
John Arbash Meinel
john at arbash-meinel.com
Mon Apr 12 21:58:14 BST 2010
At http://bzr.arbash-meinel.com/plugins/history_db
------------------------------------------------------------
revno: 88
revision-id: john at arbash-meinel.com-20100412205807-vvhx2cu0rd6bhu7f
parent: john at arbash-meinel.com-20100412200430-nocvykgmd2end0py
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Mon 2010-04-12 15:58:07 -0500
message:
iter_merge_sorted_revisions now basically works.
I don't think it is perfect yet, as we don't handle the various flags
that can be supplied for stop_rule.
However, time for 'bzr log -n0 -r -10..-1' or -r-100..-1 goes from
1.4s down to 0.97s.
And so far the result is 'correct'. I assume because the default
is 'with-merges', which should match the 'skip to lh parent and
treat lh parent as a stop rev.'
At least as long as both parts of the range are mainline revs.
-------------- next part --------------
=== modified file '__init__.py'
--- a/__init__.py 2010-04-12 20:04:30 +0000
+++ b/__init__.py 2010-04-12 20:58:07 +0000
@@ -377,14 +377,35 @@
start_revision_id=start_revision_id,
stop_revision_id=stop_revision_id, stop_rule=stop_rule,
direction=direction)
- # TODO: Consider what to do if the branch has not been imported yet. My gut
- # feeling is that we really want to do the import at this time. Since
- # the user would want the data, and it is possible to update a cache
- # with new values and return them *faster* than you could get them
- # out from scratch.
- return _orig_iter_merge_sorted(self,
- start_revision_id=start_revision_id, stop_revision_id=stop_revision_id,
- stop_rule=stop_rule, direction=direction)
+ if stop_rule == 'exclude':
+ real_stop_revision_id = stop_revision_id
+ else:
+ # We have to get fancy about our stop revision, we'll use the existing
+ # filtering functions to trim things back out, for now, we just use the
+ # left-hand parent as the real stop revision
+ # TODO: Handle a ghost or a first-revision that doesn't have a lh
+ # parent
+ real_stop_revision_id = self.repository.get_parent_map(
+ [stop_revision_id])[stop_revision_id][0]
+ merge_sorted = query.iter_merge_sorted_revisions(
+ start_revision_id=start_revision_id,
+ stop_revision_id=real_stop_revision_id)
+ if real_stop_revision_id != stop_revision_id:
+ # Ask the existing branch code to do the special filtering
+ merge_sorted = _filter_merge_sorted(self, merge_sorted,
+ stop_revision_id, stop_rule)
+ merge_sorted = self._filter_non_ancestors(iter(merge_sorted))
+ if direction == 'reverse':
+ return merge_sorted
+ elif direction == 'forward':
+ return reversed(list(filtered))
+ else:
+ raise ValueError('invalid direction %r' % direction)
+
+
+def _filter_merge_sorted(self, merge_sorted, stop_revision_id, stop_rule):
+ """iter_merge_sorted_revisions has some crazy stop_rules, deal with it."""
+ return merge_sorted
def _history_db_revision_id_to_dotted_revno(self, revision_id):
=== modified file 'history_db.py'
--- a/history_db.py 2010-04-12 19:57:07 +0000
+++ b/history_db.py 2010-04-12 20:58:07 +0000
@@ -1557,6 +1557,124 @@
self._stats['query_time'] += (time.time() - t)
return all_ancestors
+ def _find_tip_containing(self, tip_db_id, merged_db_id):
+ """Walk backwards until you find the tip that contains the given id."""
+ while tip_db_id is not None:
+ if tip_db_id == merged_db_id:
+ # A tip obviously contains itself
+ self._stats['step_find_tip_as_merged'] += 1
+ return tip_db_id
+ self._stats['num_steps'] += 1
+ self._stats['step_find_tip_containing'] += 1
+ range_res = self._cursor.execute(
+ "SELECT pkey, tail"
+ " FROM mainline_parent_range"
+ " WHERE head = ?"
+ " ORDER BY count DESC LIMIT 1",
+ (tip_db_id,)).fetchone()
+ if range_res is None:
+ present_res = self._cursor.execute(
+ "SELECT 1 FROM dotted_revno"
+ " WHERE tip_revision = ?"
+ " AND merged_revision = ?",
+ [tip_db_id, merged_db_id]).fetchone()
+ next_db_id = self._get_lh_parent_db_id(tip_db_id)
+ else:
+ pkey, next_db_id = range_res
+ present_res = self._cursor.execute(
+ "SELECT 1"
+ " FROM dotted_revno, mainline_parent"
+ " WHERE tip_revision = mainline_parent.revision"
+ " AND mainline_parent.range = ?"
+ " AND merged_revision = ?",
+ [pkey, merged_db_id]).fetchone()
+ if present_res is not None:
+ # We found a tip that contains merged_db_id
+ return tip_db_id
+ tip_db_id = next_db_id
+ return None
+
+ def _get_merge_sorted_range(self, tip_db_id, start_db_id, stop_db_id):
+ """Starting at the given tip, read all merge_sorted data until stop."""
+ if start_db_id is None or start_db_id == tip_db_id:
+ found_start = True
+ else:
+ found_start = False
+ while tip_db_id is not None:
+ self._stats['num_steps'] += 1
+ self._stats['step_get_merge_sorted'] += 1
+ range_res = self._cursor.execute(
+ "SELECT pkey, tail"
+ " FROM mainline_parent_range"
+ " WHERE head = ?"
+ " ORDER BY count DESC LIMIT 1",
+ (tip_db_id,)).fetchone()
+ if range_res is None:
+ merged_res = self._cursor.execute(
+ "SELECT db_id, revision_id, merge_depth, revno,"
+ " end_of_merge"
+ " FROM dotted_revno, revision"
+ " WHERE tip_revision = ?"
+ " AND db_id = merged_revision"
+ [tip_db_id]).fetchall()
+ next_db_id = self._get_lh_parent_db_id(tip_db_id)
+ else:
+ pkey, next_db_id = range_res
+ merged_res = self._cursor.execute(
+ "SELECT db_id, revision_id, merge_depth, revno,"
+ " end_of_merge"
+ " FROM dotted_revno, revision, mainline_parent"
+ " WHERE tip_revision = mainline_parent.revision"
+ " AND mainline_parent.range = ?"
+ " AND db_id = merged_revision",
+ [pkey]).fetchall()
+ if found_start:
+ for db_id, r_id, depth, revno_str, eom in merged_res:
+ if stop_db_id is not None and db_id == stop_db_id:
+ return
+ revno = tuple(map(int, revno_str.split('.')))
+ yield r_id, depth, revno, eom
+ else:
+ for info in merged_res:
+ if not found_start and info[0] == start_db_id:
+ found_start = True
+ if found_start:
+ if stop_db_id is not None and info[0] == stop_db_id:
+ return
+ db_id, r_id, depth, revno_str, eom = info
+ revno = tuple(map(int, revno_str.split('.')))
+ yield r_id, depth, revno, eom
+ tip_db_id = next_db_id
+
+ def iter_merge_sorted_revisions(self, start_revision_id=None,
+ stop_revision_id=None):
+ """See Branch.iter_merge_sorted_revisions()
+
+ Note that start and stop differ from the Branch implementation, because
+ stop is *always* exclusive. You can simulate the rest by careful
+ selection of stop.
+ """
+ t = time.time()
+ tip_db_id = self._get_db_id(self._branch_tip_rev_id)
+ if start_revision_id is not None:
+ start_db_id = self._get_db_id(start_revision_id)
+ else:
+ start_db_id = tip_db_id
+ stop_db_id = None
+ if stop_revision_id is not None:
+ stop_db_id = self._get_db_id(stop_revision_id)
+ # Seek fast until we find start_db_id
+ merge_sorted = []
+ revnos = {}
+ tip_db_id = self._find_tip_containing(tip_db_id, start_db_id)
+ # Now that you have found the first tip containing the given start
+ # revision, pull in data until you walk off the history, or you find
+ # the stop revision
+ merge_sorted = list(
+ self._get_merge_sorted_range(tip_db_id, start_db_id, stop_db_id))
+ self._stats['query_time'] += (time.time() - t)
+ return merge_sorted
+
def heads(self, revision_ids):
"""Compute Graph.heads() on the given data."""
raise NotImplementedError(self.heads)
=== modified file 'schema.py'
--- a/schema.py 2010-04-09 14:52:25 +0000
+++ b/schema.py 2010-04-12 20:58:07 +0000
@@ -67,6 +67,9 @@
# TODO: Consider storing the data as 3-digit integer revnos, rather than a
# revno_str
+# TODO: I think we need to add an 'order' column, so that we guarantee the
+# order we get back from the database is the same order that we put them
+# into the database.
dotted_revno_t = """
CREATE TABLE dotted_revno (
tip_revision INTEGER REFERENCES revision NOT NULL,
More information about the bazaar-commits
mailing list