Rev 88: iter_merge_sorted_revisions now basically works. in http://bzr.arbash-meinel.com/plugins/history_db

Mon Apr 12 21:58:14 BST 2010

At http://bzr.arbash-meinel.com/plugins/history_db

------------------------------------------------------------
revno: 88
revision-id: john at arbash-meinel.com-20100412205807-vvhx2cu0rd6bhu7f
parent: john at arbash-meinel.com-20100412200430-nocvykgmd2end0py
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Mon 2010-04-12 15:58:07 -0500
message:
  iter_merge_sorted_revisions now basically works.
  
  I don't think it is perfect yet, as we don't handle the various flags
  that can be supplied for stop_rule.
  However, time for 'bzr log -n0 -r -10..-1' or -r-100..-1 goes from
  1.4s down to 0.97s.
  And so far the result is 'correct'. I assume because the default
  is 'with-merges', which should match the 'skip to lh parent and
  treat lh parent as a stop rev.'
  At least as long as both parts of the range are mainline revs.
-------------- next part --------------
=== modified file '__init__.py'

--- a/__init__.py	2010-04-12 20:04:30 +0000
+++ b/__init__.py	2010-04-12 20:58:07 +0000
@@ -377,14 +377,35 @@
             start_revision_id=start_revision_id,
             stop_revision_id=stop_revision_id, stop_rule=stop_rule,
             direction=direction)
-    # TODO: Consider what to do if the branch has not been imported yet. My gut
-    #       feeling is that we really want to do the import at this time. Since
-    #       the user would want the data, and it is possible to update a cache
-    #       with new values and return them *faster* than you could get them
-    #       out from scratch.
-    return _orig_iter_merge_sorted(self,
-        start_revision_id=start_revision_id, stop_revision_id=stop_revision_id,
-        stop_rule=stop_rule, direction=direction)
+    if stop_rule == 'exclude':
+        real_stop_revision_id = stop_revision_id
+    else:
+        # We have to get fancy about our stop revision, we'll use the existing
+        # filtering functions to trim things back out, for now, we just use the
+        # left-hand parent as the real stop revision
+        # TODO: Handle a ghost or a first-revision that doesn't have a lh
+        #       parent
+        real_stop_revision_id = self.repository.get_parent_map(
+            [stop_revision_id])[stop_revision_id][0]
+    merge_sorted = query.iter_merge_sorted_revisions(
+                    start_revision_id=start_revision_id,
+                    stop_revision_id=real_stop_revision_id)
+    if real_stop_revision_id != stop_revision_id:
+        # Ask the existing branch code to do the special filtering
+        merge_sorted = _filter_merge_sorted(self, merge_sorted,
+                        stop_revision_id, stop_rule)
+    merge_sorted = self._filter_non_ancestors(iter(merge_sorted))
+    if direction == 'reverse':
+        return merge_sorted
+    elif direction == 'forward':
+        return reversed(list(filtered))
+    else:
+        raise ValueError('invalid direction %r' % direction)
+
+
+def _filter_merge_sorted(self, merge_sorted, stop_revision_id, stop_rule):
+    """iter_merge_sorted_revisions has some crazy stop_rules, deal with it."""
+    return merge_sorted
 
 
 def _history_db_revision_id_to_dotted_revno(self, revision_id):

=== modified file 'history_db.py'
--- a/history_db.py	2010-04-12 19:57:07 +0000
+++ b/history_db.py	2010-04-12 20:58:07 +0000
@@ -1557,6 +1557,124 @@
         self._stats['query_time'] += (time.time() - t)
         return all_ancestors
 
+    def _find_tip_containing(self, tip_db_id, merged_db_id):
+        """Walk backwards until you find the tip that contains the given id."""
+        while tip_db_id is not None:
+            if tip_db_id == merged_db_id:
+                # A tip obviously contains itself
+                self._stats['step_find_tip_as_merged'] += 1
+                return tip_db_id
+            self._stats['num_steps'] += 1
+            self._stats['step_find_tip_containing'] += 1
+            range_res = self._cursor.execute(
+                "SELECT pkey, tail"
+                "  FROM mainline_parent_range"
+                " WHERE head = ?"
+                " ORDER BY count DESC LIMIT 1",
+                (tip_db_id,)).fetchone()
+            if range_res is None:
+                present_res = self._cursor.execute(
+                    "SELECT 1 FROM dotted_revno"
+                    " WHERE tip_revision = ?"
+                    "   AND merged_revision = ?",
+                    [tip_db_id, merged_db_id]).fetchone()
+                next_db_id = self._get_lh_parent_db_id(tip_db_id)
+            else:
+                pkey, next_db_id = range_res
+                present_res = self._cursor.execute(
+                    "SELECT 1"
+                    "  FROM dotted_revno, mainline_parent"
+                    " WHERE tip_revision = mainline_parent.revision"
+                    "   AND mainline_parent.range = ?"
+                    "   AND merged_revision = ?",
+                    [pkey, merged_db_id]).fetchone()
+            if present_res is not None:
+                # We found a tip that contains merged_db_id
+                return tip_db_id
+            tip_db_id = next_db_id
+        return None
+
+    def _get_merge_sorted_range(self, tip_db_id, start_db_id, stop_db_id):
+        """Starting at the given tip, read all merge_sorted data until stop."""
+        if start_db_id is None or start_db_id == tip_db_id:
+            found_start = True
+        else:
+            found_start = False
+        while tip_db_id is not None:
+            self._stats['num_steps'] += 1
+            self._stats['step_get_merge_sorted'] += 1
+            range_res = self._cursor.execute(
+                "SELECT pkey, tail"
+                "  FROM mainline_parent_range"
+                " WHERE head = ?"
+                " ORDER BY count DESC LIMIT 1",
+                (tip_db_id,)).fetchone()
+            if range_res is None:
+                merged_res = self._cursor.execute(
+                    "SELECT db_id, revision_id, merge_depth, revno,"
+                    "       end_of_merge"
+                    "  FROM dotted_revno, revision"
+                    " WHERE tip_revision = ?"
+                    "   AND db_id = merged_revision"
+                    [tip_db_id]).fetchall()
+                next_db_id = self._get_lh_parent_db_id(tip_db_id)
+            else:
+                pkey, next_db_id = range_res
+                merged_res = self._cursor.execute(
+                    "SELECT db_id, revision_id, merge_depth, revno,"
+                    "       end_of_merge"
+                    "  FROM dotted_revno, revision, mainline_parent"
+                    " WHERE tip_revision = mainline_parent.revision"
+                    "   AND mainline_parent.range = ?"
+                    "   AND db_id = merged_revision",
+                    [pkey]).fetchall()
+            if found_start:
+                for db_id, r_id, depth, revno_str, eom in merged_res:
+                    if stop_db_id is not None and db_id == stop_db_id:
+                        return
+                    revno = tuple(map(int, revno_str.split('.')))
+                    yield r_id, depth, revno, eom
+            else:
+                for info in merged_res:
+                    if not found_start and info[0] == start_db_id:
+                        found_start = True
+                    if found_start:
+                        if stop_db_id is not None and info[0] == stop_db_id:
+                            return
+                        db_id, r_id, depth, revno_str, eom = info
+                        revno = tuple(map(int, revno_str.split('.')))
+                        yield r_id, depth, revno, eom
+            tip_db_id = next_db_id
+
+    def iter_merge_sorted_revisions(self, start_revision_id=None,
+                                    stop_revision_id=None):
+        """See Branch.iter_merge_sorted_revisions()
+
+        Note that start and stop differ from the Branch implementation, because
+        stop is *always* exclusive. You can simulate the rest by careful
+        selection of stop.
+        """
+        t = time.time()
+        tip_db_id = self._get_db_id(self._branch_tip_rev_id)
+        if start_revision_id is not None:
+            start_db_id = self._get_db_id(start_revision_id)
+        else:
+            start_db_id = tip_db_id
+        stop_db_id = None
+        if stop_revision_id is not None:
+            stop_db_id = self._get_db_id(stop_revision_id)
+        # Seek fast until we find start_db_id
+        merge_sorted = []
+        revnos = {}
+        tip_db_id = self._find_tip_containing(tip_db_id, start_db_id)
+        # Now that you have found the first tip containing the given start
+        # revision, pull in data until you walk off the history, or you find
+        # the stop revision
+        merge_sorted = list(
+            self._get_merge_sorted_range(tip_db_id, start_db_id, stop_db_id))
+        self._stats['query_time'] += (time.time() - t)
+        return merge_sorted
+
     def heads(self, revision_ids):
         """Compute Graph.heads() on the given data."""
         raise NotImplementedError(self.heads)

=== modified file 'schema.py'
--- a/schema.py	2010-04-09 14:52:25 +0000
+++ b/schema.py	2010-04-12 20:58:07 +0000
@@ -67,6 +67,9 @@
 
 # TODO: Consider storing the data as 3-digit integer revnos, rather than a
 #       revno_str
+# TODO: I think we need to add an 'order' column, so that we guarantee the
+#       order we get back from the database is the same order that we put them
+#       into the database.
 dotted_revno_t = """
 CREATE TABLE dotted_revno (
     tip_revision INTEGER REFERENCES revision NOT NULL,