Rev 42: The search phase seems to be working and mostly correct. in http://bzr.arbash-meinel.com/plugins/history_db

Mon Apr 5 22:04:26 BST 2010

At http://bzr.arbash-meinel.com/plugins/history_db

------------------------------------------------------------
revno: 42
revision-id: john at arbash-meinel.com-20100405210421-oxoi0dv9bbiy38p5
parent: john at arbash-meinel.com-20100405203239-v64mx9kanm0cayfz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Mon 2010-04-05 16:04:21 -0500
message:
  The search phase seems to be working and mostly correct.
  
  There are still a couple of TODOs that should be addressed, but nothing super
  critical.
-------------- next part --------------
=== modified file 'history_db.py'

--- a/history_db.py	2010-04-05 20:32:39 +0000
+++ b/history_db.py	2010-04-05 21:04:21 +0000
@@ -156,13 +156,13 @@
 
     def do_import(self, expand_all=False):
         if self._incremental:
+            inc_importer = _IncrementalImporter(self, self._branch_tip_rev_id)
             self._update_ancestry(self._branch_tip_rev_id)
             tip_db_id = self._rev_id_to_db_id[self._branch_tip_rev_id]
             (needed_mainline,
              imported_mainline_db_id) = self._find_needed_mainline(tip_db_id)
             # TODO: if imported_mainline_id is None, then we may as well just
             #       switch to full import, rather than incremental
-            self._import_mainline(needed_mainline, imported_mainline_db_id)
         merge_sorted = self._import_tip(self._branch_tip_rev_id)
         if not expand_all:
             return
@@ -242,15 +242,6 @@
                 self._db_conn.commit()
         return merge_sorted
 
-    def _import_mainline(self, mainline_db_ids, pre_mainline_id):
-        """Fill out the dotted_revno table for these mainline_db_ids.
-
-        :param mainline_db_ids: A list of mainline database ids, starting with
-            the most recent revision.
-        :param pre_mainline_id: The mainline db_id that *has* been imported
-            (may be None). Should just be parent(mainline_db_ids[-1])
-        """
-
     def _update_ancestry(self, new_tip_rev_id):
         """Walk the parents of this tip, updating 'revision' and 'parent'
 
@@ -474,7 +465,7 @@
 
         # Information from the dotted_revno table for revisions that are in the
         # already-imported mainline.
-        self._imported_dotted_revno_info = {}
+        self._imported_dotted_revno = {}
         # This is the gdfo of the current mainline revision search tip. This is
         # the threshold such that 
         self._imported_gdfo = None
@@ -514,7 +505,7 @@
         indicates that they could not be merged into already imported
         revisions, then we know automatically that they are
         new-and-interesting. Further, if they are present in
-        _imported_dotted_revno_info, then we know they are not interesting, and
+        _imported_dotted_revno, then we know they are not interesting, and
         we will stop searching them.
 
         Otherwise, we don't know for sure which category they fall into, so
@@ -527,19 +518,16 @@
         still_unknown = []
         min_gdfo = None
         for db_id in unknown:
+            if db_id in self._imported_dotted_revno:
+                # This should be removed as a search tip, we know it isn't
+                # interesting, it is an ancestor of an imported revision
+                self._search_tips.remove(db_id)
+                continue
             gdfo = self._known_gdfo[db_id]
             if gdfo >= self._imported_gdfo:
                 self._interesting_ancestor_ids.add(db_id)
             else:
-                # TODO: This is probably a reasonable location to check
-                #       self._imported_dotted_revno_info to see if this search
-                #       tip is known to already exist in the ancestry.
-                if db_id in self._imported_dotted_revno_info:
-                    # This should be removed as a search tip, we know it isn't
-                    # interesting, it is an ancestor of an imported revision
-                    self._search_tips.remove(db_id)
-                else:
-                    still_unknown.append(db_id)
+                still_unknown.append(db_id)
         return still_unknown
 
     def _split_interesting_using_children(self, unknown_search_tips):
@@ -570,7 +558,7 @@
         parent_to_children = {}
         already_imported = set()
         for parent, child in parent_child_res:
-            if (child in self._imported_dotted_revno_info
+            if (child in self._imported_dotted_revno
                 or child == self._imported_mainline_id):
                 # This child is already imported, so obviously the parent is,
                 # too.
@@ -612,7 +600,29 @@
 
     def _step_mainline(self):
         """Move the mainline pointer by one, updating the data."""
-        bork
+        res = self._cursor.execute(
+            "SELECT merged_revision, revno, end_of_merge, merge_depth"
+            "  FROM dotted_revno WHERE tip_revision = ?",
+            [self._imported_mainline_id]).fetchall()
+        self._imported_dotted_revno.update([(r[0], r[1:]) for r in res])
+        # TODO: We could remove search tips that show up as newly merged
+        #       though that can wait until the next
+        #       _split_search_tips_by_gdfo
+        # new_merged_ids = [r[0] for r in res]
+        res = self._cursor.execute("SELECT parent, gdfo"
+                                   "  FROM parent, revision"
+                                   " WHERE parent = db_id"
+                                   "   AND parent_idx = 0"
+                                   "   AND child = ?",
+                                   [self._imported_mainline_id]).fetchone()
+        if res is None:
+            # Walked off the mainline...
+            # TODO: Make sure this stuff is tested
+            self._imported_mainline_id = None
+            self._imported_gdfo = 0
+        else:
+            self._imported_mainline_id, self._imported_gdfo = res
+            self._known_gdfo[self._imported_mainline_id] = self._imported_gdfo
 
     def _step_search_tips(self):
         """Move the search tips to their parents."""
@@ -632,7 +642,7 @@
         #       check first.
         self._known_gdfo.update(res)
 
-    def do_import(self):
+    def _find_interesting_ancestry(self):
         self._get_initial_search_tips()
         while self._search_tips:
             # We don't know whether these search tips are known interesting, or
@@ -656,7 +666,13 @@
             # All search_tips are known to either be interesting or
             # uninteresting. Walk any search tips that remain.
             self._step_search_tips()
+        # Once we get to here, we should have loaded enough of
+        # _imported_dotted_revno to be able to create the merge_sort graph.
+        # Also all of the new pending revisions should be in
+        # self._interesting_ancestor_ids
 
+    def do_import(self):
+        self._find_interesting_ancestry()
 
 class Querier(object):
     """Perform queries on an existing history db."""

=== modified file 'test_importer.py'
--- a/test_importer.py	2010-04-05 20:32:39 +0000
+++ b/test_importer.py	2010-04-05 21:04:21 +0000
@@ -286,3 +286,33 @@
         inc_importer._step_search_tips()
         B_id = importer._rev_id_to_db_id['B']
         self.assertEqual(set([B_id]), inc_importer._search_tips)
+        self.assertEqual(set([E_id, G_id, F_id]),
+                         inc_importer._interesting_ancestor_ids)
+        # B is merged, so these two steps should not filter it out
+        self.assertEqual([B_id],
+                         inc_importer._split_search_tips_by_gdfo([B_id]))
+        self.assertEqual([B_id],
+                         inc_importer._split_interesting_using_children([B_id]))
+        # At this point, we have to step the mainline, to find out if we can
+        # filter out this search tip. After stepping, _imported_dotted_revno
+        # should be filled with the next mainline step
+        inc_importer._step_mainline()
+        A_id = importer._rev_id_to_db_id['A']
+        self.assertEqual(A_id, inc_importer._imported_mainline_id)
+        self.assertEqual(1, inc_importer._imported_gdfo)
+        C_id = importer._rev_id_to_db_id['C']
+        self.assertEqual({D_id: ('2', 0, 0), C_id: ('1.1.2', 0, 1),
+                          B_id: ('1.1.1', 1, 1),
+                         }, inc_importer._imported_dotted_revno)
+        # Search tips is not yet changed
+        self.assertEqual(set([B_id]), inc_importer._search_tips)
+        # And now when we check gdfo again, it should remove B_id from the
+        # search_tips, because it sees it in _imported_dotted_revno
+        self.assertEqual([], inc_importer._split_search_tips_by_gdfo([B_id]))
+        self.assertEqual(set([]), inc_importer._search_tips)
+
+    def test__incremental_find_interesting_ancestry(self):
+        b = self.make_interesting_branch()
+        b._tip_revision = 'D' # Start here
+        importer = history_db.Importer(':memory:', b, incremental=False)
+        importer.do_import()