Rev 60: We now filter ghosts, and _IncrementalImporter._compute_merge_sort knows to skip them (in http://bzr.arbash-meinel.com/plugins/history_db)

Wed Apr 7 21:36:38 BST 2010

At http://bzr.arbash-meinel.com/plugins/history_db

------------------------------------------------------------
revno: 60
revision-id: john at arbash-meinel.com-20100407203622-n23hx3wkktmb7rh9
parent: john at arbash-meinel.com-20100407200620-bg6ehrbisjpl2l3e
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Wed 2010-04-07 15:36:22 -0500
message:
  We now filter ghosts, and _IncrementalImporter._compute_merge_sort knows to skip them.
-------------- next part --------------
=== modified file 'history_db.py'

--- a/history_db.py	2010-04-07 20:06:20 +0000
+++ b/history_db.py	2010-04-07 20:36:22 +0000
@@ -464,6 +464,12 @@
         # Map from db_id => parent_ids
         self._parent_map = {}
 
+        # Set of ghost db_ids; None until all ghosts are loaded in one query.
+        # TODO: Ghosts are expected to be rare. If we find a case where probing
+        #       for them at runtime is better than grabbing them all at once,
+        #       re-evaluate this decision.
+        self._ghosts = None
+
     def _find_needed_mainline(self):
         """Find mainline revisions that need to be filled out.
         
@@ -498,6 +504,8 @@
         # (it gives us the basis for numbering everything). We do it now,
         # because it increases the 'cheap' filtering we can do right away.
         self._step_mainline()
+        ghost_res = self._cursor.execute("SELECT db_id FROM ghost").fetchall()
+        self._ghosts = set([g[0] for g in ghost_res])
 
     def _is_imported_db_id(self, tip_db_id):
         res = self._cursor.execute(
@@ -787,8 +795,13 @@
             is_first = True
         else:
             left_parent = parent_ids[0]
-            is_first = self._is_first_child(left_parent)
-        pending_parents = tuple(parent_ids[1:])
+            if left_parent in self._ghosts:
+                left_parent = None
+                is_first = True
+            else:
+                is_first = self._is_first_child(left_parent)
+        pending_parents = tuple([p for p in parent_ids[1:]
+                                    if p not in self._ghosts])
         # v- logically probably better as a tuple or object. We currently
         # modify it in place, so we use a list
         self._depth_first_stack.append([db_id, merge_depth, left_parent,
@@ -859,6 +872,7 @@
             self._revno_to_branch_count[0] = branch_count
             if branch_count == 0: # This is the mainline
                 revno = (1,)
+                self._branch_to_child_count[0] = 1
             else:
                 revno = (0, branch_count, 1)
         if not self._scheduled_stack:

=== modified file 'test_importer.py'
--- a/test_importer.py	2010-04-07 20:06:20 +0000
+++ b/test_importer.py	2010-04-07 20:36:22 +0000
@@ -548,4 +548,19 @@
         self.assertEqual([(self.A_id, (1,), True, 0),
                          ], inc_importer._scheduled_stack)
 
-    # TODO: Test for ghost handling
+    def test__incremental_merge_sort_skips_ghosts(self):
+        b = self.make_branch_with_ghosts()
+        importer = history_db.Importer(':memory:', b, incremental=False)
+        importer._update_ancestry('E')
+        self.grab_interesting_ids(importer._rev_id_to_db_id)
+        inc_importer = history_db._IncrementalImporter(importer, self.E_id)
+        inc_importer._find_interesting_ancestry()
+        inc_importer._compute_merge_sort()
+        # G is not mentioned in merge_sorted, either as a left-hand parent
+        # or as a right-hand parent
+        self.assertEqual([(self.A_id, (1,), True, 0),
+                          (self.B_id, (2,), False, 0),
+                          (self.C_id, (3,), False, 0),
+                          (self.D_id, (0, 1, 1), True, 1),
+                          (self.E_id, (4,), False, 0),
+                         ], inc_importer._scheduled_stack)