Rev 58: \o/ Now a new root revision doesn't have to walk the entire ancestry. in http://bzr.arbash-meinel.com/plugins/history_db
John Arbash Meinel
john at arbash-meinel.com
Wed Apr 7 20:51:25 BST 2010
At http://bzr.arbash-meinel.com/plugins/history_db
------------------------------------------------------------
revno: 58
revision-id: john at arbash-meinel.com-20100407195110-7ezlf79dwi69npp3
parent: john at arbash-meinel.com-20100407184329-8b2yzr0hpzn3p2xm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Wed 2010-04-07 14:51:10 -0500
message:
\o/ Now a new root revision doesn't have to walk the entire ancestry.
Now we just need to handle ghosts, and we should be all set.
-------------- next part --------------
=== modified file 'history_db.py'
--- a/history_db.py 2010-04-07 18:43:29 +0000
+++ b/history_db.py 2010-04-07 19:51:10 +0000
@@ -805,12 +805,10 @@
if len(parent_revno) == 1:
mini_revno = parent_revno[0] + 1
revno = (mini_revno,)
- # TODO: Do we need to increment
- # self._branch_to_child_count[0]
- # I think we do, but it is currently only used by
- # _is_first_child, which should already be setting
- # the 'seen' counters...
- # self._branch_to_child_count[0] = revno / max(revno, ...)
+ # Not sure if we *have* to maintain it, but it does keep
+ # our data-structures consistent
+ if mini_revno > self._branch_to_child_count[0]:
+ self._branch_to_child_count[0] = mini_revno
else:
revno = parent_revno[:2] + (parent_revno[2] + 1,)
else:
@@ -825,11 +823,35 @@
self._revno_to_branch_count[base_revno] = branch_count
revno = (base_revno, branch_count, 1)
else:
- # New Root. To get the numbering correct, we have to walk the
- # mainline back to the beginning... :(
- ## XXX:
- ## while self._imported_mainline_id is not None:
- ## self._step_mainline()
+ # We found a new root. There are 2 cases:
+ # a) This is the very first revision in the branch, in which case
+ # self._revno_to_branch_count won't have any counts for
+ # 'revno' 0.
+ # b) This is a ghost / the first revision in a branch that was
+ # merged. We need to allocate a new branch number.
+ # This distinction is pretty much the same as the 'is_first'
+ # check for revs with a parent if you consider the NULL revision
+ # to be revno 0.
+ # We have to walk back far enough to be sure that we have seen the
+ # most-recently merged new root. We can detect that by finding a new
+ # root's first revision: walking backwards, the first one we meet
+ # must belong to the last-merged root.
+ # Theory:
+ # When you see (0,X,1) you've reached the point where the X
+ # number was chosen. A hypothetical (0,X+1,1) revision would
+ # only be numbered X+1 if it was merged after (0,X,1). Thus
+ # the *first* (0,?,1) revision you find merged must be the
+ # last.
+
+ # As long as we haven't yet walked off the mainline...
+ while self._imported_mainline_id is not None:
+ # This is the oldest root that we have found so far
+ last_new_root = self._revno_to_branch_count.get(0, 0)
+ # See if we have found its first revision, (0, X, 1)
+ first_rev_revno = (0, last_new_root, 1)
+ if first_rev_revno in self._dotted_to_db_id:
+ break
+ self._step_mainline()
branch_count = self._revno_to_branch_count.get(0, -1) + 1
self._revno_to_branch_count[0] = branch_count
if branch_count == 0: # This is the mainline
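
A minimal standalone sketch of the two new-root cases described in the
comment above (a toy function, not the importer's API; the mainline
walk-back that has to happen first is omitted). An empty count under key
0 is case (a), so the root becomes mainline revno (1,); otherwise case
(b) allocates the next (0, N, 1):

def number_new_root(revno_to_branch_count):
    # Bump the branch count stored under key 0; a resulting count of 0
    # means this root is the origin of the mainline itself.
    branch_count = revno_to_branch_count.get(0, -1) + 1
    revno_to_branch_count[0] = branch_count
    if branch_count == 0:
        return (1,)
    return (0, branch_count, 1)

counts = {}
assert number_new_root(counts) == (1,)       # case (a): very first revision
assert number_new_root(counts) == (0, 1, 1)  # case (b): an extra root
assert number_new_root(counts) == (0, 2, 1)
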
=== modified file 'test_importer.py'
--- a/test_importer.py 2010-04-07 18:42:13 +0000
+++ b/test_importer.py 2010-04-07 19:51:10 +0000
@@ -456,3 +456,60 @@
self.assertEqual([(self.E_id, (0, 2, 1), True, 1),
(self.F_id, (4,), False, 0),
], inc_importer._scheduled_stack)
+
+ def test__incremental_merge_sort_handles_partial_complex_multi_roots(self):
+ # Graph:
+ # A B
+ # |/ \
+ # C E |
+ # | | |
+ # D F |
+ # |/ /
+ # G H
+ # |/
+ # I J
+ # |/
+ # K
+
+ # Ideas:
+ # 1) (0,1,2) gets merged after (0,2,2), which means we have to keep
+ # walking until we find (0,2,2) to get (0, 3, 1) correct, even
+ # though we have already found a possible extra root.
+ # 2) Once we find (0,2,1), we don't have to go all the way back to
+ # find (0,1,1).
+ ancestry = {'A': (), 'B': (), 'C': ('A', 'B'), 'D': ('C',), 'E': (),
+ 'F': ('E',), 'G': ('D', 'F'), 'H': ('B',), 'I': ('G', 'H'),
+ 'J': (), 'K': ('I', 'J')}
+ b = MockBranch(ancestry, 'I')
+ importer = history_db.Importer(':memory:', b, incremental=False)
+ importer.do_import()
+ importer._update_ancestry('K')
+ self.grab_interesting_ids(importer._rev_id_to_db_id)
+ inc_importer = history_db._IncrementalImporter(importer, self.K_id)
+ inc_importer._find_interesting_ancestry()
+ self.assertEqual(self.G_id, inc_importer._imported_mainline_id)
+ self.assertEqual(set([self.K_id, self.J_id]),
+ inc_importer._interesting_ancestor_ids)
+ inc_importer._compute_merge_sort()
+ self.assertEqual([(self.J_id, (0, 3, 1), True, 1),
+ (self.K_id, (6,), False, 0),
+ ], inc_importer._scheduled_stack)
+ # We only have to walk back as far as D, because once we have found
+ # (0,2,1) it must belong to the most recently numbered branch.
+ self.assertEqual(self.D_id, inc_importer._imported_mainline_id)
+
+ def test__incremental_merge_sort_first_rev(self):
+ # Trivial ancestry:
+ # A
+ ancestry = {'A': ()}
+ b = MockBranch(ancestry, 'A')
+ importer = history_db.Importer(':memory:', b, incremental=False)
+ importer._update_ancestry('A')
+ self.grab_interesting_ids(importer._rev_id_to_db_id)
+ inc_importer = history_db._IncrementalImporter(importer, self.A_id)
+ inc_importer._find_interesting_ancestry()
+ inc_importer._compute_merge_sort()
+ self.assertEqual([(self.A_id, (1,), True, 0),
+ ], inc_importer._scheduled_stack)
+
+ # TODO: Test for ghost handling
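
To make the stopping rule concrete for this graph, a small standalone
simulation (plain Python, no plugin involved; the dotted revnos below are
assumed from the comments above, i.e. my reading of the merge_sort result
for tip I). Scanning the already-imported history from newest to oldest,
the first (0, X, 1) revision encountered carries the highest branch
number allocated so far, so the walk can stop there:

# Merge-sorted history of the imported tip I, newest first, as
# (rev_id, dotted_revno) pairs. Revnos are assumed for illustration,
# consistent with the test comments: (0,1,2) is merged after (0,2,2).
imported_history = [
    ('I', (5,)),
    ('H', (0, 1, 2)),   # branch 1 continues, but merged late
    ('G', (4,)),
    ('F', (0, 2, 2)),
    ('E', (0, 2, 1)),   # first revision of branch 2
    ('D', (3,)),
    ('C', (2,)),
    ('B', (0, 1, 1)),   # first revision of branch 1
    ('A', (1,)),
]

def highest_root_branch(history):
    # Walk newest to oldest and stop at the first (0, X, 1) revision;
    # branch numbers are allocated in merge order, so nothing older can
    # introduce a higher X.
    for rev_id, revno in history:
        if len(revno) == 3 and revno[0] == 0 and revno[2] == 1:
            return rev_id, revno[1]
    return None, 0

rev_id, highest = highest_root_branch(imported_history)
# The scan stops at E == (0, 2, 1) without ever reaching B == (0, 1, 1),
# which is why the importer's walk can stop once it has stepped past G
# (leaving D as the imported mainline tip), and the new root J gets
# (0, highest + 1, 1) == (0, 3, 1).
assert (rev_id, highest) == ('E', 2)
assert (0, highest + 1, 1) == (0, 3, 1)
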