Rev 102: The numbering seems to fit my expectations. in http://bzr.arbash-meinel.com/branches/bzr/history_db/tip_numbering

John Arbash Meinel john at arbash-meinel.com
Fri Apr 16 20:28:04 BST 2010


At http://bzr.arbash-meinel.com/branches/bzr/history_db/tip_numbering

------------------------------------------------------------
revno: 102
revision-id: john at arbash-meinel.com-20100416192741-ahq70lbhz72cxtnt
parent: john at arbash-meinel.com-20100415215956-epo6zp0s8dki3giv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: tip_numbering
timestamp: Fri 2010-04-16 14:27:41 -0500
message:
  The numbering seems to fit my expectations.
-------------- next part --------------
=== modified file 'history_db.py'
--- a/history_db.py	2010-04-15 21:59:56 +0000
+++ b/history_db.py	2010-04-16 19:27:41 +0000
@@ -581,7 +581,7 @@
         # already-imported mainline.
         self._imported_dotted_revno = {}
         # What dotted revnos have been loaded
-        self._known_dotted = set()
+        ## self._known_dotted = set()
         # This is the gdfo of the current mainline revision search tip. This is
         # the threshold such that 
         self._imported_gdfo = None
@@ -641,7 +641,7 @@
             n.revno = (revno,)
             n._base_revno = revno
 
-    def _get_initial_search_tips(self):
+    def DONT_get_initial_search_tips(self):
         """Grab the right-hand parents of all the interesting mainline.
 
         We know we already searched all of the left-hand parents, so just grab
@@ -674,7 +674,7 @@
             return None
         return res[0]
 
-    def _split_search_tips_by_gdfo(self, unknown):
+    def DONT_split_search_tips_by_gdfo(self, unknown):
         """For these search tips, mark ones 'interesting' based on gdfo.
         
         All search tips are ancestors of _mainline_db_ids. So if their gdfo
@@ -709,7 +709,7 @@
                 still_unknown.append(db_id)
         return still_unknown
 
-    def _split_interesting_using_children(self, unknown_search_tips):
+    def DONT_split_interesting_using_children(self, unknown_search_tips):
         """Find children of these search tips.
 
         For each search tip, we find all of its known children. We then filter
@@ -782,6 +782,8 @@
 
     def _step_mainline(self):
         """Move the mainline pointer by one, updating the data."""
+        # TODO: We could probably use a gdfo hint to determine if we want to
+        #       step-by-one, or step-by-many
         self._stats['step mainline'] += 1
         if self._imported_mainline_id in self._importer._dotted_revno_cache:
             self._stats['step mainline cached'] += 1
@@ -848,7 +850,7 @@
         self._stats['num_search_tips'] += len(self._search_tips)
         self._known_gdfo.update(res)
 
-    def _ensure_lh_parent_info(self):
+    def DONT_ensure_lh_parent_info(self):
         """LH parents of interesting_ancestor_ids is either present or pending.
 
         Either the data should be in _imported_dotted_revno, or the lh parent
@@ -878,7 +880,7 @@
             missing_parent_ids = missing_parent_ids.difference(
                                     self._imported_dotted_revno)
 
-    def _find_interesting_ancestry(self):
+    def DONT_find_interesting_ancestry(self):
         self._find_needed_mainline()
         self._get_initial_search_tips()
         while self._search_tips:
@@ -915,7 +917,7 @@
     def _update_info_from_dotted_revno(self, dotted_info):
         """Update info like 'child_seen' from the dotted_revno info."""
         self._imported_dotted_revno.update(dotted_info)
-        self._known_dotted.update([i[1][0] for i in dotted_info])
+        ## self._known_dotted.update([i[1][0] for i in dotted_info])
 
     def _get_parents(self, db_id):
         if db_id in self._parent_map:
@@ -987,7 +989,7 @@
         node.end_of_merge = end_of_merge
         self._imported_dotted_revno[node.key] = (
             node.revno, end_of_merge, node.merge_depth)
-        self._known_dotted.add(node.revno)
+        ## self._known_dotted.add(node.revno)
         node._pending_parents = None
         self._scheduled_stack.append(node)
 
@@ -1006,18 +1008,26 @@
         """We are considering pushing this db_id to be numbered. Do we want to?
         """
         if db_id in self._imported_dotted_revno:
+            self._stats['not interesting known imported'] +=1
             return False
         gdfo = self._get_gdfo(db_id)
         # If this gdfo > the mainline gdfo, then we know it cannot have been
         # merged (if it is ==, then it can't be merged, but it might *be* the
         # mainline rev.)
+        # TODO: Track interesting_ancestor_ids and use the interesting_children
+        #       trick
         while (gdfo < self._imported_gdfo
                 and db_id not in self._imported_dotted_revno):
-            # We don't know if this is interesting or not
+            # We don't know if this is interesting or not. 
+            self._stats['step mainline unknown'] += 1
             self._step_mainline()
-        if (gdfo == self._imported_gdfo
+        if db_id in self._imported_dotted_revno:
+            self._stats['not interesting imported'] +=1
+            return False
+        if (gdfo == self._imported_dotted_revno
             and db_id == self._imported_mainline_id):
-            return False
+            self._stats['not interesting is mainline'] += 1
+        self._stats['is interesting'] += 1
         return True
 
     def _compute_merge_sort(self):
@@ -1025,6 +1035,8 @@
             # Nothing to number
             return
 
+        expected_base_revno = None
+        expected_branch_count = 0
         while self._depth_first_stack:
             last = self._depth_first_stack[-1]
             if not last._pending_parents:
@@ -1035,7 +1047,11 @@
                 next_db_id = last._pending_parents.pop()
                 if not self._is_interesting(next_db_id):
                     continue
+                if last.merge_depth == 0:
+                    expected_base_revno = last._base_revno
+                    expected_branch_count = 0
                 base_revno = last._base_revno
+                assert expected_base_revno == base_revno
                 if next_db_id == last._left_parent: #Is the left-parent?
                     next_merge_depth = last.merge_depth
                     next_branch_num = last._branch_num
@@ -1043,6 +1059,8 @@
                     next_merge_depth = last.merge_depth + 1
                     next_branch_num = self._revno_to_branch_count.get(base_revno, 0) + 1
                     self._revno_to_branch_count[base_revno] = next_branch_num
+                    expected_branch_count += 1
+                    assert next_branch_num == expected_branch_count
                 self._push_node(next_db_id, base_revno, next_branch_num,
                                 next_merge_depth)
                 # And switch to the outer loop

=== modified file 'test_importer.py'
--- a/test_importer.py	2010-04-15 21:59:56 +0000
+++ b/test_importer.py	2010-04-16 19:27:41 +0000
@@ -151,6 +151,33 @@
                    }
         return MockBranch(ancestry, 'E')
 
+    def make_multi_merge(self):
+        # Simple ancestry:
+        # A-.       1--------.
+        # |\ \      | \       \
+        # | B C     | 2.1.1  2.2.1
+        # | |/|     |  |   /  |
+        # | D E     | 2.1.2  2.3.1
+        # | |/|     |  |   /  |
+        # | F G     | 2.1.3  2.4.1
+        # | |/      |  |   /
+        # | H       | 2.1.4
+        # |/        |/
+        # I         2
+        #
+        # 'H' merges the F and G lines back together, and 'I' merges 'H'
+        # into the mainline; every revision listed here is present.
+        ancestry = {'A': (),
+                    'B': ('A',),
+                    'C': ('A',),
+                    'D': ('B', 'C'),
+                    'E': ('C',),
+                    'F': ('D', 'E'),
+                    'G': ('E',),
+                    'H': ('F', 'G'),
+                    'I': ('A', 'H'),
+                   }
+        return MockBranch(ancestry, 'I')
+
     def grab_interesting_ids(self, rev_id_to_db_id):
         for rev_id in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
             setattr(self, '%s_id' % (rev_id,), rev_id_to_db_id.get(rev_id))
@@ -700,13 +727,12 @@
         self.assertEqual([self.E_id, self.C_id, self.B_id, self.A_id],
                          inc_merger._mainline_db_ids)
         self.assertEqual(None, inc_merger._imported_mainline_id)
-        self.assertEqual(Node, inc_merger._imported_gdfo)
+        self.assertEqual(None, inc_merger._imported_gdfo)
         dfs = inc_merger._depth_first_stack
         self.assertEqual(inc_merger._mainline_db_ids, [n.key for n in dfs])
         self.assertEqual([4, 3, 2, 1], [n._base_revno for n in dfs])
         self.assertEqual([0]*4, [n.merge_depth for n in dfs])
         self.assertEqual([(4,), (3,), (2,), (1,)], [n.revno for n in dfs])
-        self.assertEqual([0]*4, [n._merge_dist for n in dfs])
         self.assertEqual([0]*4, [n._branch_num for n in dfs])
 
     def test__pop_first(self):
@@ -720,3 +746,41 @@
         self.assertEqual((1,), node.revno)
         self.assertEqual(0, node.merge_depth)
         self.assertEqual(self.A_id, node.key)
+
+    def test_interesting_merge_sort(self):
+        b = self.make_interesting_branch()
+        inc_merger = self.make_inc_merger(b, None, 'O')
+        res = inc_merger.topo_order()
+        self.assertEqual([(self.O_id, (6,), 0, False),
+                          (self.M_id, (6,1,2), 1, False),
+                          (self.L_id, (6,2,1), 2, True),
+                          (self.K_id, (6,1,1), 1, True),
+                          (self.N_id, (5,), 0, False),
+                          (self.J_id, (5,1,1), 1, True),
+                          (self.I_id, (4,), 0, False),
+                          (self.H_id, (4,1,1), 1, True),
+                          (self.G_id, (3,), 0, False),
+                          (self.F_id, (3,1,2), 1, False),
+                          (self.E_id, (3,1,1), 1, True),
+                          (self.D_id, (2,), 0, False),
+                          (self.C_id, (2,1,2), 1, False),
+                          (self.B_id, (2,1,1), 1, True),
+                          (self.A_id, (1,), 0, True),
+                         ], [(n.key, n.revno, n.merge_depth, n.end_of_merge)
+                             for n in res])
+
+    def test_multi_merge_sort(self):
+        b = self.make_multi_merge()
+        inc_merger = self.make_inc_merger(b, None, 'I')
+        res = inc_merger.topo_order()
+        self.assertEqual([(self.I_id, (2,), 0, False),
+                          (self.H_id, (2,1,4), 1, False),
+                          (self.G_id, (2,4,1), 2, True),
+                          (self.F_id, (2,1,3), 1, False),
+                          (self.E_id, (2,3,1), 2, True),
+                          (self.D_id, (2,1,2), 1, False),
+                          (self.C_id, (2,2,1), 2, True),
+                          (self.B_id, (2,1,1), 1, True),
+                          (self.A_id, (1,), 0, True),
+                         ], [(n.key, n.revno, n.merge_depth, n.end_of_merge)
+                             for n in res])



More information about the bazaar-commits mailing list