Rev 68: It now seems to be *correct* and is capable of importing all of bzr.dev. in http://bzr.arbash-meinel.com/plugins/history_db

John Arbash Meinel john at arbash-meinel.com
Thu Apr 8 21:01:03 BST 2010


At http://bzr.arbash-meinel.com/plugins/history_db

------------------------------------------------------------
revno: 68
revision-id: john at arbash-meinel.com-20100408200049-2fo6izcgb05g6j7u
parent: john at arbash-meinel.com-20100408173452-sqro9wruo1xdsloy
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Thu 2010-04-08 15:00:49 -0500
message:
  It now seems to be *correct* and is capable of importing all of bzr.dev.
  
  It's pretty darn slow, though. Instead of being ~3min to import bzr.dev
  it is now 23min...
  
  Time to start profiling. I'm guessing the python<=>sqlite transitioning is
  the main problem, but I don't really know yet.
-------------- next part --------------
=== modified file 'history_db.py'
--- a/history_db.py	2010-04-08 17:34:52 +0000
+++ b/history_db.py	2010-04-08 20:00:49 +0000
@@ -210,13 +210,26 @@
             # Assert that the result is valid
             actual_ms = self._graph.merge_sort((tip_revision_id,))
             actual_ms_iter = iter(actual_ms)
+
+            def assert_is_equal(x, y):
+                if x != y:
+                    import pdb; pdb.set_trace()
             for node in merge_sorted:
-                node.key = (db_id_to_rev_id[node.key],)
+                try:
+                    node.key = (db_id_to_rev_id[node.key],)
+                except KeyError: # Look this one up in the db
+                    rev_res = self._cursor.execute(
+                        "SELECT revision_id FROM revision WHERE db_id = ?",
+                        (node.key,)).fetchone()
+                    rev_id = rev_res[0]
+                    db_id_to_rev_id[node.key] = rev_id
+                    self._rev_id_to_db_id[rev_id] = node.key
+                    node.key = (rev_id,)
                 actual_node = actual_ms_iter.next()
-                assert node.key == actual_node.key
-                assert node.revno == actual_node.revno
-                assert node.merge_depth == actual_node.merge_depth
-                assert node.end_of_merge == actual_node.end_of_merge
+                assert_is_equal(node.key, actual_node.key)
+                assert_is_equal(node.revno, actual_node.revno)
+                assert_is_equal(node.merge_depth, actual_node.merge_depth)
+                assert_is_equal(node.end_of_merge, actual_node.end_of_merge)
         else:
             merge_sorted = self._graph.merge_sort((tip_revision_id,))
         try:
@@ -740,7 +753,7 @@
         Either the data should be in _imported_dotted_revno, or the lh parent
         should be in interesting_ancestor_ids (meaning we will number it).
         """
-        pmap = self._parent_map
+        #XXX REMOVE: pmap = self._parent_map
         missing_parent_ids = set()
         for db_id in self._interesting_ancestor_ids:
             parent_ids = self._get_parents(db_id)
@@ -854,6 +867,9 @@
 
     def _push_node(self, db_id, merge_depth):
         # TODO: Check if db_id is a ghost (not allowed on the stack)
+        if db_id not in self._interesting_ancestor_ids:
+            # This is a parent that we really don't need to number
+            return
         parent_ids = self._get_parents(db_id)
         if len(parent_ids) <= 0:
             left_parent = None
@@ -973,6 +989,9 @@
         self._depth_first_stack = []
         self._scheduled_stack = []
         self._seen_parents = set()
+        ## if not self._mainline_db_ids:
+        ##     # Nothing to number
+        ##     return
         self._push_node(self._mainline_db_ids[0], 0)
 
         while self._depth_first_stack:

=== modified file 'test_importer.py'
--- a/test_importer.py	2010-04-08 17:22:15 +0000
+++ b/test_importer.py	2010-04-08 20:00:49 +0000
@@ -643,3 +643,33 @@
                          [(self.D_id, (0, 1, 1), True, 1),
                           (self.E_id, (4,), False, 0),
                          ])
+
+    def test_ignore_uninteresting_ancestors(self):
+        # Graph:
+        # A
+        # |\
+        # B C
+        # |X|
+        # D E
+        # |\| 
+        # | F
+        # |/
+        # G
+        #
+        # Someone did work C, while trunk evolved to B. C was landed, while
+        # concurrently someone tried to update C for the trunk changes.
+        # After trying to clean up, they had to do it again.
+        # If D is imported, we should only number E and F; we shouldn't try
+        # to include B or C
+        # Note: This ancestry was taken from bzr.dev at 5114.1.1, which
+        # demonstrated the race condition.
+        ancestry = {'A': (), 'B': ('A',), 'C': ('A',), 'D': ('B', 'C'),
+                    'E': ('C', 'B'), 'F': ('E', 'D'), 'G': ('D', 'F')}
+        b = MockBranch(ancestry, 'G')
+        inc_merger = self.make_inc_merger(b, 'D', 'G')
+        inc_merger.topo_order()
+        self.assertScheduledStack(inc_merger,
+                         [(self.E_id, (1, 1, 2), True, 1),
+                          (self.F_id, (1, 1, 3), False, 1),
+                          (self.G_id, (4,), False, 0),
+                         ])



More information about the bazaar-commits mailing list