Rev 58: \o/ Now a new root revision doesn't have to walk the entire ancestry. in http://bzr.arbash-meinel.com/plugins/history_db
John Arbash Meinel
john at arbash-meinel.com
Wed Apr 7 20:51:25 BST 2010
At http://bzr.arbash-meinel.com/plugins/history_db
------------------------------------------------------------
revno: 58
revision-id: john at arbash-meinel.com-20100407195110-7ezlf79dwi69npp3
parent: john at arbash-meinel.com-20100407184329-8b2yzr0hpzn3p2xm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Wed 2010-04-07 14:51:10 -0500
message:
\o/ Now a new root revision doesn't have to walk the entire ancestry.
Now we just need to handle ghosts, and we should be all set.
-------------- next part --------------
=== modified file 'history_db.py'
--- a/history_db.py 2010-04-07 18:43:29 +0000
+++ b/history_db.py 2010-04-07 19:51:10 +0000
@@ -805,12 +805,10 @@
if len(parent_revno) == 1:
mini_revno = parent_revno[0] + 1
revno = (mini_revno,)
- # TODO: Do we need to increment
- # self._branch_to_child_count[0]
- # I think we do, but it is currently only used by
- # _is_first_child, which should already be setting
- # the 'seen' counters...
- # self._branch_to_child_count[0] = revno / max(revno, ...)
+ # Not sure if we *have* to maintain it, but it does keep
+ # our data-structures consistent
+ if mini_revno > self._branch_to_child_count[0]:
+ self._branch_to_child_count[0] = mini_revno
else:
revno = parent_revno[:2] + (parent_revno[2] + 1,)
else:
@@ -825,11 +823,35 @@
self._revno_to_branch_count[base_revno] = branch_count
revno = (base_revno, branch_count, 1)
else:
- # New Root. To get the numbering correct, we have to walk the
- # mainline back to the beginning... :(
- ## XXX:
- ## while self._imported_mainline_id is not None:
- ## self._step_mainline()
+ # We found a new root. There are 2 cases:
+ # a) This is the very first revision in the branch, in which case
+ # self._revno_to_branch_count won't have any counts for
+ # 'revno' 0.
+ # b) This is a ghost / the first revision in a branch that was
+ # merged. We need to allocate a new branch number.
+ # This distinction is pretty much the same as the 'is_first'
+ # check for revs with a parent if you consider the NULL revision
+ # to be revno 0.
+ # We have to walk back far enough to be sure that we have seen the
+ # most-recently merged new root. We can detect that by finding a new
+ # root's first revision: walking backwards, the first one we meet
+ # must belong to the last-merged root.
+ # Theory:
+ # When you see (0,X,1) you've reached the point where the X
+ # number was chosen. A hypothetical (0,X+1,1) revision would
+ # only be numbered X+1 if it was merged after (0,X,1). Thus
+ # the *first* (0,?,1) revision you find merged must be the
+ # last.
+
+ # As long as we haven't yet walked off the mainline...
+ while self._imported_mainline_id is not None:
+ # This is the oldest root that we have found so far
+ last_new_root = self._revno_to_branch_count.get(0, 0)
+ # See if we have found its first revision, (0, X, 1)
+ first_rev_revno = (0, last_new_root, 1)
+ if first_rev_revno in self._dotted_to_db_id:
+ break
+ self._step_mainline()
branch_count = self._revno_to_branch_count.get(0, -1) + 1
self._revno_to_branch_count[0] = branch_count
if branch_count == 0: # This is the mainline
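
A minimal standalone sketch of the two new-root cases described in the
comment above (a toy function, not the importer's API; the mainline
walk-back that has to happen first is omitted). An empty count under key
0 is case (a), so the root becomes mainline revno (1,); otherwise case
(b) allocates the next (0, N, 1):

def number_new_root(revno_to_branch_count):
    # Bump the branch count stored under key 0; a resulting count of 0
    # means this root is the origin of the mainline itself.
    branch_count = revno_to_branch_count.get(0, -1) + 1
    revno_to_branch_count[0] = branch_count
    if branch_count == 0:
        return (1,)
    return (0, branch_count, 1)

counts = {}
assert number_new_root(counts) == (1,)       # case (a): very first revision
assert number_new_root(counts) == (0, 1, 1)  # case (b): an extra root
assert number_new_root(counts) == (0, 2, 1)
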
=== modified file 'test_importer.py'
--- a/test_importer.py 2010-04-07 18:42:13 +0000
+++ b/test_importer.py 2010-04-07 19:51:10 +0000
@@ -456,3 +456,60 @@
self.assertEqual([(self.E_id, (0, 2, 1), True, 1),
(self.F_id, (4,), False, 0),
], inc_importer._scheduled_stack)
+
+ def test__incremental_merge_sort_handles_partial_complex_multi_roots(self):
+ # Graph:
+ # A B
+ # |/ \
+ # C E |
+ # | | |
+ # D F |
+ # |/ /
+ # G H
+ # |/
+ # I J
+ # |/
+ # K
+
+ # Ideas:
+ # 1) (0,1,2) gets merged after (0,2,2), which means we have to keep
+ # walking until we find (0,2,2) to get (0, 3, 1) correct, even
+ # though we have already found a possible extra root.
+ # 2) Once we find (0,2,1), we don't have to go all the way back to
+ # find (0,1,1).
+ ancestry = {'A': (), 'B': (), 'C': ('A', 'B'), 'D': ('C',), 'E': (),
+ 'F': ('E',), 'G': ('D', 'F'), 'H': ('B',), 'I': ('G', 'H'),
+ 'J': (), 'K': ('I', 'J')}
+ b = MockBranch(ancestry, 'I')
+ importer = history_db.Importer(':memory:', b, incremental=False)
+ importer.do_import()
+ importer._update_ancestry('K')
+ self.grab_interesting_ids(importer._rev_id_to_db_id)
+ inc_importer = history_db._IncrementalImporter(importer, self.K_id)
+ inc_importer._find_interesting_ancestry()
+ self.assertEqual(self.G_id, inc_importer._imported_mainline_id)
+ self.assertEqual(set([self.K_id, self.J_id]),
+ inc_importer._interesting_ancestor_ids)
+ inc_importer._compute_merge_sort()
+ self.assertEqual([(self.J_id, (0, 3, 1), True, 1),
+ (self.K_id, (6,), False, 0),
+ ], inc_importer._scheduled_stack)
+ # We only have to walk back as far as D, because once we have found
+ # (0,2,1) it must belong to the most recently numbered branch.
+ self.assertEqual(self.D_id, inc_importer._imported_mainline_id)
+
+ def test__incremental_merge_sort_first_rev(self):
+ # Trivial ancestry:
+ # A
+ ancestry = {'A': ()}
+ b = MockBranch(ancestry, 'A')
+ importer = history_db.Importer(':memory:', b, incremental=False)
+ importer._update_ancestry('A')
+ self.grab_interesting_ids(importer._rev_id_to_db_id)
+ inc_importer = history_db._IncrementalImporter(importer, self.A_id)
+ inc_importer._find_interesting_ancestry()
+ inc_importer._compute_merge_sort()
+ self.assertEqual([(self.A_id, (1,), True, 0),
+ ], inc_importer._scheduled_stack)
+
+ # TODO: Test for ghost handling
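
To make the stopping rule concrete for this graph, a small standalone
simulation (plain Python, no plugin involved; the dotted revnos below are
assumed from the comments above, i.e. my reading of the merge_sort result
for tip I). Scanning the already-imported history from newest to oldest,
the first (0, X, 1) revision encountered carries the highest branch
number allocated so far, so the walk can stop there:

# Merge-sorted history of the imported tip I, newest first, as
# (rev_id, dotted_revno) pairs. Revnos are assumed for illustration,
# consistent with the test comments: (0,1,2) is merged after (0,2,2).
imported_history = [
    ('I', (5,)),
    ('H', (0, 1, 2)),   # branch 1 continues, but merged late
    ('G', (4,)),
    ('F', (0, 2, 2)),
    ('E', (0, 2, 1)),   # first revision of branch 2
    ('D', (3,)),
    ('C', (2,)),
    ('B', (0, 1, 1)),   # first revision of branch 1
    ('A', (1,)),
]

def highest_root_branch(history):
    # Walk newest to oldest and stop at the first (0, X, 1) revision;
    # branch numbers are allocated in merge order, so nothing older can
    # introduce a higher X.
    for rev_id, revno in history:
        if len(revno) == 3 and revno[0] == 0 and revno[2] == 1:
            return rev_id, revno[1]
    return None, 0

rev_id, highest = highest_root_branch(imported_history)
# The scan stops at E == (0, 2, 1) without ever reaching B == (0, 1, 1),
# which is why the importer's walk can stop once it has stepped past G
# (leaving D as the imported mainline tip), and the new root J gets
# (0, highest + 1, 1) == (0, 3, 1).
assert (rev_id, highest) == ('E', 2)
assert (0, highest + 1, 1) == (0, 3, 1)
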