[MERGE] log performance
Martin Pool
mbp at canonical.com
Fri Jun 23 05:48:56 BST 2006
On 22 Jun 2006, Aaron Bentley <aaron.bentley at utoronto.ca> wrote:
> Hi all,
>
> This patch optimizes performance of log formatters that don't show
> merges (i.e. --short and --line).
>
> 1. It adds a new codepath for selecting the revisions to display
> 2. It does not retrieve revisions that will not be displayed
>
> In my tests (which have a 1:1 ratio of merges to mainline), it goes from
> 190 ms to 109ms, that is 1.74x as fast.
>
> Aaron
well done :)
> === modified file bzrlib/benchmarks/__init__.py // last-changed:aaron.bentley at u
> ... toronto.ca-20060623021716-c9b81f6432adf745
> --- bzrlib/benchmarks/__init__.py
> +++ bzrlib/benchmarks/__init__.py
> @@ -74,6 +74,37 @@
> tree.unlock()
> return tree
>
> + def make_heavily_merged_tree(self, directory_name='.'):
> + """Create a tree with an egregious number of commits.
> +
> + No files change are included.
> + """
"egregious" means "outstandingly bad" but I don't think it's really
right here - having 250 cross merges is just large, not egregious.
It might (?) be good to add another sentence about what the structure of
merges are, in case anyone cares: two branches, one repeatedly merges
single commits from the other.
> + tree = BzrDir.create_standalone_workingtree(directory_name)
> + tree.lock_write()
> + tree.branch.lock_write()
> + tree.branch.repository.lock_write()
> + tree2 = tree.bzrdir.sprout('tree2').open_workingtree()
> + tree2.lock_write()
> + try:
> + for i in xrange(250):
> + revision_id = tree.commit('no-changes commit %d-a' % i)
> + tree2.branch.fetch(tree.branch, revision_id)
> + tree2.set_pending_merges([revision_id])
> + revision_id = tree2.commit('no-changes commit %d-b' % i)
> + tree.branch.fetch(tree2.branch, revision_id)
> + tree.set_pending_merges([revision_id])
> + finally:
> + try:
> + try:
> + tree.branch.repository.unlock()
> + finally:
> + tree.branch.unlock()
> + finally:
> + tree.unlock()
> + tree2.unlock()
> + tree.set_pending_merges([])
> + return tree
These won't unwind properly in some cases; if you're going to go to the
trouble of cleaning up (which is good) you might as well do it properly,
which means
tree.lock_write()
try:
tree.branch.lock_write()
finally:
tree.branch.unlock()
finally:
tree.unlock()
etc, with each acquisition/try/finally nested at its own level. For
example this will unwind properly in a failure to sprout. I suggest
this for a few reasons: firstly it sets a good example for others looking
at the code; secondly it makes it easier to read (without thinking "now
why are there all those things inside the finally block); thirdly it's
just more correct.
> +
>
> def test_suite():
> """Build and return a TestSuite which contains benchmark tests only."""
>
> === modified file bzrlib/benchmarks/bench_log.py // last-changed:aaron.bentley@
> ... utoronto.ca-20060623021716-c9b81f6432adf745
> --- bzrlib/benchmarks/bench_log.py
> +++ bzrlib/benchmarks/bench_log.py
> @@ -47,6 +47,12 @@
> lf = log_formatter('long', to_file=StringIO())
> self.time(show_log, tree.branch, lf, direction='reverse')
>
> + def test_merge_log(self):
> + """Run log in a tree with many merges"""
> + tree = self.make_heavily_merged_tree()
> + lf = log_formatter('short', to_file=StringIO())
> + self.time(show_log, tree.branch, lf, direction='reverse')
> +
> def test_log_screenful(self):
> """Simulate log --long|less"""
> self.screenful_tester('long')
>
> === modified file bzrlib/log.py
> --- bzrlib/log.py
> +++ bzrlib/log.py
> @@ -206,35 +206,25 @@
> cut_revs = which_revs[(start_revision-1):(end_revision)]
> if not cut_revs:
> return
> +
> + # convert the revision history to a dictionary:
> + rev_nos = dict([(k, v) for v, k in cut_revs])
> +
You can actually just say rev_nos = dict(cut_revs) if cut_revs is a
sequence of pairs, and that will avoid building a list.
> # override the mainline to look like the revision history.
> mainline_revs = [revision_id for index, revision_id in cut_revs]
> if cut_revs[0][0] == 1:
> mainline_revs.insert(0, None)
> else:
> mainline_revs.insert(0, which_revs[start_revision-2][1])
> -
> - merge_sorted_revisions = merge_sort(
> - branch.repository.get_revision_graph(mainline_revs[-1]),
> - mainline_revs[-1],
> - mainline_revs)
> -
> - if direction == 'reverse':
> - cut_revs.reverse()
> - elif direction == 'forward':
> - # forward means oldest first.
> - merge_sorted_revisions.reverse()
> + if getattr(lf, 'show_merge', False) is False:
> + no_merges = True
> else:
> - raise ValueError('invalid direction %r' % direction)
> -
> - revision_history = branch.revision_history()
> -
> - # convert the revision history to a dictionary:
> - rev_nos = {}
> - for index, rev_id in cut_revs:
> - rev_nos[rev_id] = index
> + no_merges = False
> + view_revisions = list(get_view_revisions(mainline_revs, rev_nos, branch,
> + direction, no_merges=no_merges))
>
> def iter_revisions():
> - revision_ids = [r for s, r, m, e in merge_sorted_revisions]
> + revision_ids = [r for r, n, d in view_revisions]
> num = 9
> while revision_ids:
> revisions = branch.repository.get_revisions(revision_ids[:num])
> @@ -247,8 +237,8 @@
> for revision in revisions:
> yield revision
> # now we just print all the revisions
> - for ((sequence, rev_id, merge_depth, end_of_merge), rev) in \
> - izip(merge_sorted_revisions, iter_revisions()):
> + for ((rev_id, revno, merge_depth), rev) in \
> + izip(view_revisions, iter_revisions()):
>
> if searchRE:
> if not searchRE.search(rev.message):
> @@ -267,11 +257,41 @@
> # although we calculated it, throw it away without display
> delta = None
>
> - lf.show(rev_nos[rev_id], rev, delta)
> - elif hasattr(lf, 'show_merge'):
> + lf.show(revno, rev, delta)
> + else:
> lf.show_merge(rev, merge_depth)
There's some nice reductions here. I'm not 100% comfortable with the
hasattr -- I generally like to reserve it for cases where you're really
doing "dynamic" lookups, though it's fuzzy in Python and I won't veto
it. But is it the presence of the attribute or its value that matters?
If the second, as I'd expect, shouldn't the second call be getattr too?
> +def get_view_revisions(mainline_revs, rev_nos, branch, direction,
> + no_merges=False):
> + """Produce an iterator of revisions to show
> + :return: an iterator of (revision_id, revno, merge_depth)
> + (if there is no revno for a revision, None is supplied)
> + """
> + if no_merges is True:
> + revision_ids = mainline_revs[1:]
> + if direction == 'reverse':
> + revision_ids.reverse()
> + for revision_id in revision_ids:
> + yield revision_id, rev_nos[revision_id], 0
> + return
> + merge_sorted_revisions = merge_sort(
> + branch.repository.get_revision_graph(mainline_revs[-1]),
> + mainline_revs[-1],
> + mainline_revs)
> +
> + if direction == 'forward':
> + # forward means oldest first.
> + merge_sorted_revisions.reverse()
> + elif direction != 'reverse':
> + raise ValueError('invalid direction %r' % direction)
> +
> + revision_history = branch.revision_history()
> +
> + for sequence, rev_id, merge_depth, end_of_merge in merge_sorted_revisions:
> + yield rev_id, rev_nos.get(rev_id), merge_depth
> +
> +
> def deltas_for_log_dummy(branch, which_revs):
> """Return all the revisions without intermediate deltas.
Rather than an inverted-sense boolean why not make it just
include_merges=True?
> === modified file bzrlib/tests/test_log.py
> @@ -301,3 +305,38 @@
> logfile.seek(0)
> log_contents = logfile.read()
> self.assertEqualDiff(log_contents, '1: Line-Log-Formatte... 2005-11-23 add a\n')
> +
> + def test_get_view_revisions(self):
> + """Test the get_view_revisions method"""
> + wt = self.make_branch_and_tree('tree1')
> + wt.commit('commit one', rev_id='1')
> + wt.commit('commit two', rev_id='2')
> + wt.commit('commit three', rev_id='3')
> + mainline_revs = [None, '1', '2', '3']
> + rev_nos = {'1': 1, '2': 2, '3': 3, '4b': 4}
> + revisions = list(get_view_revisions(mainline_revs, rev_nos, wt.branch,
> + 'forward'))
> + self.assertEqual(revisions, [('1', 1, 0), ('2', 2, 0), ('3', 3, 0)])
> + revisions2 = list(get_view_revisions(mainline_revs, rev_nos, wt.branch,
> + 'forward', no_merges=True))
> + self.assertEqual(revisions, revisions2)
> + revisions = list(get_view_revisions(mainline_revs, rev_nos, wt.branch,
> + 'reverse'))
> + self.assertEqual(revisions, [('3', 3, 0), ('2', 2, 0), ('1', 1, 0), ])
> + revisions2 = list(get_view_revisions(mainline_revs, rev_nos, wt.branch,
> + 'reverse', no_merges=True))
> + self.assertEqual(revisions, revisions2)
> + tree2 = wt.bzrdir.sprout('tree2').open_workingtree()
> + tree2.commit('four-a', rev_id='4a')
> + wt.branch.fetch(tree2.branch, '4a')
> + wt.add_pending_merge('4a')
> + wt.commit('four-b', rev_id='4b')
> + mainline_revs.append('4b')
> + revisions = list(get_view_revisions(mainline_revs, rev_nos, wt.branch,
> + 'forward'))
> + self.assertEqual(revisions, [('1', 1, 0), ('2', 2, 0), ('3', 3, 0),
> + ('4a', None, 1), ('4b', 4, 0)])
> + revisions = list(get_view_revisions(mainline_revs, rev_nos, wt.branch,
> + 'forward', no_merges=True))
> + self.assertEqual(revisions, [('1', 1, 0), ('2', 2, 0), ('3', 3, 0),
> + ('4b', 4, 0)])
This test is a bit long, therefore harder to read and see just what it's
testing. Could you perhaps split out the setup of building the
histories and have each assertion within a separate test?
+1 to merge with those changes.
Thanks
--
Martin
More information about the bazaar
mailing list