Rev 439: Merge the page-loading change. in http://bazaar.launchpad.net/~loggerhead-team/loggerhead/trunk-rich

Wed Mar 16 14:43:56 UTC 2011

At http://bazaar.launchpad.net/~loggerhead-team/loggerhead/trunk-rich

------------------------------------------------------------
revno: 439 [merge]
revision-id: john at arbash-meinel.com-20110316144336-5or6keig1cbagois
parent: john at arbash-meinel.com-20110316123956-6jherozycdjmt9px
parent: john at arbash-meinel.com-20110314150147-70edfc2pkb557d2b
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk-rich
timestamp: Wed 2011-03-16 15:43:36 +0100
message:
  Merge the page-loading change.
  
  When /changes page is loaded, don't walk the whole ancestry during 'get_revids_from'.
  Instead, only walk enough history to generate the actual page, plus a little bit more
  to get the link for the 'next' page.
modified:
  NEWS                           news-20070121024650-6cwmhprgtcegpxvm-1
  loggerhead/controllers/changelog_ui.py changelog_ui.py-20061211064342-102iqirsciyvgtcf-19
  loggerhead/history.py          history.py-20061211064342-102iqirsciyvgtcf-5
  loggerhead/tests/test_history.py test_history.py-20110311094724-qreaf3ppfbus5rym-1
-------------- next part --------------
=== modified file 'NEWS'

--- a/NEWS	2011-03-16 12:24:06 +0000
+++ b/NEWS	2011-03-16 14:43:36 +0000
@@ -28,6 +28,11 @@
     - The json module is no longer claimed to be supported as alternative for 
       simplejson. (Jelmer Vernooij, #586611)
 
+    - Viewing the ``/changes`` page only iterates enough of the history to show
+      the actual revisions displayed, rather than walking the whole mainline.
+      Improves performance on projects with long histories like emacs.
+      (John Arbash Meinel)
+
 
 1.18 [10Nov2010]
 ----------------

=== modified file 'loggerhead/controllers/changelog_ui.py'
--- a/loggerhead/controllers/changelog_ui.py	2011-03-02 14:07:21 +0000
+++ b/loggerhead/controllers/changelog_ui.py	2011-03-12 13:25:52 +0000
@@ -47,7 +47,8 @@
 
         try:
             revid, start_revid, revid_list = history.get_view(
-                revid, start_revid, filter_file_id, query)
+                revid, start_revid, filter_file_id, query,
+                extra_rev_count=pagesize+1)
             util.set_context(kwargs)
 
             if (query is not None) and (len(revid_list) == 0):

=== modified file 'loggerhead/history.py'
--- a/loggerhead/history.py	2011-03-11 10:43:39 +0000
+++ b/loggerhead/history.py	2011-03-16 14:43:36 +0000
@@ -1,5 +1,4 @@
-#
-# Copyright (C) 2008, 2009 Canonical Ltd.
+# Copyright (C) 2006-2011 Canonical Ltd.
 #                     (Authored by Martin Albisetti <argentina at gmail.com>)
 # Copyright (C) 2006  Robey Pointer <robey at lag.net>
 # Copyright (C) 2006  Goffredo Baroncelli <kreijack at inwind.it>
@@ -350,10 +349,14 @@
                 r.add(self._rev_info[i][0][1])
                 i += 1
             return r
-        while True:
+        while revid_set:
             if bzrlib.revision.is_null(revid):
                 return
-            if introduced_revisions(revid) & revid_set:
+            rev_introduced = introduced_revisions(revid)
+            matching = rev_introduced.intersection(revid_set)
+            if matching:
+                # We don't need to look for these anymore.
+                revid_set.difference_update(matching)
                 yield revid
             parents = self._rev_info[self._rev_indices[revid]][2]
             if len(parents) == 0:
@@ -474,15 +477,41 @@
         if revid is None:
             revid = self.last_revid
         if file_id is not None:
-            # since revid is 'start_revid', possibly should start the path
-            # tracing from revid... FIXME
-            revlist = list(self.get_short_revision_history_by_fileid(file_id))
-            revlist = list(self.get_revids_from(revlist, revid))
+            revlist = list(
+                self.get_short_revision_history_by_fileid(file_id))
+            revlist = self.get_revids_from(revlist, revid)
         else:
-            revlist = list(self.get_revids_from(None, revid))
+            revlist = self.get_revids_from(None, revid)
         return revlist
 
-    def get_view(self, revid, start_revid, file_id, query=None):
+    @staticmethod
+    def _iterate_sufficiently(iterable, stop_at, extra_rev_count):
+        """Return a list of iterable.
+
+        If extra_rev_count is None, fully consume iterable.
+        Otherwise, stop at 'stop_at' + extra_rev_count.
+
+        Example:
+          iterate until you find stop_at, then iterate 10 more times.
+        """
+        if extra_rev_count is None:
+            return list(iterable)
+        result = []
+        found = False
+        for n in iterable:
+            result.append(n)
+            if n == stop_at:
+                found = True
+                break
+        if found:
+            for count, n in enumerate(iterable):
+                if count >= extra_rev_count:
+                    break
+                result.append(n)
+        return result
+
+    def get_view(self, revid, start_revid, file_id, query=None,
+                 extra_rev_count=None):
         """
         use the URL parameters (revid, start_revid, file_id, and query) to
         determine the revision list we're viewing (start_revid, file_id, query)
@@ -493,6 +522,10 @@
               file.
             - if a start_revid is given, we're viewing the branch from a
               specific revision up the tree.
+            - if extra_rev_count is given, find the view from start_revid =>
+              revid, and continue an additional 'extra_rev_count'. If not
+              given, then revid_list will contain the full history of
+              start_revid
 
         these may be combined to view revisions for a specific file, from
         a specific revision, with a specific search query.
@@ -511,12 +544,16 @@
 
         if query is None:
             revid_list = self.get_file_view(start_revid, file_id)
+            revid_list = self._iterate_sufficiently(revid_list, revid,
+                                                    extra_rev_count)
             if revid is None:
                 revid = start_revid
             if revid not in revid_list:
                 # if the given revid is not in the revlist, use a revlist that
                 # starts at the given revid.
                 revid_list = self.get_file_view(revid, file_id)
+                revid_list = self._iterate_sufficiently(revid_list, revid,
+                                                        extra_rev_count)
                 start_revid = revid
             return revid, start_revid, revid_list
 

=== modified file 'loggerhead/tests/test_history.py'
--- a/loggerhead/tests/test_history.py	2011-03-11 10:43:39 +0000
+++ b/loggerhead/tests/test_history.py	2011-03-16 14:43:36 +0000
@@ -23,7 +23,7 @@
 from loggerhead import history as _mod_history
 
 
-class TestHistoryGetRevidsFrom(tests.TestCaseWithMemoryTransport):
+class TestCaseWithExamples(tests.TestCaseWithMemoryTransport):
 
     def make_linear_ancestry(self):
         # Time goes up
@@ -43,6 +43,18 @@
         self.addCleanup(b.lock_read().unlock)
         return _mod_history.History(b, {})
 
+    def make_long_linear_ancestry(self):
+        builder = self.make_branch_builder('branch')
+        builder.start_series()
+        builder.build_snapshot('A', None, [
+            ('add', ('', 'root-id', 'directory', None))])
+        for r in "BCDEFGHIJKLMNOPQRSTUVWXYZ":
+            builder.build_snapshot(r, None, [])
+        builder.finish_series()
+        b = builder.get_branch()
+        self.addCleanup(b.lock_read().unlock)
+        return _mod_history.History(b, {})
+
     def make_merged_ancestry(self):
         # Time goes up
         # rev-3
@@ -90,6 +102,33 @@
         self.assertEqual(expected,
                          list(history.get_revids_from(search_revs, tip_rev)))
 
+
+class _DictProxy(object):
+
+    def __init__(self, d):
+        self._d = d
+        self._accessed = set()
+        self.__setitem__ = d.__setitem__
+
+    def __getitem__(self, name):
+        self._accessed.add(name)
+        return self._d[name]
+
+    def __len__(self):
+        return len(self._d)
+
+
+def track_rev_info_accesses(h):
+    """Track __getitem__ access to History._rev_info,
+
+    :return: set of items accessed
+    """
+    h._rev_info = _DictProxy(h._rev_info)
+    return h._rev_info._accessed
+
+
+class TestHistoryGetRevidsFrom(TestCaseWithExamples):
+
     def test_get_revids_from_simple_mainline(self):
         history = self.make_linear_ancestry()
         self.assertRevidsFrom(['rev-3', 'rev-2', 'rev-1'],
@@ -114,6 +153,39 @@
         self.assertRevidsFrom(['B'], history, ['B'], 'F')
         self.assertRevidsFrom(['A'], history, ['A'], 'F')
 
+    def test_get_revids_doesnt_over_produce_simple_mainline(self):
+        # get_revids_from shouldn't walk the whole ancestry just to get the
+        # answers for the first few revisions.
+        history = self.make_long_linear_ancestry()
+        accessed = track_rev_info_accesses(history)
+        result = history.get_revids_from(None, 'Z')
+        self.assertEqual(set(), accessed)
+        self.assertEqual('Z', result.next())
+        # We already know 'Z' because we passed it in.
+        self.assertEqual(set(), accessed)
+        self.assertEqual('Y', result.next())
+        self.assertEqual(set([history._rev_indices['Z']]), accessed)
+
+    def test_get_revids_doesnt_over_produce_for_merges(self):
+        # get_revids_from shouldn't walk the whole ancestry just to get the
+        # answers for the first few revisions.
+        history = self.make_long_linear_ancestry()
+        accessed = track_rev_info_accesses(history)
+        result = history.get_revids_from(['X', 'V'], 'Z')
+        self.assertEqual(set(), accessed)
+        self.assertEqual('X', result.next())
+        # We access 'W' because we are checking that W wasn't merged into X.
+        # The important bit is that we aren't getting the whole ancestry.
+        self.assertEqual(set([history._rev_indices[x] for x in 'ZYXW']),
+                         accessed)
+        self.assertEqual('V', result.next())
+        self.assertEqual(set([history._rev_indices[x] for x in 'ZYXWVU']),
+                         accessed)
+        self.assertRaises(StopIteration, result.next)
+        self.assertEqual(set([history._rev_indices[x] for x in 'ZYXWVU']),
+                         accessed)
+
+
 
 class TestHistoryChangeFromRevision(tests.TestCaseWithTransport):
 
@@ -171,3 +243,51 @@
         self.assertEqual([u'A Author <aauthor at example.com>',
                           u'B Author <bauthor at example.com>'],
                          change.authors)
+
+
+class TestHistory_IterateSufficiently(tests.TestCase):
+
+    def assertIterate(self, expected, iterable, stop_at, extra_rev_count):
+        self.assertEqual(expected, _mod_history.History._iterate_sufficiently(
+            iterable, stop_at, extra_rev_count))
+
+    def test_iter_no_extra(self):
+        lst = list('abcdefghijklmnopqrstuvwxyz')
+        self.assertIterate(['a', 'b', 'c'], iter(lst), 'c', 0)
+        self.assertIterate(['a', 'b', 'c', 'd'], iter(lst), 'd', 0)
+
+    def test_iter_not_found(self):
+        # If the key in question isn't found, we just exhaust the list
+        lst = list('abcdefghijklmnopqrstuvwxyz')
+        self.assertIterate(lst, iter(lst), 'not-there', 0)
+
+    def test_iter_with_extra(self):
+        lst = list('abcdefghijklmnopqrstuvwxyz')
+        self.assertIterate(['a', 'b', 'c'], iter(lst), 'b', 1)
+        self.assertIterate(['a', 'b', 'c', 'd', 'e'], iter(lst), 'c', 2)
+
+    def test_iter_with_too_many_extra(self):
+        lst = list('abcdefghijklmnopqrstuvwxyz')
+        self.assertIterate(lst, iter(lst), 'y', 10)
+        self.assertIterate(lst, iter(lst), 'z', 10)
+
+    def test_iter_with_extra_None(self):
+        lst = list('abcdefghijklmnopqrstuvwxyz')
+        self.assertIterate(lst, iter(lst), 'z', None)
+
+
+
+class TestHistoryGetView(TestCaseWithExamples):
+
+    def test_get_view_limited_history(self):
+        # get_view should only load enough history to serve the result, not all
+        # history.
+        history = self.make_long_linear_ancestry()
+        accessed = track_rev_info_accesses(history)
+        revid, start_revid, revid_list = history.get_view('Z', 'Z', None,
+                                                      extra_rev_count=5)
+        self.assertEqual(['Z', 'Y', 'X', 'W', 'V', 'U'], revid_list)
+        self.assertEqual('Z', revid)
+        self.assertEqual('Z', start_revid)
+        self.assertEqual(set([history._rev_indices[x] for x in 'ZYXWVU']),
+                         accessed)