Rev 410: Play around with making merge_points a lazy property. in http://bazaar.launchpad.net/~jameinel/loggerhead/history_db

Wed Apr 14 18:08:16 BST 2010

At http://bazaar.launchpad.net/~jameinel/loggerhead/history_db

------------------------------------------------------------
revno: 410
revision-id: john at arbash-meinel.com-20100414170757-d88fac0am1lekqo8
parent: john at arbash-meinel.com-20100413205820-ctgno1xbshj4803o
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Wed 2010-04-14 12:07:57 -0500
message:
  Play around with making merge_points a lazy property.
  
  The merge_points computation doesn't seem 100% correct at the moment anyway,
  and making it lazy lets us avoid loading the whole history (at least until I
  figure out how to compute it more cheaply).
-------------- next part --------------
=== modified file 'loggerhead/apps/branch.py'

--- a/loggerhead/apps/branch.py	2010-03-25 16:19:24 +0000
+++ b/loggerhead/apps/branch.py	2010-04-14 17:07:57 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2008, 2009 Canonical Ltd.
+# Copyright (C) 2008, 2009, 2010 Canonical Ltd.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -19,6 +19,7 @@
 import logging
 import urllib
 import sys
+import time
 
 import bzrlib.branch
 import bzrlib.errors
@@ -76,7 +77,7 @@
                                " continuing without using a cache")
             else:
                 file_cache = FileChangeCache(cache_path)
-                revinfo_disk_cache = RevInfoDiskCache(cache_path)
+                revinfo_disk_cache = None # RevInfoDiskCache(cache_path)
         return History(
             self.branch, self.graph_cache, file_cache=file_cache,
             revinfo_disk_cache=revinfo_disk_cache, cache_key=self.friendly_name)
@@ -160,14 +161,22 @@
         cls = self.controllers_dict.get(path)
         if cls is None:
             raise httpexceptions.HTTPNotFound()
-        self.branch.lock_read()
-        try:
+        def do_stuff():
+            self.branch.lock_read()
             try:
-                c = cls(self, self.get_history)
-                return c(environ, start_response)
-            except:
-                environ['exc_info'] = sys.exc_info()
-                environ['branch'] = self
-                raise
-        finally:
-            self.branch.unlock()
+                try:
+                    c = cls(self, self.get_history)
+                    return c(environ, start_response)
+                except:
+                    environ['exc_info'] = sys.exc_info()
+                    environ['branch'] = self
+                    raise
+            finally:
+                self.branch.unlock()
+        from bzrlib.commands import apply_lsprofiled
+        t = time.time()
+        ## res = apply_lsprofiled(',,prof.txt', do_stuff)
+        res = do_stuff()
+        t = time.time() - t
+        self.log.warn('do_stuff() took %.3fs' % (t,))
+        return res

=== modified file 'loggerhead/history.py'
--- a/loggerhead/history.py	2010-04-13 20:58:20 +0000
+++ b/loggerhead/history.py	2010-04-14 17:07:57 +0000
@@ -298,10 +298,10 @@
         self.last_revid = branch.last_revision()
 
         # XXX: Remove the whole-history type operations
-        caches = [RevInfoMemoryCache(whole_history_data_cache)]
-        if revinfo_disk_cache:
-            caches.append(revinfo_disk_cache)
-        self._load_whole_history_data(caches, cache_key)
+        ### caches = [RevInfoMemoryCache(whole_history_data_cache)]
+        ### if revinfo_disk_cache:
+        ###     caches.append(revinfo_disk_cache)
+        ### self._load_whole_history_data(caches, cache_key)
 
     @property
     def has_revisions(self):
@@ -345,9 +345,16 @@
             # TODO: I think we could just call
             # self._branch.repository.iter_reverse_revision_history(start_revid)
             # or something like that.
-            while tip_revid is not None:
-                yield tip_revid
-                tip_revid = self._get_lh_parent(tip_revid)
+            # TODO: This operation appears at the top of profiling currently
+            #       when loading the 'changes' page. Especially unfortunate
+            #       given that we only show ~20 revs...
+            if start_revid == self.last_revid:
+                history = reversed(self._branch.revision_history())
+            else:
+                history = self._branch.repository.iter_reverse_revision_history(
+                                start_revid)
+            for rev_id in history:
+                yield rev_id
             return
         revid_set = set(revid_list)
 
@@ -668,15 +675,23 @@
 
         # some data needs to be recalculated each time, because it may
         # change as new revisions are added.
-        for change in changes:
+        def merge_revids_prop(change, attr):
+            # TODO: In testing, this doesn't seem to do what I expected anyway.
+            #       So for now, just skip the work
+            return []
             merge_revids = self.simplify_merge_point_list(
-                               self.get_merge_point_list(change.revid))
-            change.merge_points = [
-                util.Container(revid=r,
-                revno=self.get_revno(r)) for r in merge_revids]
+                self.get_merge_point_list(change.revid))
+            points = [util.Container(revid=r, revno=self.get_revno(r))
+                      for r in merge_revids]
+            self.log.warn('merge_revids_prop triggered for %s => %s'
+                          % (change.revid, points))
+            return points
+        for change in changes:
+            change._set_property('merge_points', merge_revids_prop)
             if len(change.parents) > 0:
-                change.parents = [util.Container(revid=r,
-                    revno=self.get_revno(r)) for r in change.parents]
+                change.parents = [
+                    util.Container(revid=r, revno=self.get_revno(r))
+                    for r in change.parents]
             change.revno = self.get_revno(change.revid)
 
         parity = 0

=== modified file 'loggerhead/util.py'
--- a/loggerhead/util.py	2010-03-25 16:19:24 +0000
+++ b/loggerhead/util.py	2010-04-14 17:07:57 +0000
@@ -137,6 +137,7 @@
     """
 
     def __init__(self, _dict=None, **kw):
+        self._properties = {}
         if _dict is not None:
             for key, value in _dict.iteritems():
                 setattr(self, key, value)
@@ -153,6 +154,22 @@
         out += '}'
         return out
 
+    def __getattr__(self, attr):
+        """Used for handling things that aren't already available."""
+        if attr in self._properties:
+            val = self._properties[attr](self, attr)
+            setattr(self, attr, val)
+            return val
+        raise AttributeError('No attribute: %s' % (attr,))
+
+    def _set_property(self, attr, prop_func):
+        """Set a function that will be called when an attribute is desired.
+
+        We will cache the return value, so the function call should be
+        idempotent. We will pass 'self' and the 'attr' name when triggered.
+        """
+        self._properties[attr] = prop_func
+
 
 def trunc(text, limit=10):
     if len(text) <= limit: