Rev 5332: (lifeless) Make lsprof usage serialise rather than generated corrupted in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Sun Jul 4 10:38:11 BST 2010

At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5332 [merge]
revision-id: pqm at pqm.ubuntu.com-20100704093810-tqw6ncyt13z65q0m
parent: pqm at pqm.ubuntu.com-20100703000054-331rh98r3u7s211l
parent: robertc at robertcollins.net-20100704062211-tk9hw6bnsn5x47fm
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Sun 2010-07-04 10:38:10 +0100
message:
  (lifeless) Make lsprof usage serialise rather than generated corrupted
   results when used concurrently in a threaded application. Can now cause
   deadlocks when used reentrantly. (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/lsprof.py               lsprof.py-20051208071030-833790916798ceed
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
  bzrlib/tests/test_lsprof.py    test_lsprof.py-20070606095601-bctdndm8yhc0cqnc-1
=== modified file 'NEWS'

--- a/NEWS	2010-06-30 08:35:19 +0000
+++ b/NEWS	2010-07-04 06:22:11 +0000
@@ -166,6 +166,11 @@
   InterBranch objects that work with multiple permutations to be
   comprehensively tested. (Robert Collins)
 
+* ``bzrlib.lsprof.profile`` will no longer silently generate bad threaded
+  profiles when concurrent profile requests are made. Instead the profile
+  requests will be serialised. Reentrant requests will now deadlock.
+  (Robert Collins)
+
 * ``bzrlib.knit.KnitSequenceMatcher``, which has been deprecated since
   2007, has been deleted.  Use ``PatienceSequenceMatcher`` from
   ``bzrlib.patiencediff`` instead. (Andrew Bennetts)

=== modified file 'bzrlib/lsprof.py'
--- a/bzrlib/lsprof.py	2009-08-24 21:05:09 +0000
+++ b/bzrlib/lsprof.py	2010-07-04 06:22:11 +0000
@@ -10,6 +10,7 @@
 import threading
 from _lsprof import Profiler, profiler_entry
 
+from bzrlib import errors
 
 __all__ = ['profile', 'Stats']
 
@@ -20,6 +21,9 @@
     raised, pass in a closure that will catch the exceptions and transform them
     appropriately for your driver function.
 
+    Important caveat: only one profile can execute at a time. See BzrProfiler
+    for details.
+
     :return: The functions return value and a stats object.
     """
     profiler = BzrProfiler()
@@ -41,8 +45,21 @@
     To use it, create a BzrProfiler and call start() on it. Some arbitrary
     time later call stop() to stop profiling and retrieve the statistics
     from the code executed in the interim.
+
+    Note that profiling involves a threading.Lock around the actual profiling.
+    This is needed because profiling involves global manipulation of the python
+    interpreter state. As such you cannot perform multiple profiles at once.
+    Trying to do so will lock out the second profiler unless the global 
+    bzrlib.lsprof.BzrProfiler.profiler_block is set to 0. Setting it to 0 will
+    cause profiling to fail rather than blocking.
     """
 
+    profiler_block = 1
+    """Serialise rather than failing to profile concurrent profile requests."""
+
+    profiler_lock = threading.Lock()
+    """Global lock used to serialise profiles."""
+
     def start(self):
         """Start profiling.
         
@@ -51,8 +68,16 @@
         """
         self._g_threadmap = {}
         self.p = Profiler()
-        self.p.enable(subcalls=True)
-        threading.setprofile(self._thread_profile)
+        permitted = self.__class__.profiler_lock.acquire(
+            self.__class__.profiler_block)
+        if not permitted:
+            raise errors.InternalBzrError(msg="Already profiling something")
+        try:
+            self.p.enable(subcalls=True)
+            threading.setprofile(self._thread_profile)
+        except:
+            self.__class__.profiler_lock.release()
+            raise
 
     def stop(self):
         """Stop profiling.
@@ -62,17 +87,20 @@
 
         :return: A bzrlib.lsprof.Stats object.
         """
-        self.p.disable()
-        for pp in self._g_threadmap.values():
-            pp.disable()
-        threading.setprofile(None)
-        p = self.p
-        self.p = None
-        threads = {}
-        for tid, pp in self._g_threadmap.items():
-            threads[tid] = Stats(pp.getstats(), {})
-        self._g_threadmap = None
-        return Stats(p.getstats(), threads)
+        try:
+            self.p.disable()
+            for pp in self._g_threadmap.values():
+                pp.disable()
+            threading.setprofile(None)
+            p = self.p
+            self.p = None
+            threads = {}
+            for tid, pp in self._g_threadmap.items():
+                threads[tid] = Stats(pp.getstats(), {})
+            self._g_threadmap = None
+            return Stats(p.getstats(), threads)
+        finally:
+            self.__class__.profiler_lock.release()
 
     def _thread_profile(self, f, *args, **kwds):
         # we lose the first profile point for a new thread in order to

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2010-06-29 16:21:13 +0000
+++ b/bzrlib/tests/__init__.py	2010-07-04 06:22:11 +0000
@@ -3377,6 +3377,9 @@
 
     def startTest(self, test):
         self.profiler = bzrlib.lsprof.BzrProfiler()
+        # Prevent deadlocks in tests that use lsprof: those tests will
+        # unavoidably fail.
+        bzrlib.lsprof.BzrProfiler.profiler_block = 0
         self.profiler.start()
         ForwardingResult.startTest(self, test)
 

=== modified file 'bzrlib/tests/test_lsprof.py'
--- a/bzrlib/tests/test_lsprof.py	2009-08-24 21:05:09 +0000
+++ b/bzrlib/tests/test_lsprof.py	2010-07-04 06:22:11 +0000
@@ -19,9 +19,10 @@
 
 import cPickle
 import os
+import threading
 
 import bzrlib
-from bzrlib import tests
+from bzrlib import errors, tests
 
 
 class _LSProfFeature(tests.Feature):
@@ -111,3 +112,40 @@
         lines = [str(data) for data in stats.data]
         lines = [line for line in lines if 'a_function' in line]
         self.assertLength(1, lines)
+
+    def test_block_0(self):
+        # When profiler_block is 0, reentrant profile requests fail.
+        self.overrideAttr(bzrlib.lsprof.BzrProfiler, 'profiler_block', 0)
+        inner_calls = []
+        def inner():
+            profiler = bzrlib.lsprof.BzrProfiler()
+            self.assertRaises(errors.BzrError, profiler.start)
+            inner_calls.append(True)
+        bzrlib.lsprof.profile(inner)
+        self.assertLength(1, inner_calls)
+
+    def test_block_1(self):
+        # When profiler_block is 1, concurrent profiles serialise.
+        # This is tested by manually acquiring the profiler lock, then
+        # starting a thread that tries to profile, and releasing the lock. 
+        # We know due to test_block_0 that two profiles at once hit the lock,
+        # so while this isn't perfect (we'd want a callback on the lock being
+        # entered to allow lockstep evaluation of the actions), its good enough
+        # to be confident regressions would be caught. Alternatively, if this
+        # is flakey, a fake Lock object can be used to trace the calls made.
+        calls = []
+        def profiled():
+            calls.append('profiled')
+        def do_profile():
+            bzrlib.lsprof.profile(profiled)
+            calls.append('after_profiled')
+        thread = threading.Thread(target=do_profile)
+        bzrlib.lsprof.BzrProfiler.profiler_lock.acquire()
+        try:
+            try:
+                thread.start()
+            finally:
+                bzrlib.lsprof.BzrProfiler.profiler_lock.release()
+        finally:
+            thread.join()
+        self.assertLength(2, calls)