Rev 4757: (mbp) more gracefully handle corrupt lockdirs (missing/null held files) in file:///home/pqm/archives/thelove/bzr/2.0/

Wed Sep 8 13:16:38 BST 2010

At file:///home/pqm/archives/thelove/bzr/2.0/

------------------------------------------------------------
revno: 4757 [merge]
revision-id: pqm at pqm.ubuntu.com-20100908121636-2xhzu6gw30fvmqi9
parent: pqm at pqm.ubuntu.com-20100720173406-cxfg7fgbzn1a90eo
parent: andrew.bennetts at canonical.com-20100906061352-ef2rw40pa5wte5oj
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: 2.0
timestamp: Wed 2010-09-08 13:16:36 +0100
message:
  (mbp) more gracefully handle corrupt lockdirs (missing/null held files)
   (Andrew Bennetts)
modified:
  bzrlib/errors.py               errors.py-20050309040759-20512168c4e14fbd
  bzrlib/lockdir.py              lockdir.py-20060220222025-98258adf27fbdda3
  bzrlib/tests/test_errors.py    test_errors.py-20060210110251-41aba2deddf936a8
  bzrlib/tests/test_lockdir.py   test_lockdir.py-20060220222025-33d4221569a3d600
  bzrlib/tests/test_rio.py       test_rio.py-20051128032247-dcd1082dfc86d3d3
=== modified file 'bzrlib/errors.py'

--- a/bzrlib/errors.py	2009-08-27 05:22:14 +0000
+++ b/bzrlib/errors.py	2010-09-06 06:13:52 +0000
@@ -1054,6 +1054,18 @@
         self.target = target
 
 
+class LockCorrupt(LockError):
+
+    _fmt = ("Lock is apparently held, but corrupted: %(corruption_info)s\n"
+            "Use 'bzr break-lock' to clear it")
+
+    internal_error = False
+
+    def __init__(self, corruption_info, file_data=None):
+        self.corruption_info = corruption_info
+        self.file_data = file_data
+
+
 class LockNotHeld(LockError):
 
     _fmt = "Lock not held: %(lock)s"

=== modified file 'bzrlib/lockdir.py'
--- a/bzrlib/lockdir.py	2010-02-26 06:51:51 +0000
+++ b/bzrlib/lockdir.py	2010-09-06 06:13:52 +0000
@@ -118,6 +118,7 @@
         LockBreakMismatch,
         LockBroken,
         LockContention,
+        LockCorrupt,
         LockFailed,
         LockNotHeld,
         NoSuchFile,
@@ -345,7 +346,13 @@
         it possibly being still active.
         """
         self._check_not_locked()
-        holder_info = self.peek()
+        try:
+            holder_info = self.peek()
+        except LockCorrupt, e:
+            # The lock info is corrupt.
+            if bzrlib.ui.ui_factory.get_boolean("Break (corrupt %r)" % (self,)):
+                self.force_break_corrupt(e.file_data)
+            return
         if holder_info is not None:
             lock_info = '\n'.join(self._format_lock_info(holder_info))
             if bzrlib.ui.ui_factory.get_boolean("Break %s" % lock_info):
@@ -392,6 +399,35 @@
         for hook in self.hooks['lock_broken']:
             hook(result)
 
+    def force_break_corrupt(self, corrupt_info_lines):
+        """Release a lock that has been corrupted.
+        
+        This is very similar to force_break, it except it doesn't assume that
+        self.peek() can work.
+        
+        :param corrupt_info_lines: the lines of the corrupted info file, used
+            to check that the lock hasn't changed between reading the (corrupt)
+            info file and calling force_break_corrupt.
+        """
+        # XXX: this copes with unparseable info files, but what about missing
+        # info files?  Or missing lock dirs?
+        self._check_not_locked()
+        tmpname = '%s/broken.%s.tmp' % (self.path, rand_chars(20))
+        self.transport.rename(self._held_dir, tmpname)
+        # check that we actually broke the right lock, not someone else;
+        # there's a small race window between checking it and doing the
+        # rename.
+        broken_info_path = tmpname + self.__INFO_NAME
+        f = self.transport.get(broken_info_path)
+        broken_lines = f.readlines()
+        if broken_lines != corrupt_info_lines:
+            raise LockBreakMismatch(self, broken_lines, corrupt_info_lines)
+        self.transport.delete(broken_info_path)
+        self.transport.rmdir(tmpname)
+        result = lock.LockResult(self.transport.abspath(self.path))
+        for hook in self.hooks['lock_broken']:
+            hook(result)
+
     def _check_not_locked(self):
         """If the lock is held by this instance, raise an error."""
         if self._lock_held:
@@ -456,7 +492,13 @@
         return s.to_string()
 
     def _parse_info(self, info_file):
-        stanza = rio.read_stanza(info_file.readlines())
+        lines = info_file.readlines()
+        try:
+            stanza = rio.read_stanza(lines)
+        except ValueError, e:
+            mutter('Corrupt lock info file: %r', lines)
+            raise LockCorrupt("could not parse lock info file: " + str(e),
+                              lines)
         if stanza is None:
             # see bug 185013; we fairly often end up with the info file being
             # empty after an interruption; we could log a message here but

=== modified file 'bzrlib/tests/test_errors.py'
--- a/bzrlib/tests/test_errors.py	2009-08-21 02:37:18 +0000
+++ b/bzrlib/tests/test_errors.py	2010-09-06 06:13:52 +0000
@@ -132,6 +132,13 @@
             "cannot be broken.",
             str(error))
 
+    def test_lock_corrupt(self):
+        error = errors.LockCorrupt("corruption info")
+        self.assertEqualDiff("Lock is apparently held, but corrupted: "
+            "corruption info\n"
+            "Use 'bzr break-lock' to clear it",
+            str(error))
+
     def test_knit_data_stream_incompatible(self):
         error = errors.KnitDataStreamIncompatible(
             'stream format', 'target format')

=== modified file 'bzrlib/tests/test_lockdir.py'
--- a/bzrlib/tests/test_lockdir.py	2010-02-26 06:51:51 +0000
+++ b/bzrlib/tests/test_lockdir.py	2010-09-06 06:13:52 +0000
@@ -568,6 +568,62 @@
         finally:
             bzrlib.ui.ui_factory = orig_factory
 
+    def test_break_lock_corrupt_info(self):
+        """break_lock works even if the info file is corrupt (and tells the UI
+        that it is corrupt).
+        """
+        ld = self.get_lock()
+        ld2 = self.get_lock()
+        ld.create()
+        ld.lock_write()
+        ld.transport.put_bytes_non_atomic('test_lock/held/info', '\0')
+        class LoggingUIFactory(bzrlib.ui.SilentUIFactory):
+            def __init__(self):
+                self.prompts = []
+            def get_boolean(self, prompt):
+                self.prompts.append(('boolean', prompt))
+                return True
+        ui = LoggingUIFactory()
+        orig_factory = bzrlib.ui.ui_factory
+        bzrlib.ui.ui_factory = ui
+        try:
+            ld2.break_lock()
+            self.assertLength(1, ui.prompts)
+            self.assertEqual('boolean', ui.prompts[0][0])
+            self.assertStartsWith(ui.prompts[0][1], 'Break (corrupt LockDir')
+            self.assertRaises(LockBroken, ld.unlock)
+        finally:
+            bzrlib.ui.ui_factory = orig_factory
+
+    def test_break_lock_missing_info(self):
+        """break_lock works even if the info file is missing (and tells the UI
+        that it is corrupt).
+        """
+        ld = self.get_lock()
+        ld2 = self.get_lock()
+        ld.create()
+        ld.lock_write()
+        ld.transport.delete('test_lock/held/info')
+        class LoggingUIFactory(bzrlib.ui.SilentUIFactory):
+            def __init__(self):
+                self.prompts = []
+            def get_boolean(self, prompt):
+                self.prompts.append(('boolean', prompt))
+                return True
+        ui = LoggingUIFactory()
+        orig_factory = bzrlib.ui.ui_factory
+        bzrlib.ui.ui_factory = ui
+        try:
+            ld2.break_lock()
+            self.assertRaises(LockBroken, ld.unlock)
+            self.assertLength(0, ui.prompts)
+        finally:
+            bzrlib.ui.ui_factory = orig_factory
+        # Suppress warnings due to ld not being unlocked
+        # XXX: if lock_broken hook was invoked in this case, this hack would
+        # not be necessary.  - Andrew Bennetts, 2010-09-06.
+        del self._lock_actions[:]
+
     def test_create_missing_base_directory(self):
         """If LockDir.path doesn't exist, it can be created
 
@@ -688,6 +744,51 @@
              'locked (unknown)'],
             formatted_info)
 
+    def test_corrupt_lockdir_info(self):
+        """We can cope with corrupt (and thus unparseable) info files."""
+        # This seems like a fairly common failure case too - see
+        # <https://bugs.edge.launchpad.net/bzr/+bug/619872> for instance.
+        # In particular some systems tend to fill recently created files with
+        # nul bytes after recovering from a system crash.
+        t = self.get_transport()
+        t.mkdir('test_lock')
+        t.mkdir('test_lock/held')
+        t.put_bytes('test_lock/held/info', '\0')
+        lf = LockDir(t, 'test_lock')
+        self.assertRaises(errors.LockCorrupt, lf.peek)
+        # Currently attempt_lock gives LockContention, but LockCorrupt would be
+        # a reasonable result too.
+        self.assertRaises(
+            (errors.LockCorrupt, errors.LockContention), lf.attempt_lock)
+        self.assertRaises(errors.LockCorrupt, lf.validate_token, 'fake token')
+
+    def test_missing_lockdir_info(self):
+        """We can cope with absent info files."""
+        t = self.get_transport()
+        t.mkdir('test_lock')
+        t.mkdir('test_lock/held')
+        lf = LockDir(t, 'test_lock')
+        # In this case we expect the 'not held' result from peek, because peek
+        # cannot be expected to notice that there is a 'held' directory with no
+        # 'info' file.
+        self.assertEqual(None, lf.peek())
+        # And lock/unlock may work or give LockContention (but not any other
+        # error).
+        try:
+            lf.attempt_lock()
+        except LockContention:
+            # LockContention is ok, and expected on Windows
+            pass
+        else:
+            # no error is ok, and expected on POSIX (because POSIX allows
+            # os.rename over an empty directory).
+            lf.unlock()
+        # Currently raises TokenMismatch, but LockCorrupt would be reasonable
+        # too.
+        self.assertRaises(
+            (errors.TokenMismatch, errors.LockCorrupt),
+            lf.validate_token, 'fake token')
+
 
 class TestLockDirHooks(TestCaseWithTransport):
 

=== modified file 'bzrlib/tests/test_rio.py'
--- a/bzrlib/tests/test_rio.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/tests/test_rio.py	2010-09-06 06:13:52 +0000
@@ -188,6 +188,14 @@
         self.assertEqual(s, None)
         self.assertTrue(s is None)
 
+    def test_read_nul_byte(self):
+        """File consisting of a nul byte causes an error."""
+        self.assertRaises(ValueError, read_stanza, ['\0'])
+
+    def test_read_nul_bytes(self):
+        """File consisting of many nul bytes causes an error."""
+        self.assertRaises(ValueError, read_stanza, ['\0' * 100])
+
     def test_read_iter(self):
         """Read several stanzas from file"""
         tmpf = TemporaryFile()