Rev 4757: (mbp) more gracefully handle corrupt lockdirs (missing/null held files) in file:///home/pqm/archives/thelove/bzr/2.0/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Wed Sep 8 13:16:38 BST 2010
At file:///home/pqm/archives/thelove/bzr/2.0/
------------------------------------------------------------
revno: 4757 [merge]
revision-id: pqm at pqm.ubuntu.com-20100908121636-2xhzu6gw30fvmqi9
parent: pqm at pqm.ubuntu.com-20100720173406-cxfg7fgbzn1a90eo
parent: andrew.bennetts at canonical.com-20100906061352-ef2rw40pa5wte5oj
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: 2.0
timestamp: Wed 2010-09-08 13:16:36 +0100
message:
(mbp) more gracefully handle corrupt lockdirs (missing/null held files)
(Andrew Bennetts)
modified:
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/lockdir.py lockdir.py-20060220222025-98258adf27fbdda3
bzrlib/tests/test_errors.py test_errors.py-20060210110251-41aba2deddf936a8
bzrlib/tests/test_lockdir.py test_lockdir.py-20060220222025-33d4221569a3d600
bzrlib/tests/test_rio.py test_rio.py-20051128032247-dcd1082dfc86d3d3
=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py 2009-08-27 05:22:14 +0000
+++ b/bzrlib/errors.py 2010-09-06 06:13:52 +0000
@@ -1054,6 +1054,18 @@
self.target = target
+class LockCorrupt(LockError):
+
+ _fmt = ("Lock is apparently held, but corrupted: %(corruption_info)s\n"
+ "Use 'bzr break-lock' to clear it")
+
+ internal_error = False
+
+ def __init__(self, corruption_info, file_data=None):
+ self.corruption_info = corruption_info
+ self.file_data = file_data
+
+
class LockNotHeld(LockError):
_fmt = "Lock not held: %(lock)s"
=== modified file 'bzrlib/lockdir.py'
--- a/bzrlib/lockdir.py 2010-02-26 06:51:51 +0000
+++ b/bzrlib/lockdir.py 2010-09-06 06:13:52 +0000
@@ -118,6 +118,7 @@
LockBreakMismatch,
LockBroken,
LockContention,
+ LockCorrupt,
LockFailed,
LockNotHeld,
NoSuchFile,
@@ -345,7 +346,13 @@
it possibly being still active.
"""
self._check_not_locked()
- holder_info = self.peek()
+ try:
+ holder_info = self.peek()
+ except LockCorrupt, e:
+ # The lock info is corrupt.
+ if bzrlib.ui.ui_factory.get_boolean("Break (corrupt %r)" % (self,)):
+ self.force_break_corrupt(e.file_data)
+ return
if holder_info is not None:
lock_info = '\n'.join(self._format_lock_info(holder_info))
if bzrlib.ui.ui_factory.get_boolean("Break %s" % lock_info):
@@ -392,6 +399,35 @@
for hook in self.hooks['lock_broken']:
hook(result)
+ def force_break_corrupt(self, corrupt_info_lines):
+ """Release a lock that has been corrupted.
+
+ This is very similar to force_break, it except it doesn't assume that
+ self.peek() can work.
+
+ :param corrupt_info_lines: the lines of the corrupted info file, used
+ to check that the lock hasn't changed between reading the (corrupt)
+ info file and calling force_break_corrupt.
+ """
+ # XXX: this copes with unparseable info files, but what about missing
+ # info files? Or missing lock dirs?
+ self._check_not_locked()
+ tmpname = '%s/broken.%s.tmp' % (self.path, rand_chars(20))
+ self.transport.rename(self._held_dir, tmpname)
+ # check that we actually broke the right lock, not someone else;
+ # there's a small race window between checking it and doing the
+ # rename.
+ broken_info_path = tmpname + self.__INFO_NAME
+ f = self.transport.get(broken_info_path)
+ broken_lines = f.readlines()
+ if broken_lines != corrupt_info_lines:
+ raise LockBreakMismatch(self, broken_lines, corrupt_info_lines)
+ self.transport.delete(broken_info_path)
+ self.transport.rmdir(tmpname)
+ result = lock.LockResult(self.transport.abspath(self.path))
+ for hook in self.hooks['lock_broken']:
+ hook(result)
+
def _check_not_locked(self):
"""If the lock is held by this instance, raise an error."""
if self._lock_held:
@@ -456,7 +492,13 @@
return s.to_string()
def _parse_info(self, info_file):
- stanza = rio.read_stanza(info_file.readlines())
+ lines = info_file.readlines()
+ try:
+ stanza = rio.read_stanza(lines)
+ except ValueError, e:
+ mutter('Corrupt lock info file: %r', lines)
+ raise LockCorrupt("could not parse lock info file: " + str(e),
+ lines)
if stanza is None:
# see bug 185013; we fairly often end up with the info file being
# empty after an interruption; we could log a message here but
=== modified file 'bzrlib/tests/test_errors.py'
--- a/bzrlib/tests/test_errors.py 2009-08-21 02:37:18 +0000
+++ b/bzrlib/tests/test_errors.py 2010-09-06 06:13:52 +0000
@@ -132,6 +132,13 @@
"cannot be broken.",
str(error))
+ def test_lock_corrupt(self):
+ error = errors.LockCorrupt("corruption info")
+ self.assertEqualDiff("Lock is apparently held, but corrupted: "
+ "corruption info\n"
+ "Use 'bzr break-lock' to clear it",
+ str(error))
+
def test_knit_data_stream_incompatible(self):
error = errors.KnitDataStreamIncompatible(
'stream format', 'target format')
=== modified file 'bzrlib/tests/test_lockdir.py'
--- a/bzrlib/tests/test_lockdir.py 2010-02-26 06:51:51 +0000
+++ b/bzrlib/tests/test_lockdir.py 2010-09-06 06:13:52 +0000
@@ -568,6 +568,62 @@
finally:
bzrlib.ui.ui_factory = orig_factory
+ def test_break_lock_corrupt_info(self):
+ """break_lock works even if the info file is corrupt (and tells the UI
+ that it is corrupt).
+ """
+ ld = self.get_lock()
+ ld2 = self.get_lock()
+ ld.create()
+ ld.lock_write()
+ ld.transport.put_bytes_non_atomic('test_lock/held/info', '\0')
+ class LoggingUIFactory(bzrlib.ui.SilentUIFactory):
+ def __init__(self):
+ self.prompts = []
+ def get_boolean(self, prompt):
+ self.prompts.append(('boolean', prompt))
+ return True
+ ui = LoggingUIFactory()
+ orig_factory = bzrlib.ui.ui_factory
+ bzrlib.ui.ui_factory = ui
+ try:
+ ld2.break_lock()
+ self.assertLength(1, ui.prompts)
+ self.assertEqual('boolean', ui.prompts[0][0])
+ self.assertStartsWith(ui.prompts[0][1], 'Break (corrupt LockDir')
+ self.assertRaises(LockBroken, ld.unlock)
+ finally:
+ bzrlib.ui.ui_factory = orig_factory
+
+ def test_break_lock_missing_info(self):
+ """break_lock works even if the info file is missing (and tells the UI
+ that it is corrupt).
+ """
+ ld = self.get_lock()
+ ld2 = self.get_lock()
+ ld.create()
+ ld.lock_write()
+ ld.transport.delete('test_lock/held/info')
+ class LoggingUIFactory(bzrlib.ui.SilentUIFactory):
+ def __init__(self):
+ self.prompts = []
+ def get_boolean(self, prompt):
+ self.prompts.append(('boolean', prompt))
+ return True
+ ui = LoggingUIFactory()
+ orig_factory = bzrlib.ui.ui_factory
+ bzrlib.ui.ui_factory = ui
+ try:
+ ld2.break_lock()
+ self.assertRaises(LockBroken, ld.unlock)
+ self.assertLength(0, ui.prompts)
+ finally:
+ bzrlib.ui.ui_factory = orig_factory
+ # Suppress warnings due to ld not being unlocked
+ # XXX: if lock_broken hook was invoked in this case, this hack would
+ # not be necessary. - Andrew Bennetts, 2010-09-06.
+ del self._lock_actions[:]
+
def test_create_missing_base_directory(self):
"""If LockDir.path doesn't exist, it can be created
@@ -688,6 +744,51 @@
'locked (unknown)'],
formatted_info)
+ def test_corrupt_lockdir_info(self):
+ """We can cope with corrupt (and thus unparseable) info files."""
+ # This seems like a fairly common failure case too - see
+ # <https://bugs.edge.launchpad.net/bzr/+bug/619872> for instance.
+ # In particular some systems tend to fill recently created files with
+ # nul bytes after recovering from a system crash.
+ t = self.get_transport()
+ t.mkdir('test_lock')
+ t.mkdir('test_lock/held')
+ t.put_bytes('test_lock/held/info', '\0')
+ lf = LockDir(t, 'test_lock')
+ self.assertRaises(errors.LockCorrupt, lf.peek)
+ # Currently attempt_lock gives LockContention, but LockCorrupt would be
+ # a reasonable result too.
+ self.assertRaises(
+ (errors.LockCorrupt, errors.LockContention), lf.attempt_lock)
+ self.assertRaises(errors.LockCorrupt, lf.validate_token, 'fake token')
+
+ def test_missing_lockdir_info(self):
+ """We can cope with absent info files."""
+ t = self.get_transport()
+ t.mkdir('test_lock')
+ t.mkdir('test_lock/held')
+ lf = LockDir(t, 'test_lock')
+ # In this case we expect the 'not held' result from peek, because peek
+ # cannot be expected to notice that there is a 'held' directory with no
+ # 'info' file.
+ self.assertEqual(None, lf.peek())
+ # And lock/unlock may work or give LockContention (but not any other
+ # error).
+ try:
+ lf.attempt_lock()
+ except LockContention:
+ # LockContention is ok, and expected on Windows
+ pass
+ else:
+ # no error is ok, and expected on POSIX (because POSIX allows
+ # os.rename over an empty directory).
+ lf.unlock()
+ # Currently raises TokenMismatch, but LockCorrupt would be reasonable
+ # too.
+ self.assertRaises(
+ (errors.TokenMismatch, errors.LockCorrupt),
+ lf.validate_token, 'fake token')
+
class TestLockDirHooks(TestCaseWithTransport):
=== modified file 'bzrlib/tests/test_rio.py'
--- a/bzrlib/tests/test_rio.py 2009-03-23 14:59:43 +0000
+++ b/bzrlib/tests/test_rio.py 2010-09-06 06:13:52 +0000
@@ -188,6 +188,14 @@
self.assertEqual(s, None)
self.assertTrue(s is None)
+ def test_read_nul_byte(self):
+ """File consisting of a nul byte causes an error."""
+ self.assertRaises(ValueError, read_stanza, ['\0'])
+
+ def test_read_nul_bytes(self):
+ """File consisting of many nul bytes causes an error."""
+ self.assertRaises(ValueError, read_stanza, ['\0' * 100])
+
def test_read_iter(self):
"""Read several stanzas from file"""
tmpf = TemporaryFile()
More information about the bazaar-commits
mailing list