Rev 6023: (mbp) flush changes to dirstate and repository to disk (but this can be in file:///home/pqm/archives/thelove/bzr/2.4/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Wed Jul 27 05:05:19 UTC 2011
At file:///home/pqm/archives/thelove/bzr/2.4/
------------------------------------------------------------
revno: 6023 [merge]
revision-id: pqm at pqm.ubuntu.com-20110727050515-whqsxs065gfm7nc2
parent: pqm at pqm.ubuntu.com-20110725065258-m3xgnqlkf35acxml
parent: mbp at canonical.com-20110726073006-f05nll8bve0w84ld
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: 2.4
timestamp: Wed 2011-07-27 05:05:15 +0000
message:
(mbp) flush changes to dirstate and repository to disk (but this can be
turned off by an option) (bug 343427) (Martin Pool)
modified:
bzrlib/config.py config.py-20051011043216-070c74f4e9e338e8
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
bzrlib/help_topics/en/configuration.txt configuration.txt-20060314161707-868350809502af01
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/osutils.py osutils.py-20050309040759-eeaff12fbf77ac86
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/tests/__init__.py selftest.py-20050531073622-8d0e3c8845c97a64
bzrlib/tests/test_selftest.py test_selftest.py-20051202044319-c110a115d8c0456a
bzrlib/tests/test_transport.py testtransport.py-20050718175618-e5cdb99f4555ddce
bzrlib/transport/__init__.py transport.py-20050711165921-4978aa7ce1285ad5
bzrlib/transport/local.py local_transport.py-20050711165921-9b1f142bfe480c24
doc/developers/testing.txt testing.txt-20080812140359-i70zzh6v2z7grqex-1
doc/en/release-notes/bzr-2.4.txt bzr2.4.txt-20110114053217-k7ym9jfz243fddjm-1
=== modified file 'bzrlib/config.py'
--- a/bzrlib/config.py 2011-06-30 16:48:11 +0000
+++ b/bzrlib/config.py 2011-07-25 07:51:35 +0000
@@ -2282,6 +2282,15 @@
'editor', Option('editor'),
help='The command called to launch an editor to enter a message.')
+option_registry.register(
+ 'dirstate.fdatasync', Option('dirstate.fdatasync', default=True),
+ help='Flush dirstate changes onto physical disk?')
+
+option_registry.register(
+ 'repository.fdatasync',
+ Option('repository.fdatasync', default=True),
+ help='Flush repository changes onto physical disk?')
+
class Section(object):
"""A section defines a dict of option name => value.
@@ -2812,6 +2821,9 @@
class LocationStack(_CompatibleStack):
def __init__(self, location):
+ """Make a new stack for a location and global configuration.
+
+ :param location: A URL prefix to """
lstore = LocationStore()
matcher = LocationMatcher(lstore, location)
gstore = GlobalStore()
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2011-05-19 18:20:37 +0000
+++ b/bzrlib/dirstate.py 2011-07-25 07:11:56 +0000
@@ -232,6 +232,7 @@
from bzrlib import (
cache_utf8,
+ config,
debug,
errors,
inventory,
@@ -239,6 +240,7 @@
osutils,
static_tuple,
trace,
+ urlutils,
)
@@ -448,6 +450,8 @@
self._known_hash_changes = set()
# How many hash changed entries can we have without saving
self._worth_saving_limit = worth_saving_limit
+ self._config_stack = config.LocationStack(urlutils.local_path_to_url(
+ path))
def __repr__(self):
return "%s(%r)" % \
@@ -2508,33 +2512,41 @@
# IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail
# to save an IN_MEMORY_HASH_MODIFIED, and fail *noisily* if we
# fail to save IN_MEMORY_MODIFIED
- if self._worth_saving():
- grabbed_write_lock = False
- if self._lock_state != 'w':
- grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
- # Switch over to the new lock, as the old one may be closed.
+ if not self._worth_saving():
+ return
+
+ grabbed_write_lock = False
+ if self._lock_state != 'w':
+ grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
+ # Switch over to the new lock, as the old one may be closed.
+ # TODO: jam 20070315 We should validate the disk file has
+ # not changed contents, since temporary_write_lock may
+ # not be an atomic operation.
+ self._lock_token = new_lock
+ self._state_file = new_lock.f
+ if not grabbed_write_lock:
+ # We couldn't grab a write lock, so we switch back to a read one
+ return
+ try:
+ lines = self.get_lines()
+ self._state_file.seek(0)
+ self._state_file.writelines(lines)
+ self._state_file.truncate()
+ self._state_file.flush()
+ self._maybe_fdatasync()
+ self._mark_unmodified()
+ finally:
+ if grabbed_write_lock:
+ self._lock_token = self._lock_token.restore_read_lock()
+ self._state_file = self._lock_token.f
# TODO: jam 20070315 We should validate the disk file has
- # not changed contents. Since temporary_write_lock may
- # not be an atomic operation.
- self._lock_token = new_lock
- self._state_file = new_lock.f
- if not grabbed_write_lock:
- # We couldn't grab a write lock, so we switch back to a read one
- return
- try:
- lines = self.get_lines()
- self._state_file.seek(0)
- self._state_file.writelines(lines)
- self._state_file.truncate()
- self._state_file.flush()
- self._mark_unmodified()
- finally:
- if grabbed_write_lock:
- self._lock_token = self._lock_token.restore_read_lock()
- self._state_file = self._lock_token.f
- # TODO: jam 20070315 We should validate the disk file has
- # not changed contents. Since restore_read_lock may
- # not be an atomic operation.
+ # not changed contents. Since restore_read_lock may
+ # not be an atomic operation.
+
+ def _maybe_fdatasync(self):
+ """Flush to disk if possible and if not configured off."""
+ if self._config_stack.get('dirstate.fdatasync'):
+ osutils.fdatasync(self._state_file.fileno())
def _worth_saving(self):
"""Is it worth saving the dirstate or not?"""
=== modified file 'bzrlib/help_topics/en/configuration.txt'
--- a/bzrlib/help_topics/en/configuration.txt 2011-06-14 10:47:20 +0000
+++ b/bzrlib/help_topics/en/configuration.txt 2011-07-25 09:14:31 +0000
@@ -420,6 +420,13 @@
committed revisions only when the branch requires them. ``never`` will refuse
to sign newly committed revisions, even if the branch requires signatures.
+dirstate.fdatasync
+~~~~~~~~~~~~~~~~~~
+
+If true (default), working tree metadata changes are flushed through the
+OS buffers to physical disk. This is somewhat slower, but means data
+should not be lost if the machine crashes. See also repository.fdatasync.
+
recurse
~~~~~~~
@@ -501,6 +508,13 @@
:mapi: Use your preferred e-mail client on Windows.
:xdg-email: Use xdg-email to run your preferred mail program
+repository.fdatasync
+~~~~~~~~~~~~~~~~~~~~
+
+If true (default), repository changes are flushed through the OS buffers
+to physical disk. This is somewhat slower, but means data should not be
+lost if the machine crashes. See also dirstate.fdatasync.
+
submit_branch
~~~~~~~~~~~~~
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2011-05-19 09:32:38 +0000
+++ b/bzrlib/index.py 2011-07-05 06:41:44 +0000
@@ -245,6 +245,11 @@
"""
def finish(self):
+ """Finish the index.
+
+ :returns: cStringIO holding the full context of the index as it
+ should be written to disk.
+ """
lines = [_SIGNATURE]
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py 2011-06-16 18:34:26 +0000
+++ b/bzrlib/osutils.py 2011-07-25 07:11:56 +0000
@@ -2487,3 +2487,14 @@
is_local_pid_dead = win32utils.is_local_pid_dead
else:
is_local_pid_dead = _posix_is_local_pid_dead
+
+
+def fdatasync(fileno):
+ """Flush file contents to disk if possible.
+
+ :param fileno: Integer OS file handle.
+ :raises TransportNotPossible: If flushing to disk is not possible.
+ """
+ fn = getattr(os, 'fdatasync', getattr(os, 'fsync', None))
+ if fn is not None:
+ fn(fileno)
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2011-05-27 12:01:22 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2011-07-25 07:51:35 +0000
@@ -25,6 +25,7 @@
from bzrlib import (
chk_map,
cleanup,
+ config,
debug,
graph,
osutils,
@@ -478,7 +479,8 @@
# visible is smaller. On the other hand none will be seen until
# they're in the names list.
self.index_sizes = [None, None, None, None]
- self._write_index('revision', self.revision_index, 'revision', suspend)
+ self._write_index('revision', self.revision_index, 'revision',
+ suspend)
self._write_index('inventory', self.inventory_index, 'inventory',
suspend)
self._write_index('text', self.text_index, 'file texts', suspend)
@@ -488,7 +490,8 @@
self.index_sizes.append(None)
self._write_index('chk', self.chk_index,
'content hash bytes', suspend)
- self.write_stream.close()
+ self.write_stream.close(
+ want_fdatasync=self._pack_collection.config_stack.get('repository.fdatasync'))
# Note that this will clobber an existing pack with the same name,
# without checking for hash collisions. While this is undesirable this
# is something that can be rectified in a subsequent release. One way
@@ -537,8 +540,14 @@
transport = self.upload_transport
else:
transport = self.index_transport
- self.index_sizes[self.index_offset(index_type)] = transport.put_file(
- index_name, index.finish(), mode=self._file_mode)
+ index_tempfile = index.finish()
+ index_bytes = index_tempfile.read()
+ write_stream = transport.open_write_stream(index_name,
+ mode=self._file_mode)
+ write_stream.write(index_bytes)
+ write_stream.close(
+ want_fdatasync=self._pack_collection.config_stack.get('repository.fdatasync'))
+ self.index_sizes[self.index_offset(index_type)] = len(index_bytes)
if 'pack' in debug.debug_flags:
# XXX: size might be interesting?
mutter('%s: create_pack: wrote %s index: %s%s t+%6.3fs',
@@ -822,6 +831,7 @@
set(all_combined).difference([combined_idx]))
# resumed packs
self._resumed_packs = []
+ self.config_stack = config.LocationStack(self.transport.base)
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, self.repo)
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2011-07-12 23:33:11 +0000
+++ b/bzrlib/tests/__init__.py 2011-07-27 05:05:15 +0000
@@ -1730,6 +1730,9 @@
def overrideAttr(self, obj, attr_name, new=_unitialized_attr):
"""Overrides an object attribute restoring it after the test.
+ :note: This should be used with discretion; you should think about
+ whether it's better to make the code testable without monkey-patching.
+
:param obj: The object that will be mutated.
:param attr_name: The attribute name we want to preserve/override in
@@ -1760,6 +1763,26 @@
self.addCleanup(osutils.set_or_unset_env, name, value)
return value
+ def recordCalls(self, obj, attr_name):
+ """Monkeypatch in a wrapper that will record calls.
+
+ The monkeypatch is automatically removed when the test concludes.
+
+ :param obj: The namespace holding the reference to be replaced;
+ typically a module, class, or object.
+ :param attr_name: A string for the name of the attribute to
+ patch.
+ :returns: A list that will be extended with one item every time the
+ function is called, with a tuple of (args, kwargs).
+ """
+ calls = []
+
+ def decorator(*args, **kwargs):
+ calls.append((args, kwargs))
+ return orig(*args, **kwargs)
+ orig = self.overrideAttr(obj, attr_name, decorator)
+ return calls
+
def _cleanEnvironment(self):
for name, value in isolated_environ.iteritems():
self.overrideEnv(name, value)
=== modified file 'bzrlib/tests/test_selftest.py'
--- a/bzrlib/tests/test_selftest.py 2011-06-17 13:59:23 +0000
+++ b/bzrlib/tests/test_selftest.py 2011-07-26 07:30:06 +0000
@@ -1684,6 +1684,18 @@
test.run(unittest.TestResult())
self.assertEqual('original', obj.test_attr)
+ def test_recordCalls(self):
+ from bzrlib.tests import test_selftest
+ calls = self.recordCalls(
+ test_selftest, '_add_numbers')
+ self.assertEqual(test_selftest._add_numbers(2, 10),
+ 12)
+ self.assertEquals(calls, [((2, 10), {})])
+
+
+def _add_numbers(a, b):
+ return a + b
+
class _MissingFeature(tests.Feature):
def _probe(self):
=== modified file 'bzrlib/tests/test_transport.py'
--- a/bzrlib/tests/test_transport.py 2011-05-13 12:51:05 +0000
+++ b/bzrlib/tests/test_transport.py 2011-07-05 05:32:18 +0000
@@ -16,6 +16,7 @@
from cStringIO import StringIO
+import os
import subprocess
import sys
import threading
@@ -722,6 +723,25 @@
self.assertEquals(t.local_abspath(''), here)
+class TestLocalTransportWriteStream(tests.TestCaseWithTransport):
+
+ def test_local_fdatasync_calls_fdatasync(self):
+ """Check fdatasync on a stream tries to flush the data to the OS.
+
+ We can't easily observe the external effect but we can at least see
+ it's called.
+ """
+ t = self.get_transport('.')
+ calls = self.recordCalls(os, 'fdatasync')
+ w = t.open_write_stream('out')
+ w.write('foo')
+ w.fdatasync()
+ with open('out', 'rb') as f:
+ # Should have been flushed.
+ self.assertEquals(f.read(), 'foo')
+ self.assertEquals(len(calls), 1, calls)
+
+
class TestWin32LocalTransport(tests.TestCase):
def test_unc_clone_to_root(self):
=== modified file 'bzrlib/transport/__init__.py'
--- a/bzrlib/transport/__init__.py 2011-07-25 06:09:35 +0000
+++ b/bzrlib/transport/__init__.py 2011-07-27 05:05:15 +0000
@@ -27,6 +27,7 @@
"""
from cStringIO import StringIO
+import os
import sys
from bzrlib.lazy_import import lazy_import
@@ -231,10 +232,24 @@
def _close(self):
"""A hook point for subclasses that need to take action on close."""
- def close(self):
+ def close(self, want_fdatasync=False):
+ if want_fdatasync:
+ try:
+ self.fdatasync()
+ except errors.TransportNotPossible:
+ pass
self._close()
del _file_streams[self.transport.abspath(self.relpath)]
+ def fdatasync(self):
+ """Force data out to physical disk if possible.
+
+ :raises TransportNotPossible: If this transport has no way to
+ flush to disk.
+ """
+ raise errors.TransportNotPossible(
+ "%s cannot fdatasync" % (self.transport,))
+
class FileFileStream(FileStream):
"""A file stream object returned by open_write_stream.
@@ -249,6 +264,15 @@
def _close(self):
self.file_handle.close()
+ def fdatasync(self):
+ """Force data out to physical disk if possible."""
+ self.file_handle.flush()
+ try:
+ fileno = self.file_handle.fileno()
+ except AttributeError:
+ raise errors.TransportNotPossible()
+ osutils.fdatasync(fileno)
+
def write(self, bytes):
osutils.pump_string_file(bytes, self.file_handle)
=== modified file 'bzrlib/transport/local.py'
--- a/bzrlib/transport/local.py 2011-04-07 10:36:24 +0000
+++ b/bzrlib/transport/local.py 2011-07-05 06:41:37 +0000
@@ -327,10 +327,9 @@
def open_write_stream(self, relpath, mode=None):
"""See Transport.open_write_stream."""
- # initialise the file
- self.put_bytes_non_atomic(relpath, "", mode=mode)
abspath = self._abspath(relpath)
handle = osutils.open_file(abspath, 'wb')
+ handle.truncate()
if mode is not None:
self._check_mode_and_size(abspath, handle.fileno(), mode)
transport._file_streams[self.abspath(relpath)] = handle
=== modified file 'doc/developers/testing.txt'
--- a/doc/developers/testing.txt 2011-05-30 07:36:53 +0000
+++ b/doc/developers/testing.txt 2011-07-05 05:16:33 +0000
@@ -1018,6 +1018,23 @@
self.overrideAttr(osutils, '_cached_user_encoding', 'latin-1')
+This should be used with discretion; sometimes it's better to make the
+underlying code more testable so that you don't need to rely on monkey
+patching.
+
+
+Observing calls to a function
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Sometimes it's useful to observe how a function is called, typically when
+calling it has side effects but the side effects are not easy to observe
+from a test case. For instance the function may be expensive and we want
+to assert it is not called too many times, or it has effects on the
+machine that are safe to run during a test but not easy to measure. In
+these cases, you can use `recordCalls` which will monkey-patch in a
+wrapper that records when the function is called.
+
+
Temporarily changing environment variables
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
=== modified file 'doc/en/release-notes/bzr-2.4.txt'
--- a/doc/en/release-notes/bzr-2.4.txt 2011-07-18 14:43:35 +0000
+++ b/doc/en/release-notes/bzr-2.4.txt 2011-07-27 05:05:15 +0000
@@ -114,6 +114,13 @@
* ``Branch.open`` is now about 3x faster (about 2ms instead of 6.5ms).
(Andrew Bennetts).
+* Pack, dirstate, and index files are synced to persistent storage if
+ possible when writing finishes, to reduce the risk of problems caused by
+ a machine crash or similar problem. This can be turned off through the
+ ``dirstate.fdatasync`` and ``repository.fdatasync`` options, which can
+ be set in ``locations.conf`` or ``bazaar.conf``. (Martin Pool,
+ #343427)
+
Bug Fixes
*********
More information about the bazaar-commits
mailing list