Rev 3711: Race-free stat-fingerprint updating during commit via a new method get_file_with_stat. in http://people.ubuntu.com/~robertc/baz2.0/commit-dirstate
Robert Collins
robertc at robertcollins.net
Mon Sep 22 06:15:31 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/commit-dirstate
------------------------------------------------------------
revno: 3711
revision-id: robertc at robertcollins.net-20080922051520-uhr3pn61w141kagv
parent: robertc at robertcollins.net-20080919065341-5t5w1p2gi926nfia
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit-dirstate
timestamp: Mon 2008-09-22 15:15:20 +1000
message:
Race-free stat-fingerprint updating during commit via a new method get_file_with_stat.
added:
bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py test_get_file_with_s-20080922035909-lhdovrr36jpxmu0v-1
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/mutabletree.py mutabletree.py-20060906023413-4wlkalbdpsxi2r4y-2
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/__init__.py selftest.py-20050531073622-8d0e3c8845c97a64
bzrlib/tests/per_repository/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
bzrlib/tests/test_selftest.py test_selftest.py-20051202044319-c110a115d8c0456a
bzrlib/tests/test_workingtree_4.py test_workingtree_4.p-20070223025758-531n3tznl3zacv2o-1
bzrlib/tests/workingtree_implementations/__init__.py __init__.py-20060203003124-b2aa5aca21a8bfad
bzrlib/workingtree.py workingtree.py-20050511021032-29b6ec0a681e02e3
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'NEWS'
--- a/NEWS 2008-09-19 06:53:41 +0000
+++ b/NEWS 2008-09-22 05:15:20 +0000
@@ -47,6 +47,9 @@
INTERNALS:
+ * New race-free method on MutableTree ``get_file_with_stat`` for use
+ when generating stat cache results. (Robert Collins)
+
bzr 1.7rc1 2008-09-09
---------------------
=== modified file 'bzrlib/mutabletree.py'
--- a/bzrlib/mutabletree.py 2008-09-19 06:53:41 +0000
+++ b/bzrlib/mutabletree.py 2008-09-22 05:15:20 +0000
@@ -201,6 +201,20 @@
"""Helper function for add - sets the entries of kinds."""
raise NotImplementedError(self._gather_kinds)
+ def get_file_with_stat(self, file_id, path=None):
+ """Get a file handle and stat object for file_id.
+
+ The default implementation returns (self.get_file, None) for backwards
+ compatibility.
+
+ :param file_id: The file id to read.
+ :param path: The path of the file, if it is known.
+ :return: A tuple (file_handle, stat_value_or_None). If the tree has
+ no stat facility, or need for a stat cache feedback during commit,
+ it may return None for the second element of the tuple.
+ """
+ return (self.get_file(file_id, path), None)
+
@needs_read_lock
def last_revision(self):
"""Return the revision id of the last commit performed in this tree.
@@ -247,23 +261,20 @@
"""
raise NotImplementedError(self.mkdir)
- def _observed_sha1(self, file_id, path, sha1):
+ def _observed_sha1(self, file_id, path, (sha1, stat_value)):
"""Tell the tree we have observed a paths sha1.
The intent of this function is to allow trees that have a hashcache to
- update the hashcache during commit. If the observed file is too new to
- be safely hash-cached the tree will ignore it; this will likewise mean
- that a file changed subsequent to the file's being read and sha'd will
- not lead to a false cache entry. A file move could cause this, and
- in future work it would be better to pass the cache fingerprint around
- so that its never separated from the sha, and we can supply the
- fingerprint back to the tree during this code path.
+ update the hashcache during commit. If the observed file is too new
+ (based on the stat_value) to be safely hash-cached the tree will ignore
+ it.
The default implementation does nothing.
:param file_id: The file id
:param path: The file path
:param sha1: The sha 1 that was observed.
+ :param stat_value: A stat result for the file the sha1 was read from.
:return: None
"""
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2008-09-19 06:53:41 +0000
+++ b/bzrlib/repository.py 2008-09-22 05:15:20 +0000
@@ -338,6 +338,9 @@
# if the kind changed the content obviously has
if kind != parent_entry.kind:
store = True
+ # Stat cache fingerprint feedback for the caller - None as we usually
+ # don't generate one.
+ fingerprint = None
if kind == 'file':
if content_summary[2] is None:
raise ValueError("Files must not have executable = None")
@@ -367,10 +370,16 @@
# absence of a content change in the file.
nostore_sha = None
ie.executable = content_summary[2]
- lines = tree.get_file(ie.file_id, path).readlines()
+ file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
+ try:
+ lines = file_obj.readlines()
+ finally:
+ file_obj.close()
try:
ie.text_sha1, ie.text_size = self._add_text_to_weave(
ie.file_id, lines, heads, nostore_sha)
+ # Let the caller know we generated a stat fingerprint.
+ fingerprint = (ie.text_sha1, stat_value)
except errors.ExistingContent:
# Turns out that the file content was unchanged, and we were
# only going to store a new node if it was changed. Carry over
@@ -418,7 +427,7 @@
else:
raise NotImplementedError('unknown kind')
ie.revision = self._new_revision_id
- return self._get_delta(ie, basis_inv, path), True, ie.text_sha1
+ return self._get_delta(ie, basis_inv, path), True, fingerprint
def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
# Note: as we read the content directly from the tree, we know its not
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2008-09-04 20:32:04 +0000
+++ b/bzrlib/tests/__init__.py 2008-09-22 05:15:20 +0000
@@ -874,6 +874,21 @@
self.assertEqual(mode, mode_test,
'mode mismatch %o != %o' % (mode, mode_test))
+ def assertEqualStat(self, expected, actual):
+ """assert that expected and actual are the same stat result.
+
+ :param expected: A stat result.
+ :param actual: A stat result.
+ :raises AssertionError: If the expected and actual stat values differ
+ other than by atime.
+ """
+ self.assertEqual(expected.st_size, actual.st_size)
+ self.assertEqual(expected.st_mtime, actual.st_mtime)
+ self.assertEqual(expected.st_ctime, actual.st_ctime)
+ self.assertEqual(expected.st_dev, actual.st_dev)
+ self.assertEqual(expected.st_ino, actual.st_ino)
+ self.assertEqual(expected.st_mode, actual.st_mode)
+
def assertPositive(self, val):
"""Assert that val is greater than 0."""
self.assertTrue(val > 0, 'expected a positive value, but got %s' % val)
=== modified file 'bzrlib/tests/per_repository/test_commit_builder.py'
--- a/bzrlib/tests/per_repository/test_commit_builder.py 2008-09-19 06:53:41 +0000
+++ b/bzrlib/tests/per_repository/test_commit_builder.py 2008-09-22 05:15:20 +0000
@@ -414,7 +414,11 @@
else:
self.assertFalse(version_recorded)
if expect_fs_hash:
- self.assertEqual(tree.get_file_sha1(file_id), fs_hash)
+ tree_file_stat = tree.get_file_with_stat(file_id)
+ tree_file_stat[0].close()
+ self.assertEqual(2, len(fs_hash))
+ self.assertEqual(tree.get_file_sha1(file_id), fs_hash[0])
+ self.assertEqualStat(tree_file_stat[1], fs_hash[1])
else:
self.assertEqual(None, fs_hash)
new_entry = builder.new_inventory[file_id]
=== modified file 'bzrlib/tests/test_selftest.py'
--- a/bzrlib/tests/test_selftest.py 2008-09-04 21:05:18 +0000
+++ b/bzrlib/tests/test_selftest.py 2008-09-22 05:15:20 +0000
@@ -502,6 +502,19 @@
self.assertIsSameRealPath(self.test_dir, cwd)
self.assertIsSameRealPath(self.test_home_dir, os.environ['HOME'])
+ def test_assertEqualStat_equal(self):
+ from bzrlib.tests.test_dirstate import _FakeStat
+ self.build_tree(["foo"])
+ real = os.lstat("foo")
+ fake = _FakeStat(real.st_size, real.st_mtime, real.st_ctime,
+ real.st_dev, real.st_ino, real.st_mode)
+ self.assertEqualStat(real, fake)
+
+ def test_assertEqualStat_notequal(self):
+ self.build_tree(["foo", "bar"])
+ self.assertRaises(AssertionError, self.assertEqualStat,
+ os.lstat("foo"), os.lstat("bar"))
+
class TestTestCaseWithMemoryTransport(TestCaseWithMemoryTransport):
=== modified file 'bzrlib/tests/test_workingtree_4.py'
--- a/bzrlib/tests/test_workingtree_4.py 2008-09-19 06:53:41 +0000
+++ b/bzrlib/tests/test_workingtree_4.py 2008-09-22 05:15:20 +0000
@@ -599,9 +599,10 @@
def test_observed_sha1_cachable(self):
tree = self.get_tree_with_cachable_file_foo()
expected_sha1 = osutils.sha_file_by_name('foo')
+ statvalue = os.lstat("foo")
tree.lock_write()
try:
- tree._observed_sha1("foo-id", "foo", expected_sha1)
+ tree._observed_sha1("foo-id", "foo", (expected_sha1, statvalue))
self.assertEqual(expected_sha1,
tree._get_entry(path="foo")[1][0][1])
finally:
@@ -623,9 +624,117 @@
tree.lock_write()
try:
tree._observed_sha1("foo-id", "foo",
- osutils.sha_file_by_name('foo'))
+ (osutils.sha_file_by_name('foo'), os.lstat("foo")))
# Must not have changed
self.assertEqual(current_sha1,
tree._get_entry(path="foo")[1][0][1])
finally:
tree.unlock()
+
+ def test_get_file_with_stat_id_only(self):
+ # Explicit test to ensure we get a lstat value from WT4 trees.
+ tree = self.make_branch_and_tree('.')
+ self.build_tree(['foo'])
+ tree.add(['foo'], ['foo-id'])
+ tree.lock_read()
+ self.addCleanup(tree.unlock)
+ file_obj, statvalue = tree.get_file_with_stat('foo-id')
+ expected = os.lstat('foo')
+ self.assertEqualStat(expected, statvalue)
+ self.assertEqual(["contents of foo\n"], file_obj.readlines())
+
+
+class TestCorruptDirstate(TestCaseWithTransport):
+ """Tests for how we handle when the dirstate has been corrupted."""
+
+ def create_wt4(self):
+ control = bzrdir.BzrDirMetaFormat1().initialize(self.get_url())
+ control.create_repository()
+ control.create_branch()
+ tree = workingtree_4.WorkingTreeFormat4().initialize(control)
+ return tree
+
+ def test_invalid_rename(self):
+ tree = self.create_wt4()
+ # Create a corrupted dirstate
+ tree.lock_write()
+ try:
+ tree.commit('init') # We need a parent, or we always compare with NULL
+ state = tree.current_dirstate()
+ state._read_dirblocks_if_needed()
+ # Now add in an invalid entry, a rename with a dangling pointer
+ state._dirblocks[1][1].append((('', 'foo', 'foo-id'),
+ [('f', '', 0, False, ''),
+ ('r', 'bar', 0 , False, '')]))
+ self.assertListRaises(errors.CorruptDirstate,
+ tree.iter_changes, tree.basis_tree())
+ finally:
+ tree.unlock()
+
+ def get_simple_dirblocks(self, state):
+ """Extract the simple information from the DirState.
+
+ This returns the dirblocks, only with the sha1sum and stat details
+ filtered out.
+ """
+ simple_blocks = []
+ for block in state._dirblocks:
+ simple_block = (block[0], [])
+ for entry in block[1]:
+ # Include the key for each entry, and for each parent include
+ # just the minikind, so we know if it was
+ # present/absent/renamed/etc
+ simple_block[1].append((entry[0], [i[0] for i in entry[1]]))
+ simple_blocks.append(simple_block)
+ return simple_blocks
+
+ def test_update_basis_with_invalid_delta(self):
+ """When given an invalid delta, it should abort, and not be saved."""
+ self.build_tree(['dir/', 'dir/file'])
+ tree = self.create_wt4()
+ tree.lock_write()
+ self.addCleanup(tree.unlock)
+ tree.add(['dir', 'dir/file'], ['dir-id', 'file-id'])
+ first_revision_id = tree.commit('init')
+
+ root_id = tree.path2id('')
+ state = tree.current_dirstate()
+ state._read_dirblocks_if_needed()
+ self.assertEqual([
+ ('', [(('', '', root_id), ['d', 'd'])]),
+ ('', [(('', 'dir', 'dir-id'), ['d', 'd'])]),
+ ('dir', [(('dir', 'file', 'file-id'), ['f', 'f'])]),
+ ], self.get_simple_dirblocks(state))
+
+ tree.remove(['dir/file'])
+ self.assertEqual([
+ ('', [(('', '', root_id), ['d', 'd'])]),
+ ('', [(('', 'dir', 'dir-id'), ['d', 'd'])]),
+ ('dir', [(('dir', 'file', 'file-id'), ['a', 'f'])]),
+ ], self.get_simple_dirblocks(state))
+ # Make sure the removal is written to disk
+ tree.flush()
+
+ # self.assertRaises(Exception, tree.update_basis_by_delta,
+ new_dir = inventory.InventoryDirectory('dir-id', 'new-dir', root_id)
+ new_dir.revision = 'new-revision-id'
+ new_file = inventory.InventoryFile('file-id', 'new-file', root_id)
+ new_file.revision = 'new-revision-id'
+ self.assertRaises(errors.InconsistentDelta,
+ tree.update_basis_by_delta, 'new-revision-id',
+ [('dir', 'new-dir', 'dir-id', new_dir),
+ ('dir/file', 'new-dir/new-file', 'file-id', new_file),
+ ])
+ del state
+
+ # Now when we re-read the file it should not have been modified
+ tree.unlock()
+ tree.lock_read()
+ self.assertEqual(first_revision_id, tree.last_revision())
+ state = tree.current_dirstate()
+ state._read_dirblocks_if_needed()
+ self.assertEqual([
+ ('', [(('', '', root_id), ['d', 'd'])]),
+ ('', [(('', 'dir', 'dir-id'), ['d', 'd'])]),
+ ('dir', [(('dir', 'file', 'file-id'), ['a', 'f'])]),
+ ], self.get_simple_dirblocks(state))
=== modified file 'bzrlib/tests/workingtree_implementations/__init__.py'
--- a/bzrlib/tests/workingtree_implementations/__init__.py 2008-07-17 00:42:40 +0000
+++ b/bzrlib/tests/workingtree_implementations/__init__.py 2008-09-22 05:15:20 +0000
@@ -101,6 +101,7 @@
'bzrlib.tests.workingtree_implementations.test_commit',
'bzrlib.tests.workingtree_implementations.test_executable',
'bzrlib.tests.workingtree_implementations.test_flush',
+ 'bzrlib.tests.workingtree_implementations.test_get_file_with_stat',
'bzrlib.tests.workingtree_implementations.test_get_file_mtime',
'bzrlib.tests.workingtree_implementations.test_get_parent_ids',
'bzrlib.tests.workingtree_implementations.test_inv',
=== added file 'bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py'
--- a/bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py 2008-09-22 05:15:20 +0000
@@ -0,0 +1,49 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Test that all WorkingTree's implement get_file_with_stat."""
+
+import os
+
+from bzrlib.tests.workingtree_implementations import TestCaseWithWorkingTree
+
+
+class TestGetFileWithStat(TestCaseWithWorkingTree):
+
+ def test_get_file_with_stat_id_only(self):
+ tree = self.make_branch_and_tree('.')
+ self.build_tree(['foo'])
+ tree.add(['foo'], ['foo-id'])
+ tree.lock_read()
+ self.addCleanup(tree.unlock)
+ file_obj, statvalue = tree.get_file_with_stat('foo-id')
+ if statvalue is not None:
+ expected = os.lstat('foo')
+ self.assertEqualStat(expected, statvalue)
+ self.assertEqual(["contents of foo\n"], file_obj.readlines())
+
+ def test_get_file_with_stat_id_and_path(self):
+ tree = self.make_branch_and_tree('.')
+ self.build_tree(['foo'])
+ tree.add(['foo'], ['foo-id'])
+ tree.lock_read()
+ self.addCleanup(tree.unlock)
+ file_obj, statvalue = tree.get_file_with_stat('foo-id', 'foo')
+ expected = os.lstat('foo')
+ if statvalue is not None:
+ expected = os.lstat('foo')
+ self.assertEqualStat(expected, statvalue)
+ self.assertEqual(["contents of foo\n"], file_obj.readlines())
=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py 2008-09-05 05:12:35 +0000
+++ b/bzrlib/workingtree.py 2008-09-22 05:15:20 +0000
@@ -424,9 +424,14 @@
return osutils.lexists(self.abspath(filename))
def get_file(self, file_id, path=None):
+ return self.get_file_with_stat(file_id, path)[0]
+
+ def get_file_with_stat(self, file_id, path=None, _fstat=os.fstat):
+ """See MutableTree.get_file_with_stat."""
if path is None:
path = self.id2path(file_id)
- return self.get_file_byname(path)
+ file_obj = self.get_file_byname(path)
+ return (file_obj, _fstat(file_obj.fileno()))
def get_file_text(self, file_id):
return self.get_file(file_id).read()
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2008-09-19 06:53:41 +0000
+++ b/bzrlib/workingtree_4.py 2008-09-22 05:15:20 +0000
@@ -548,11 +548,10 @@
# path is missing on disk.
continue
- def _observed_sha1(self, file_id, path, sha1):
+ def _observed_sha1(self, file_id, path, (sha1, statvalue)):
"""See MutableTree._observed_sha1."""
state = self.current_dirstate()
entry = self._get_entry(file_id=file_id, path=path)
- statvalue = os.lstat(self.abspath(path))
state._observed_sha1(entry, sha1, statvalue)
def kind(self, file_id):
More information about the bazaar-commits
mailing list