Rev 3711: Race-free stat-fingerprint updating during commit via a new method get_file_with_stat. in http://people.ubuntu.com/~robertc/baz2.0/commit-dirstate

Robert Collins robertc at robertcollins.net
Mon Sep 22 06:15:31 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/commit-dirstate

------------------------------------------------------------
revno: 3711
revision-id: robertc at robertcollins.net-20080922051520-uhr3pn61w141kagv
parent: robertc at robertcollins.net-20080919065341-5t5w1p2gi926nfia
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit-dirstate
timestamp: Mon 2008-09-22 15:15:20 +1000
message:
  Race-free stat-fingerprint updating during commit via a new method get_file_with_stat.
added:
  bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py test_get_file_with_s-20080922035909-lhdovrr36jpxmu0v-1
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/mutabletree.py          mutabletree.py-20060906023413-4wlkalbdpsxi2r4y-2
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
  bzrlib/tests/per_repository/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
  bzrlib/tests/test_selftest.py  test_selftest.py-20051202044319-c110a115d8c0456a
  bzrlib/tests/test_workingtree_4.py test_workingtree_4.p-20070223025758-531n3tznl3zacv2o-1
  bzrlib/tests/workingtree_implementations/__init__.py __init__.py-20060203003124-b2aa5aca21a8bfad
  bzrlib/workingtree.py          workingtree.py-20050511021032-29b6ec0a681e02e3
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'NEWS'
--- a/NEWS	2008-09-19 06:53:41 +0000
+++ b/NEWS	2008-09-22 05:15:20 +0000
@@ -47,6 +47,9 @@
 
   INTERNALS:
 
+    * New race-free method on MutableTree ``get_file_with_stat`` for use
+      when generating stat cache results. (Robert Collins)
+
 
 bzr 1.7rc1 2008-09-09
 ---------------------

=== modified file 'bzrlib/mutabletree.py'
--- a/bzrlib/mutabletree.py	2008-09-19 06:53:41 +0000
+++ b/bzrlib/mutabletree.py	2008-09-22 05:15:20 +0000
@@ -201,6 +201,20 @@
         """Helper function for add - sets the entries of kinds."""
         raise NotImplementedError(self._gather_kinds)
 
+    def get_file_with_stat(self, file_id, path=None):
+        """Get a file handle and stat object for file_id.
+
+        The default implementation returns (self.get_file, None) for backwards
+        compatibility.
+
+        :param file_id: The file id to read.
+        :param path: The path of the file, if it is known.
+        :return: A tuple (file_handle, stat_value_or_None). If the tree has
+            no stat facility, or need for a stat cache feedback during commit,
+            it may return None for the second element of the tuple.
+        """
+        return (self.get_file(file_id, path), None)
+
     @needs_read_lock
     def last_revision(self):
         """Return the revision id of the last commit performed in this tree.
@@ -247,23 +261,20 @@
         """
         raise NotImplementedError(self.mkdir)
 
-    def _observed_sha1(self, file_id, path, sha1):
+    def _observed_sha1(self, file_id, path, (sha1, stat_value)):
         """Tell the tree we have observed a paths sha1.
 
         The intent of this function is to allow trees that have a hashcache to
-        update the hashcache during commit. If the observed file is too new to
-        be safely hash-cached the tree will ignore it; this will likewise mean
-        that a file changed subsequent to the file's being read and sha'd will
-        not lead to a false cache entry. A file move could cause this, and 
-        in future work it would be better to pass the cache fingerprint around
-        so that its never separated from the sha, and we can supply the
-        fingerprint back to the tree during this code path.
+        update the hashcache during commit. If the observed file is too new
+        (based on the stat_value) to be safely hash-cached the tree will ignore
+        it. 
 
         The default implementation does nothing.
 
         :param file_id: The file id
         :param path: The file path
         :param sha1: The sha 1 that was observed.
+        :param stat_value: A stat result for the file the sha1 was read from.
         :return: None
         """
 

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-09-19 06:53:41 +0000
+++ b/bzrlib/repository.py	2008-09-22 05:15:20 +0000
@@ -338,6 +338,9 @@
             # if the kind changed the content obviously has
             if kind != parent_entry.kind:
                 store = True
+        # Stat cache fingerprint feedback for the caller - None as we usually
+        # don't generate one.
+        fingerprint = None
         if kind == 'file':
             if content_summary[2] is None:
                 raise ValueError("Files must not have executable = None")
@@ -367,10 +370,16 @@
                 # absence of a content change in the file.
                 nostore_sha = None
             ie.executable = content_summary[2]
-            lines = tree.get_file(ie.file_id, path).readlines()
+            file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
+            try:
+                lines = file_obj.readlines()
+            finally:
+                file_obj.close()
             try:
                 ie.text_sha1, ie.text_size = self._add_text_to_weave(
                     ie.file_id, lines, heads, nostore_sha)
+                # Let the caller know we generated a stat fingerprint.
+                fingerprint = (ie.text_sha1, stat_value)
             except errors.ExistingContent:
                 # Turns out that the file content was unchanged, and we were
                 # only going to store a new node if it was changed. Carry over
@@ -418,7 +427,7 @@
         else:
             raise NotImplementedError('unknown kind')
         ie.revision = self._new_revision_id
-        return self._get_delta(ie, basis_inv, path), True, ie.text_sha1
+        return self._get_delta(ie, basis_inv, path), True, fingerprint
 
     def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
         # Note: as we read the content directly from the tree, we know its not

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2008-09-04 20:32:04 +0000
+++ b/bzrlib/tests/__init__.py	2008-09-22 05:15:20 +0000
@@ -874,6 +874,21 @@
         self.assertEqual(mode, mode_test,
                          'mode mismatch %o != %o' % (mode, mode_test))
 
+    def assertEqualStat(self, expected, actual):
+        """assert that expected and actual are the same stat result.
+
+        :param expected: A stat result.
+        :param actual: A stat result.
+        :raises AssertionError: If the expected and actual stat values differ
+            other than by atime.
+        """
+        self.assertEqual(expected.st_size, actual.st_size)
+        self.assertEqual(expected.st_mtime, actual.st_mtime)
+        self.assertEqual(expected.st_ctime, actual.st_ctime)
+        self.assertEqual(expected.st_dev, actual.st_dev)
+        self.assertEqual(expected.st_ino, actual.st_ino)
+        self.assertEqual(expected.st_mode, actual.st_mode)
+
     def assertPositive(self, val):
         """Assert that val is greater than 0."""
         self.assertTrue(val > 0, 'expected a positive value, but got %s' % val)

=== modified file 'bzrlib/tests/per_repository/test_commit_builder.py'
--- a/bzrlib/tests/per_repository/test_commit_builder.py	2008-09-19 06:53:41 +0000
+++ b/bzrlib/tests/per_repository/test_commit_builder.py	2008-09-22 05:15:20 +0000
@@ -414,7 +414,11 @@
             else:
                 self.assertFalse(version_recorded)
             if expect_fs_hash:
-                self.assertEqual(tree.get_file_sha1(file_id), fs_hash)
+                tree_file_stat = tree.get_file_with_stat(file_id)
+                tree_file_stat[0].close()
+                self.assertEqual(2, len(fs_hash))
+                self.assertEqual(tree.get_file_sha1(file_id), fs_hash[0])
+                self.assertEqualStat(tree_file_stat[1], fs_hash[1])
             else:
                 self.assertEqual(None, fs_hash)
             new_entry = builder.new_inventory[file_id]

=== modified file 'bzrlib/tests/test_selftest.py'
--- a/bzrlib/tests/test_selftest.py	2008-09-04 21:05:18 +0000
+++ b/bzrlib/tests/test_selftest.py	2008-09-22 05:15:20 +0000
@@ -502,6 +502,19 @@
         self.assertIsSameRealPath(self.test_dir, cwd)
         self.assertIsSameRealPath(self.test_home_dir, os.environ['HOME'])
 
+    def test_assertEqualStat_equal(self):
+        from bzrlib.tests.test_dirstate import _FakeStat
+        self.build_tree(["foo"])
+        real = os.lstat("foo")
+        fake = _FakeStat(real.st_size, real.st_mtime, real.st_ctime,
+            real.st_dev, real.st_ino, real.st_mode)
+        self.assertEqualStat(real, fake)
+
+    def test_assertEqualStat_notequal(self):
+        self.build_tree(["foo", "bar"])
+        self.assertRaises(AssertionError, self.assertEqualStat,
+            os.lstat("foo"), os.lstat("bar"))
+
 
 class TestTestCaseWithMemoryTransport(TestCaseWithMemoryTransport):
 

=== modified file 'bzrlib/tests/test_workingtree_4.py'
--- a/bzrlib/tests/test_workingtree_4.py	2008-09-19 06:53:41 +0000
+++ b/bzrlib/tests/test_workingtree_4.py	2008-09-22 05:15:20 +0000
@@ -599,9 +599,10 @@
     def test_observed_sha1_cachable(self):
         tree = self.get_tree_with_cachable_file_foo()
         expected_sha1 = osutils.sha_file_by_name('foo')
+        statvalue = os.lstat("foo")
         tree.lock_write()
         try:
-            tree._observed_sha1("foo-id", "foo", expected_sha1)
+            tree._observed_sha1("foo-id", "foo", (expected_sha1, statvalue))
             self.assertEqual(expected_sha1,
                 tree._get_entry(path="foo")[1][0][1])
         finally:
@@ -623,9 +624,117 @@
         tree.lock_write()
         try:
             tree._observed_sha1("foo-id", "foo",
-                osutils.sha_file_by_name('foo'))
+                (osutils.sha_file_by_name('foo'), os.lstat("foo")))
             # Must not have changed
             self.assertEqual(current_sha1,
                 tree._get_entry(path="foo")[1][0][1])
         finally:
             tree.unlock()
+
+    def test_get_file_with_stat_id_only(self):
+        # Explicit test to ensure we get a lstat value from WT4 trees.
+        tree = self.make_branch_and_tree('.')
+        self.build_tree(['foo'])
+        tree.add(['foo'], ['foo-id'])
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        file_obj, statvalue = tree.get_file_with_stat('foo-id')
+        expected = os.lstat('foo')
+        self.assertEqualStat(expected, statvalue)
+        self.assertEqual(["contents of foo\n"], file_obj.readlines())
+
+
+class TestCorruptDirstate(TestCaseWithTransport):
+    """Tests for how we handle when the dirstate has been corrupted."""
+
+    def create_wt4(self):
+        control = bzrdir.BzrDirMetaFormat1().initialize(self.get_url())
+        control.create_repository()
+        control.create_branch()
+        tree = workingtree_4.WorkingTreeFormat4().initialize(control)
+        return tree
+
+    def test_invalid_rename(self):
+        tree = self.create_wt4()
+        # Create a corrupted dirstate
+        tree.lock_write()
+        try:
+            tree.commit('init') # We need a parent, or we always compare with NULL
+            state = tree.current_dirstate()
+            state._read_dirblocks_if_needed()
+            # Now add in an invalid entry, a rename with a dangling pointer
+            state._dirblocks[1][1].append((('', 'foo', 'foo-id'),
+                                            [('f', '', 0, False, ''),
+                                             ('r', 'bar', 0 , False, '')]))
+            self.assertListRaises(errors.CorruptDirstate,
+                                  tree.iter_changes, tree.basis_tree())
+        finally:
+            tree.unlock()
+
+    def get_simple_dirblocks(self, state):
+        """Extract the simple information from the DirState.
+
+        This returns the dirblocks, only with the sha1sum and stat details
+        filtered out.
+        """
+        simple_blocks = []
+        for block in state._dirblocks:
+            simple_block = (block[0], [])
+            for entry in block[1]:
+                # Include the key for each entry, and for each parent include
+                # just the minikind, so we know if it was
+                # present/absent/renamed/etc
+                simple_block[1].append((entry[0], [i[0] for i in entry[1]]))
+            simple_blocks.append(simple_block)
+        return simple_blocks
+
+    def test_update_basis_with_invalid_delta(self):
+        """When given an invalid delta, it should abort, and not be saved."""
+        self.build_tree(['dir/', 'dir/file'])
+        tree = self.create_wt4()
+        tree.lock_write()
+        self.addCleanup(tree.unlock)
+        tree.add(['dir', 'dir/file'], ['dir-id', 'file-id'])
+        first_revision_id = tree.commit('init')
+
+        root_id = tree.path2id('')
+        state = tree.current_dirstate()
+        state._read_dirblocks_if_needed()
+        self.assertEqual([
+            ('', [(('', '', root_id), ['d', 'd'])]),
+            ('', [(('', 'dir', 'dir-id'), ['d', 'd'])]),
+            ('dir', [(('dir', 'file', 'file-id'), ['f', 'f'])]),
+        ],  self.get_simple_dirblocks(state))
+
+        tree.remove(['dir/file'])
+        self.assertEqual([
+            ('', [(('', '', root_id), ['d', 'd'])]),
+            ('', [(('', 'dir', 'dir-id'), ['d', 'd'])]),
+            ('dir', [(('dir', 'file', 'file-id'), ['a', 'f'])]),
+        ],  self.get_simple_dirblocks(state))
+        # Make sure the removal is written to disk
+        tree.flush()
+
+        # self.assertRaises(Exception, tree.update_basis_by_delta,
+        new_dir = inventory.InventoryDirectory('dir-id', 'new-dir', root_id)
+        new_dir.revision = 'new-revision-id'
+        new_file = inventory.InventoryFile('file-id', 'new-file', root_id)
+        new_file.revision = 'new-revision-id'
+        self.assertRaises(errors.InconsistentDelta,
+            tree.update_basis_by_delta, 'new-revision-id',
+            [('dir', 'new-dir', 'dir-id', new_dir),
+             ('dir/file', 'new-dir/new-file', 'file-id', new_file),
+            ])
+        del state
+
+        # Now when we re-read the file it should not have been modified
+        tree.unlock()
+        tree.lock_read()
+        self.assertEqual(first_revision_id, tree.last_revision())
+        state = tree.current_dirstate()
+        state._read_dirblocks_if_needed()
+        self.assertEqual([
+            ('', [(('', '', root_id), ['d', 'd'])]),
+            ('', [(('', 'dir', 'dir-id'), ['d', 'd'])]),
+            ('dir', [(('dir', 'file', 'file-id'), ['a', 'f'])]),
+        ],  self.get_simple_dirblocks(state))

=== modified file 'bzrlib/tests/workingtree_implementations/__init__.py'
--- a/bzrlib/tests/workingtree_implementations/__init__.py	2008-07-17 00:42:40 +0000
+++ b/bzrlib/tests/workingtree_implementations/__init__.py	2008-09-22 05:15:20 +0000
@@ -101,6 +101,7 @@
         'bzrlib.tests.workingtree_implementations.test_commit',
         'bzrlib.tests.workingtree_implementations.test_executable',
         'bzrlib.tests.workingtree_implementations.test_flush',
+        'bzrlib.tests.workingtree_implementations.test_get_file_with_stat',
         'bzrlib.tests.workingtree_implementations.test_get_file_mtime',
         'bzrlib.tests.workingtree_implementations.test_get_parent_ids',
         'bzrlib.tests.workingtree_implementations.test_inv',

=== added file 'bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py'
--- a/bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py	2008-09-22 05:15:20 +0000
@@ -0,0 +1,49 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Test that all WorkingTree's implement get_file_with_stat."""
+
+import os
+
+from bzrlib.tests.workingtree_implementations import TestCaseWithWorkingTree
+
+
+class TestGetFileWithStat(TestCaseWithWorkingTree):
+
+    def test_get_file_with_stat_id_only(self):
+        tree = self.make_branch_and_tree('.')
+        self.build_tree(['foo'])
+        tree.add(['foo'], ['foo-id'])
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        file_obj, statvalue = tree.get_file_with_stat('foo-id')
+        if statvalue is not None:
+            expected = os.lstat('foo')
+            self.assertEqualStat(expected, statvalue)
+        self.assertEqual(["contents of foo\n"], file_obj.readlines())
+
+    def test_get_file_with_stat_id_and_path(self):
+        tree = self.make_branch_and_tree('.')
+        self.build_tree(['foo'])
+        tree.add(['foo'], ['foo-id'])
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        file_obj, statvalue = tree.get_file_with_stat('foo-id', 'foo')
+        expected = os.lstat('foo')
+        if statvalue is not None:
+            expected = os.lstat('foo')
+            self.assertEqualStat(expected, statvalue)
+        self.assertEqual(["contents of foo\n"], file_obj.readlines())

=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py	2008-09-05 05:12:35 +0000
+++ b/bzrlib/workingtree.py	2008-09-22 05:15:20 +0000
@@ -424,9 +424,14 @@
         return osutils.lexists(self.abspath(filename))
 
     def get_file(self, file_id, path=None):
+        return self.get_file_with_stat(file_id, path)[0]
+
+    def get_file_with_stat(self, file_id, path=None, _fstat=os.fstat):
+        """See MutableTree.get_file_with_stat."""
         if path is None:
             path = self.id2path(file_id)
-        return self.get_file_byname(path)
+        file_obj = self.get_file_byname(path)
+        return (file_obj, _fstat(file_obj.fileno()))
 
     def get_file_text(self, file_id):
         return self.get_file(file_id).read()

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2008-09-19 06:53:41 +0000
+++ b/bzrlib/workingtree_4.py	2008-09-22 05:15:20 +0000
@@ -548,11 +548,10 @@
                 # path is missing on disk.
                 continue
 
-    def _observed_sha1(self, file_id, path, sha1):
+    def _observed_sha1(self, file_id, path, (sha1, statvalue)):
         """See MutableTree._observed_sha1."""
         state = self.current_dirstate()
         entry = self._get_entry(file_id=file_id, path=path)
-        statvalue = os.lstat(self.abspath(path))
         state._observed_sha1(entry, sha1, statvalue)
 
     def kind(self, file_id):




More information about the bazaar-commits mailing list