Rev 2491: Update to dirstate locking. in http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate-nohc

John Arbash Meinel john at arbash-meinel.com
Thu Mar 1 21:56:31 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate-nohc

------------------------------------------------------------
revno: 2491
revision-id: john at arbash-meinel.com-20070301215619-wpt6kz8yem3ypu1b
parent: john at arbash-meinel.com-20070301195838-7p053os20qwr6qf7
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate-nohc
timestamp: Thu 2007-03-01 15:56:19 -0600
message:
  Update to dirstate locking.
  Move all of WT4.lock_* functions locally, so that they can
  properly interact and cleanup around when we lock/unlock the
  dirstate file.
  Change all Lock objects to be non-blocking. So that if someone
  grabs a lock on the DirState we find out immediately, rather
  than blocking.
  Change WT4.unlock() so that if the dirstate is dirty, it will
  save the contents even if it only has a read lock.
  It does this by trying to take a write lock, if it fails
  we just ignore it. If it succeeds, then we can flush to disk.
  This is more important now that DirState tracks file changes.
  It allows 'bzr status' to update the cached stat and sha values.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/lock.py                 lock.py-20050527050856-ec090bb51bc03349
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-03-01 19:58:38 +0000
+++ b/bzrlib/dirstate.py	2007-03-01 21:56:19 +0000
@@ -194,6 +194,7 @@
 import bisect
 import codecs
 import cStringIO
+import errno
 import os
 import sha
 import struct
@@ -285,6 +286,7 @@
         self._state_file = None
         self._filename = path
         self._lock_token = None
+        self._lock_state = None
         self._id_index = None
         self._end_of_header = None
         self._split_path_cache = {}
@@ -1028,9 +1030,13 @@
         # the internal _dirblocks. So the dirblock state must have already been
         # read.
         assert self._dirblock_state != DirState.NOT_IN_MEMORY
-        assert entry[1][0][0] == 'f', \
-            'can only get sha1 for a file not: %s %s' % (
-            DirState._minikind_to_kind[entry[1][0][0]], entry[0])
+        # TODO: jam 20070301 Because we now allow kind changes (files => dirs)
+        #       we should actually base this check on the stat value, since
+        #       that is the absolute measurement of whether we have a file or
+        #       directory or link. That means that this function might actually
+        #       change an entry from being a file => dir or dir => file, etc.
+        if entry[1][0][0] != 'f':
+            return None
         if stat_value is None:
             stat_value = os.lstat(abspath)
         packed_stat = pack_stat(stat_value)
@@ -1563,16 +1569,15 @@
         num_entries_line = self._state_file.readline()
         assert num_entries_line.startswith('num_entries: '), 'missing num_entries line'
         self._num_entries = int(num_entries_line[len('num_entries: '):-1])
-    
+
     def save(self):
         """Save any pending changes created during this session.
-        
+
         We reuse the existing file, because that prevents race conditions with
-        file creation, and we expect to be using oslocks on it in the near 
-        future to prevent concurrent modification and reads - because dirstates
-        incremental data aggretation is not compatible with reading a modified
-        file, and replacing a file in use by another process is impossible on 
-        windows.
+        file creation, and use oslocks on it to prevent concurrent modification
+        and reads - because dirstates incremental data aggretation is not
+        compatible with reading a modified file, and replacing a file in use by
+        another process is impossible on windows.
 
         A dirstate in read only mode should be smart enough though to validate
         that the file has not changed, and otherwise discard its cache and
@@ -1581,12 +1586,38 @@
         """
         if (self._header_state == DirState.IN_MEMORY_MODIFIED or
             self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
-            self._state_file.seek(0)
-            self._state_file.writelines(self.get_lines())
-            self._state_file.truncate()
-            self._state_file.flush()
-            self._header_state = DirState.IN_MEMORY_UNMODIFIED
-            self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
+
+            if self._lock_state == 'w':
+                out_file = self._state_file
+                wlock = None
+            else:
+                # Try to grab a write lock so that we can update the file.
+                try:
+                    wlock = lock.WriteLock(self._filename)
+                except (errors.LockError, errors.LockContention), e:
+                    # We couldn't grab the lock, so just leave things dirty in
+                    # memory.
+                    return
+                except IOError, e:
+                    # This may be a read-only tree, or someone else may have a
+                    # ReadLock. so handle the case when we cannot grab a write
+                    # lock
+                    if e.errno in (errno.ENOENT, errno.EPERM, errno.EACCES,
+                                   errno.EAGAIN):
+                        # Ignore these errors and just don't save anything
+                        return
+                    raise
+                out_file = wlock.f
+            try:
+                out_file.seek(0)
+                out_file.writelines(self.get_lines())
+                out_file.truncate()
+                out_file.flush()
+                self._header_state = DirState.IN_MEMORY_UNMODIFIED
+                self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
+            finally:
+                if wlock is not None:
+                    wlock.unlock()
 
     def _set_data(self, parent_ids, dirblocks):
         """Set the full dirstate data in memory.
@@ -2053,7 +2084,12 @@
         """Acquire a read lock on the dirstate"""
         if self._lock_token is not None:
             raise errors.LockContention(self._lock_token)
+        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
+        #       already in memory, we could read just the header and check for
+        #       any modification. If not modified, we can just leave things
+        #       alone
         self._lock_token = lock.ReadLock(self._filename)
+        self._lock_state = 'r'
         self._state_file = self._lock_token.f
         self._wipe_state()
 
@@ -2061,7 +2097,12 @@
         """Acquire a write lock on the dirstate"""
         if self._lock_token is not None:
             raise errors.LockContention(self._lock_token)
+        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
+        #       already in memory, we could read just the header and check for
+        #       any modification. If not modified, we can just leave things
+        #       alone
         self._lock_token = lock.WriteLock(self._filename)
+        self._lock_state = 'w'
         self._state_file = self._lock_token.f
         self._wipe_state()
 
@@ -2069,7 +2110,12 @@
         """Drop any locks held on the dirstate"""
         if self._lock_token is None:
             raise errors.LockNotHeld(self)
+        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
+        #       already in memory, we could read just the header and check for
+        #       any modification. If not modified, we can just leave things
+        #       alone
         self._state_file = None
+        self._lock_state = None
         self._lock_token.unlock()
         self._lock_token = None
         self._split_path_cache = {}

=== modified file 'bzrlib/lock.py'
--- a/bzrlib/lock.py	2007-02-25 22:50:34 +0000
+++ b/bzrlib/lock.py	2007-03-01 21:56:19 +0000
@@ -87,7 +87,7 @@
         def _clear_f(self):
             """Clear the self.f attribute cleanly."""
             self.f.close()
-            del self.f 
+            del self.f
 
 
     class _fcntl_WriteLock(_fcntl_FileLock):
@@ -97,17 +97,22 @@
         def __init__(self, filename):
             # standard IO errors get exposed directly.
             self._open(filename, 'rb+')
+            self.filename = realpath(filename)
+            if self.filename in self.open_locks:
+                self._clear_f()
+                raise LockContention("Lock already held.")
+            # reserve a slot for this lock - even if the lockf call fails,
+            # at thisi point unlock() will be called, because self.f is set.
+            # TODO: make this fully threadsafe, if we decide we care.
+            self.open_locks[self.filename] = self.filename
             try:
-                self.filename = realpath(filename)
-                if self.filename in self.open_locks:
-                    self._clear_f()
-                    raise LockContention("Lock already held.")
-                # reserve a slot for this lock - even if the lockf call fails, 
-                # at thisi point unlock() will be called, because self.f is set.
-                # TODO: make this fully threadsafe, if we decide we care.
-                self.open_locks[self.filename] = self.filename
-                fcntl.lockf(self.f, fcntl.LOCK_EX)
+                # LOCK_NB will cause IOError to be raised if we can't grab a
+                # lock right away.
+                fcntl.lockf(self.f, fcntl.LOCK_EX | fcntl.LOCK_NB)
             except IOError, e:
+                if e.errno in (errno.EAGAIN, errno.EACCES):
+                    # We couldn't grab the lock
+                    self.unlock()
                 # we should be more precise about whats a locking
                 # error and whats a random-other error
                 raise LockError(e)
@@ -123,7 +128,9 @@
             # standard IO errors get exposed directly.
             self._open(filename, 'rb')
             try:
-                fcntl.lockf(self.f, fcntl.LOCK_SH)
+                # LOCK_NB will cause IOError to be raised if we can't grab a
+                # lock right away.
+                fcntl.lockf(self.f, fcntl.LOCK_SH | fcntl.LOCK_NB)
             except IOError, e:
                 # we should be more precise about whats a locking
                 # error and whats a random-other error
@@ -198,6 +205,7 @@
                 LOCK_SH = 1
                 LOCK_EX = 2
                 LOCK_NB = 4
+
                 def unlock(self):
                     _msvc_unlock(self.f)
                     self.f.close()
@@ -206,12 +214,14 @@
 
             class _msvc_ReadLock(_msvc_FileLock):
                 def __init__(self, filename):
-                    _msvc_lock(self._open(filename, 'rb'), self.LOCK_SH)
+                    _msvc_lock(self._open(filename, 'rb'),
+                               self.LOCK_SH | self.LOCK_NB)
 
 
             class _msvc_WriteLock(_msvc_FileLock):
                 def __init__(self, filename):
-                    _msvc_lock(self._open(filename, 'rb+'), self.LOCK_EX)
+                    _msvc_lock(self._open(filename, 'rb+'),
+                               self.LOCK_EX | self.LOCK_NB)
 
 
             def _msvc_lock(f, flags):

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2007-03-01 19:58:38 +0000
+++ b/bzrlib/tests/test_dirstate.py	2007-03-01 21:56:19 +0000
@@ -1109,6 +1109,14 @@
         digest = state.get_sha1_for_entry(entry, abspath='a')
         self.assertEqual('b50e5406bb5e153ebbeb20268fcf37c87e1ecfb6', digest)
 
+    def test_get_sha1_from_entry_dir(self):
+        state = Sha1InstrumentedDirState.initialize('dirstate')
+        self.addCleanup(state.unlock)
+        self.build_tree(['a/'])
+        state.add('a', 'a-id', 'directory', None, '')
+        entry = state._get_entry(0, path_utf8='a')
+        self.assertIs(None, state.get_sha1_for_entry(entry, 'a'))
+
 
 class TestPackStat(TestCaseWithTransport):
 

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-03-01 19:58:38 +0000
+++ b/bzrlib/workingtree_4.py	2007-03-01 21:56:19 +0000
@@ -224,12 +224,12 @@
         return result
 
     def current_dirstate(self):
-        """Return the current dirstate object. 
+        """Return the current dirstate object.
 
         This is not part of the tree interface and only exposed for ease of
         testing.
 
-        :raises errors.NotWriteLocked: when not in a lock. 
+        :raises errors.NotWriteLocked: when not in a lock.
         """
         if not self._control_files._lock_count:
             raise errors.ObjectNotLocked(self)
@@ -237,7 +237,7 @@
 
     def _current_dirstate(self):
         """Internal function that does not check lock status.
-        
+
         This is needed for break_lock which also needs the dirstate.
         """
         if self._dirstate is not None:
@@ -433,22 +433,45 @@
             return None
 
     def lock_read(self):
-        super(WorkingTree4, self).lock_read()
-        if self._dirstate is None:
-            self.current_dirstate()
-            self._dirstate.lock_read()
+        """See Branch.lock_read, and WorkingTree.unlock."""
+        self.branch.lock_read()
+        try:
+            self._control_files.lock_read()
+            try:
+                state = self.current_dirstate()
+                if not state._lock_token:
+                    state.lock_read()
+            except:
+                self._control_files.unlock()
+                raise
+        except:
+            self.branch.unlock()
+            raise
+
+    def _lock_self_write(self):
+        """This should be called after the branch is locked."""
+        try:
+            self._control_files.lock_write()
+            try:
+                state = self.current_dirstate()
+                if not state._lock_token:
+                    state.lock_write()
+            except:
+                self._control_files.unlock()
+                raise
+        except:
+            self.branch.unlock()
+            raise
 
     def lock_tree_write(self):
-        super(WorkingTree4, self).lock_tree_write()
-        if self._dirstate is None:
-            self.current_dirstate()
-            self._dirstate.lock_write()
+        """See MutableTree.lock_tree_write, and WorkingTree.unlock."""
+        self.branch.lock_read()
+        self._lock_self_write()
 
     def lock_write(self):
-        super(WorkingTree4, self).lock_write()
-        if self._dirstate is None:
-            self.current_dirstate()
-            self._dirstate.lock_write()
+        """See MutableTree.lock_write, and WorkingTree.unlock."""
+        self.branch.lock_write()
+        self._lock_self_write()
 
     @needs_tree_write_lock
     def move(self, from_paths, to_dir, after=False):
@@ -934,7 +957,13 @@
                 if self._dirty:
                     self.flush()
             if self._dirstate is not None:
+                # This is a no-op if there are no modifications.
+                self._dirstate.save()
                 self._dirstate.unlock()
+            # TODO: jam 20070301 We shouldn't have to wipe the dirstate at this
+            #       point. Instead, it could check if the header has been
+            #       modified when it is locked, and if not, it can hang on to
+            #       the data it has in memory.
             self._dirstate = None
             self._inventory = None
         # reverse order of locking.



More information about the bazaar-commits mailing list