Rev 2492: (broken) Change get_sha1_for_entry into update_entry in http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate-nohc

John Arbash Meinel john at arbash-meinel.com
Fri Mar 2 00:07:40 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate-nohc

------------------------------------------------------------
revno: 2492
revision-id: john at arbash-meinel.com-20070302000729-5qybyk0c6aldecpj
parent: john at arbash-meinel.com-20070301215619-wpt6kz8yem3ypu1b
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate-nohc
timestamp: Thu 2007-03-01 18:07:29 -0600
message:
  (broken) Change get_sha1_for_entry into update_entry
  which reads off of the disk, and updates dirblocks.
  This includes handling when files change into directories, and other
  such content changes.
  For some reason it breaks higher level tests, so for now, I'm
  leaving this as a local commit until I can sort out the rest.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-03-01 21:56:19 +0000
+++ b/bzrlib/dirstate.py	2007-03-02 00:07:29 +0000
@@ -197,6 +197,7 @@
 import errno
 import os
 import sha
+from stat import S_IEXEC
 import struct
 import time
 import zlib
@@ -1017,55 +1018,82 @@
             raise
         return result
 
-    def get_sha1_for_entry(self, entry, abspath, stat_value=None):
-        """Return the sha1 sum for a given file.
+    def update_entry(self, entry, abspath, stat_value=None):
+        """Update the entry based on what is actually on disk.
 
         :param entry: This is the dirblock entry for the file in question.
         :param abspath: The path on disk for this file.
         :param stat_value: (optional) if we already have done a stat on the
             file, re-use it.
-        :return: The sha1 hexdigest of the file (40 bytes)
+        :return: The sha1 hexdigest of the file (40 bytes) or link target of a
+                symlink.
         """
         # This code assumes that the entry passed in is directly held in one of
         # the internal _dirblocks. So the dirblock state must have already been
         # read.
         assert self._dirblock_state != DirState.NOT_IN_MEMORY
-        # TODO: jam 20070301 Because we now allow kind changes (files => dirs)
-        #       we should actually base this check on the stat value, since
-        #       that is the absolute measurement of whether we have a file or
-        #       directory or link. That means that this function might actually
-        #       change an entry from being a file => dir or dir => file, etc.
-        if entry[1][0][0] != 'f':
+        if stat_value is None:
+            try:
+                # We could inline os.lstat but the common case is that
+                # stat_value will be passed in, not read here.
+                stat_value = self._lstat(abspath, entry)
+            except (OSError, IOError), e:
+                if e.errno in (errno.ENOENT, errno.EACCES,
+                               errno.EPERM):
+                    # The entry is missing, consider it gone
+                    return None
+                raise
+
+        kind = osutils.file_kind_from_stat_mode(stat_value.st_mode)
+        try:
+            minikind = DirState._kind_to_minikind[kind]
+        except KeyError: # Unknown kind
             return None
-        if stat_value is None:
-            stat_value = os.lstat(abspath)
         packed_stat = pack_stat(stat_value)
-        saved_sha1_digest = entry[1][0][1]
-        saved_file_size = entry[1][0][2]
-        saved_packed_stat = entry[1][0][4]
-        if (packed_stat == saved_packed_stat
-            and saved_sha1_digest != ''
+        (saved_minikind, saved_link_or_sha1, saved_file_size,
+         saved_executable, saved_packed_stat) = entry[1][0]
+
+        if (minikind == saved_minikind
+            and packed_stat == saved_packed_stat
             # size should also be in packed_stat
             and saved_file_size == stat_value.st_size):
             # The stat hasn't changed since we saved, so we can potentially
             # re-use the saved sha hash.
+            if minikind != 'f':
+                return None
+
             cutoff = self._sha_cutoff_time()
             if (stat_value.st_mtime < cutoff
                 and stat_value.st_ctime < cutoff):
                 # Return the existing fingerprint
-                return saved_sha1_digest
+                return saved_link_or_sha1
+
         # If we have gotten this far, that means that we need to actually
-        # process the file.
-        new_sha1_digest = self._sha1_file(abspath)
-        # TODO: jam 20070301 Is it worth checking to see if the new sha is
-        #       actually different? I'm guessing not, because we wouldn't have
-        #       gotten this far otherwise.
-        entry[1][0] = ('f', new_sha1_digest, stat_value.st_size,
-                       entry[1][0][3], # Executable?
-                       packed_stat
-                      )
+        # process this entry.
+        link_or_sha1 = None
+        if minikind == 'f':
+            link_or_sha1 = self._sha1_file(abspath, entry)
+            executable = self._is_executable(stat_value.st_mode,
+                                             saved_executable)
+            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
+                           executable, packed_stat)
+        elif minikind == 'd':
+            link_or_sha1 = None
+            entry[1][0] = ('d', '', 0, False, packed_stat)
+            if saved_minikind != 'd':
+                # This changed from something into a directory. Make sure we
+                # have a directory block for it. This doesn't happen very
+                # often, so this doesn't have to be super fast.
+                block_index, entry_index, dir_present, file_present = \
+                    self._get_block_entry_index(entry[0][0], entry[0][1], 0)
+                self._ensure_block(block_index, entry_index,
+                                   osutils.pathjoin(entry[0][0], entry[0][1]))
+        elif minikind == 'l':
+            link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
+            entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
+                           False, packed_stat)
         self._dirblock_state = DirState.IN_MEMORY_MODIFIED
-        return new_sha1_digest
+        return link_or_sha1
 
     def _sha_cutoff_time(self):
         """Return cutoff time.
@@ -1073,9 +1101,17 @@
         Files modified more recently than this time are at risk of being
         undetectably modified and so can't be cached.
         """
+        # TODO: jam 20070301 Cache the cutoff time as long as we hold a lock.
+        #       time.time() isn't super expensive (approx 3.38us), but
+        #       when you call it 50,000 times it adds up.
+        #       For comparison, os.lstat() costs 7.2us if it is hot.
         return int(time.time()) - 3
 
-    def _sha1_file(self, abspath):
+    def _lstat(self, abspath, entry):
+        """Return the os.lstat value for this path."""
+        return os.lstat(abspath)
+
+    def _sha1_file(self, abspath, entry):
         """Calculate the SHA1 of a file by reading the full text"""
         f = file(abspath, 'rb', buffering=65000)
         try:
@@ -1083,6 +1119,17 @@
         finally:
             f.close()
 
+    def _is_executable(self, mode, old_executable):
+        """Is this file executable?"""
+        # TODO: jam 20070301 Win32 should just return the original value
+        return bool(S_IEXEC & mode)
+
+    def _read_link(self, abspath, old_link):
+        """Read the target of a symlink"""
+        # TODO: jam 200700301 On Win32, this could just return the value
+        #       already in memory.
+        return os.readlink(abspath)
+
     def get_ghosts(self):
         """Return a list of the parent tree revision ids that are ghosts."""
         self._read_header_if_needed()

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2007-03-01 21:56:19 +0000
+++ b/bzrlib/tests/test_dirstate.py	2007-03-02 00:07:29 +0000
@@ -1005,21 +1005,34 @@
         self.assertEqual(expected, dirblock_names)
 
 
-class Sha1InstrumentedDirState(dirstate.DirState):
+class InstrumentedDirState(dirstate.DirState):
     """An DirState with instrumented sha1 functionality."""
 
     def __init__(self, path):
-        super(Sha1InstrumentedDirState, self).__init__(path)
+        super(InstrumentedDirState, self).__init__(path)
         self._time_offset = 0
-        self._sha1_log = []
+        self._log = []
 
     def _sha_cutoff_time(self):
-        timestamp = super(Sha1InstrumentedDirState, self)._sha_cutoff_time()
+        timestamp = super(InstrumentedDirState, self)._sha_cutoff_time()
         return timestamp + self._time_offset
 
-    def _sha1_file(self, abspath):
-        self._sha1_log.append(abspath)
-        return super(Sha1InstrumentedDirState, self)._sha1_file(abspath)
+    def _sha1_file(self, abspath, entry):
+        self._log.append(('sha1', abspath))
+        return super(InstrumentedDirState, self)._sha1_file(abspath, entry)
+
+    def _read_link(self, abspath, old_link):
+        self._log.append(('read_link', abspath, old_link))
+        return super(InstrumentedDirState, self)._read_link(abspath, old_link)
+
+    def _lstat(self, abspath, entry):
+        self._log.append(('lstat', abspath))
+        return super(InstrumentedDirState, self)._lstat(abspath, entry)
+
+    def _is_executable(self, mode, old_executable):
+        self._log.append(('is_exec', mode, old_executable))
+        return super(InstrumentedDirState, self)._is_executable(mode,
+                                                                old_executable)
 
     def adjust_time(self, secs):
         """Move the clock forward or back.
@@ -1043,16 +1056,21 @@
         self.st_mode = mode
 
 
-class TestDirStateGetSha1(TestCaseWithDirState):
-    """Test the DirState.get_sha1 functions"""
+class TestUpdateEntry(TestCaseWithDirState):
+    """Test the DirState.update_entry functions"""
 
-    def test_get_sha1_from_entry(self):
-        state = Sha1InstrumentedDirState.initialize('dirstate')
+    def get_state_with_a(self):
+        """Create a DirState tracking a single object named 'a'"""
+        state = InstrumentedDirState.initialize('dirstate')
         self.addCleanup(state.unlock)
+        state.add('a', 'a-id', 'file', None, '')
+        entry = state._get_entry(0, path_utf8='a')
+        return state, entry
+
+    def test_update_entry(self):
+        state, entry = self.get_state_with_a()
         self.build_tree(['a'])
         # Add one where we don't provide the stat or sha already
-        state.add('a', 'a-id', 'file', None, '')
-        entry = state._get_entry(0, path_utf8='a')
         self.assertEqual(('', 'a', 'a-id'), entry[0])
         self.assertEqual([('f', '', 0, False, dirstate.DirState.NULLSTAT)],
                          entry[1])
@@ -1062,7 +1080,7 @@
                          state._dirblock_state)
 
         stat_value = os.lstat('a')
-        digest = state.get_sha1_for_entry(entry, abspath='a',
+        digest = state.update_entry(entry, abspath='a',
                                           stat_value=stat_value)
         self.assertEqual('b50e5406bb5e153ebbeb20268fcf37c87e1ecfb6', digest)
         packed_stat = dirstate.pack_stat(stat_value)
@@ -1071,7 +1089,8 @@
         self.assertEqual([('f', digest, 14, False, packed_stat)], entry[1])
         self.assertEqual(dirstate.DirState.IN_MEMORY_MODIFIED,
                          state._dirblock_state)
-        self.assertEqual(['a'], state._sha1_log)
+        mode = stat_value.st_mode
+        self.assertEqual([('sha1', 'a'), ('is_exec', mode, False)], state._log)
 
         state.save()
         self.assertEqual(dirstate.DirState.IN_MEMORY_UNMODIFIED,
@@ -1082,9 +1101,11 @@
         # guaranteed to look too new.
         state.adjust_time(-10)
 
-        digest = state.get_sha1_for_entry(entry, abspath='a',
+        digest = state.update_entry(entry, abspath='a',
                                           stat_value=stat_value)
-        self.assertEqual(['a', 'a'], state._sha1_log)
+        self.assertEqual([('sha1', 'a'), ('is_exec', mode, False),
+                          ('sha1', 'a'), ('is_exec', mode, False),
+                         ], state._log)
         self.assertEqual('b50e5406bb5e153ebbeb20268fcf37c87e1ecfb6', digest)
         self.assertEqual(dirstate.DirState.IN_MEMORY_MODIFIED,
                          state._dirblock_state)
@@ -1093,29 +1114,158 @@
         # However, if we move the clock forward so the file is considered
         # "stable", it should just returned the cached value.
         state.adjust_time(20)
-        digest = state.get_sha1_for_entry(entry, abspath='a',
+        digest = state.update_entry(entry, abspath='a',
                                           stat_value=stat_value)
         self.assertEqual('b50e5406bb5e153ebbeb20268fcf37c87e1ecfb6', digest)
-        self.assertEqual(['a', 'a'], state._sha1_log)
+        self.assertEqual([('sha1', 'a'), ('is_exec', mode, False),
+                          ('sha1', 'a'), ('is_exec', mode, False),
+                         ], state._log)
 
-    def test_get_sha1_from_entry_no_stat_value(self):
+    def test_update_entry_no_stat_value(self):
         """Passing the stat_value is optional."""
-        state = Sha1InstrumentedDirState.initialize('dirstate')
-        self.addCleanup(state.unlock)
+        state, entry = self.get_state_with_a()
         self.build_tree(['a'])
         # Add one where we don't provide the stat or sha already
-        state.add('a', 'a-id', 'file', None, '')
-        entry = state._get_entry(0, path_utf8='a')
-        digest = state.get_sha1_for_entry(entry, abspath='a')
+        digest = state.update_entry(entry, abspath='a')
         self.assertEqual('b50e5406bb5e153ebbeb20268fcf37c87e1ecfb6', digest)
 
-    def test_get_sha1_from_entry_dir(self):
-        state = Sha1InstrumentedDirState.initialize('dirstate')
-        self.addCleanup(state.unlock)
-        self.build_tree(['a/'])
-        state.add('a', 'a-id', 'directory', None, '')
-        entry = state._get_entry(0, path_utf8='a')
-        self.assertIs(None, state.get_sha1_for_entry(entry, 'a'))
+    def test_update_entry_dir(self):
+        state, entry = self.get_state_with_a()
+        self.build_tree(['a/'])
+        self.assertIs(None, state.update_entry(entry, 'a'))
+
+    def create_and_test_file(self, state, entry):
+        """Create a file at 'a' and verify the state finds it.
+
+        The state should already be versioning *something* at 'a'. This makes
+        sure that state.update_entry recognizes it as a file.
+        """
+        self.build_tree(['a'])
+        stat_value = os.lstat('a')
+        packed_stat = dirstate.pack_stat(stat_value)
+
+        link_or_sha1 = state.update_entry(entry, abspath='a')
+        self.assertEqual('b50e5406bb5e153ebbeb20268fcf37c87e1ecfb6',
+                         link_or_sha1)
+        self.assertEqual([('f', link_or_sha1, 14, False, packed_stat)],
+                         entry[1])
+        return packed_stat
+
+    def create_and_test_dir(self, state, entry):
+        """Create a directory at 'a' and verify the state finds it.
+
+        The state should already be versioning *something* at 'a'. This makes
+        sure that state.update_entry recognizes it as a directory.
+        """
+        self.build_tree(['a/'])
+        stat_value = os.lstat('a')
+        packed_stat = dirstate.pack_stat(stat_value)
+
+        link_or_sha1 = state.update_entry(entry, abspath='a')
+        self.assertIs(None, link_or_sha1)
+        self.assertEqual([('d', '', 0, False, packed_stat)], entry[1])
+
+        return packed_stat
+
+    def create_and_test_symlink(self, state, entry):
+        """Create a symlink at 'a' and verify the state finds it.
+
+        The state should already be versioning *something* at 'a'. This makes
+        sure that state.update_entry recognizes it as a symlink.
+
+        This should not be called if this platform does not have symlink
+        support.
+        """
+        os.symlink('path/to/foo', 'a')
+
+        stat_value = os.lstat('a')
+        packed_stat = dirstate.pack_stat(stat_value)
+
+        link_or_sha1 = state.update_entry(entry, abspath='a')
+        self.assertEqual('path/to/foo', link_or_sha1)
+        self.assertEqual([('l', 'path/to/foo', 11, False, packed_stat)],
+                         entry[1])
+        return packed_stat
+
+    def test_update_missing_file(self):
+        state, entry = self.get_state_with_a()
+        packed_stat = self.create_and_test_file(state, entry)
+        # Now if we delete the file, update_entry should recover and
+        # return None.
+        os.remove('a')
+        self.assertIs(None, state.update_entry(entry, abspath='a'))
+        # And the record shouldn't be changed.
+        digest = 'b50e5406bb5e153ebbeb20268fcf37c87e1ecfb6'
+        self.assertEqual([('f', digest, 14, False, packed_stat)],
+                         entry[1])
+
+    def test_update_missing_dir(self):
+        state, entry = self.get_state_with_a()
+        packed_stat = self.create_and_test_dir(state, entry)
+        # Now if we delete the directory, update_entry should recover and
+        # return None.
+        os.rmdir('a')
+        self.assertIs(None, state.update_entry(entry, abspath='a'))
+        self.assertEqual([('d', '', 0, False, packed_stat)], entry[1])
+
+    def test_update_missing_symlink(self):
+        if not osutils.has_symlinks():
+            return # PlatformDeficiency / TestSkipped
+        state, entry = self.get_state_with_a()
+        packed_stat = self.create_and_test_symlink(state, entry)
+        os.remove('a')
+        self.assertIs(None, state.update_entry(entry, abspath='a'))
+        # And the record shouldn't be changed.
+        self.assertEqual([('l', 'path/to/foo', 11, False, packed_stat)],
+                         entry[1])
+
+    def test_update_file_to_dir(self):
+        """If a file changes to a directory we return None for the sha.
+        We also update the inventory record.
+        """
+        state, entry = self.get_state_with_a()
+        self.create_and_test_file(state, entry)
+        os.remove('a')
+        self.create_and_test_dir(state, entry)
+
+    def test_update_file_to_symlink(self):
+        """File becomes a symlink"""
+        if not osutils.has_symlinks():
+            return # PlatformDeficiency / TestSkipped
+        state, entry = self.get_state_with_a()
+        self.create_and_test_file(state, entry)
+        os.remove('a')
+        self.create_and_test_symlink(state, entry)
+
+    def test_update_dir_to_file(self):
+        """Directory becoming a file updates the entry."""
+        state, entry = self.get_state_with_a()
+        self.create_and_test_dir(state, entry)
+        os.rmdir('a')
+        self.create_and_test_file(state, entry)
+
+    def test_update_dir_to_symlink(self):
+        """Directory becomes a symlink"""
+        if not osutils.has_symlinks():
+            return # PlatformDeficiency / TestSkipped
+        state, entry = self.get_state_with_a()
+        self.create_and_test_dir(state, entry)
+        os.rmdir('a')
+        self.create_and_test_symlink(state, entry)
+
+    def test_update_symlink_to_file(self):
+        """Symlink becomes a file"""
+        state, entry = self.get_state_with_a()
+        self.create_and_test_symlink(state, entry)
+        os.remove('a')
+        self.create_and_test_file(state, entry)
+
+    def test_update_symlink_to_dir(self):
+        """Symlink becomes a directory"""
+        state, entry = self.get_state_with_a()
+        self.create_and_test_symlink(state, entry)
+        os.remove('a')
+        self.create_and_test_dir(state, entry)
 
 
 class TestPackStat(TestCaseWithTransport):

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-03-01 21:56:19 +0000
+++ b/bzrlib/workingtree_4.py	2007-03-02 00:07:29 +0000
@@ -361,8 +361,11 @@
 
         file_abspath = self.abspath(path)
         state = self.current_dirstate()
-        return state.get_sha1_for_entry(entry, file_abspath,
-                                        stat_value=stat_value)
+        link_or_sha1 = state.update_entry(entry, file_abspath,
+                                          stat_value=stat_value)
+        if entry[1][0][0] == 'f':
+            return link_or_sha1
+        return None
 
     def _get_inventory(self):
         """Get the inventory for the tree. This is only valid within a lock."""
@@ -1599,6 +1602,11 @@
                 source_details = NULL_PARENT_DETAILS
             else:
                 source_details = entry[1][source_index]
+            if path_info is not None:
+                link_or_sha1 = state.update_entry(entry, abspath=path_info[4],
+                                                  stat_value=path_info[3])
+            else:
+                link_or_sha1 = None
             target_details = entry[1][target_index]
             source_minikind = source_details[0]
             target_minikind = target_details[0]
@@ -1649,26 +1657,15 @@
                         if source_minikind != 'f':
                             content_change = True
                         else:
-                            # has it changed? fast path: size, slow path: sha1.
-                            if source_details[2] != path_info[3].st_size:
-                                content_change = True
-                            else:
-                                # maybe the same. Get the hash
-                                new_hash = state.get_sha1_for_entry(entry,
-                                                abspath=path_info[4],
-                                                stat_value=path_info[3])
-                                content_change = (new_hash != source_details[1])
-                        target_exec = bool(
-                            stat.S_ISREG(path_info[3].st_mode)
-                            and stat.S_IEXEC & path_info[3].st_mode)
+                            # We could check the size, but we already have the
+                            # sha1 hash.
+                            content_change = (link_or_sha1 != source_details[1])
+                        target_exec = target_details[3]
                     elif target_kind == 'symlink':
                         if source_minikind != 'l':
                             content_change = True
                         else:
-                            # TODO: check symlink supported for windows users
-                            # and grab from target state here.
-                            link_target = os.readlink(path_info[4])
-                            content_change = (link_target != source_details[1])
+                            content_change = (link_or_sha1 != source_details[1])
                         target_exec = False
                     else:
                         raise Exception, "unknown kind %s" % path_info[2]



More information about the bazaar-commits mailing list