Rev 5809: (jameinel) Fix bug #765881, in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Thu Apr 21 13:22:22 UTC 2011


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5809 [merge]
revision-id: pqm at pqm.ubuntu.com-20110421132208-uc7obchku13062gz
parent: pqm at pqm.ubuntu.com-20110420210140-1akzbrzuwftl390r
parent: john at arbash-meinel.com-20110419142227-n0xwev5qs6h7nv91
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2011-04-21 13:22:08 +0000
message:
  (jameinel) Fix bug #765881,
   we don't need to save the dirstate file for trivial changes. (John A Meinel)
modified:
  bzrlib/_dirstate_helpers_pyx.pyx dirstate_helpers.pyx-20070503201057-u425eni465q4idwn-3
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/tests/test__dirstate_helpers.py test_dirstate_helper-20070504035751-jsbn00xodv0y1eve-2
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
  doc/en/release-notes/bzr-2.4.txt bzr2.4.txt-20110114053217-k7ym9jfz243fddjm-1
=== modified file 'bzrlib/_dirstate_helpers_pyx.pyx'
--- a/bzrlib/_dirstate_helpers_pyx.pyx	2010-08-27 18:02:22 +0000
+++ b/bzrlib/_dirstate_helpers_pyx.pyx	2011-04-19 13:59:07 +0000
@@ -866,6 +866,7 @@
     # _st mode of the compiled stat objects.
     cdef int minikind, saved_minikind
     cdef void * details
+    cdef int worth_saving
     minikind = minikind_from_mode(stat_value.st_mode)
     if 0 == minikind:
         return None
@@ -900,6 +901,7 @@
     # If we have gotten this far, that means that we need to actually
     # process this entry.
     link_or_sha1 = None
+    worth_saving = 1
     if minikind == c'f':
         executable = self._is_executable(stat_value.st_mode,
                                          saved_executable)
@@ -916,10 +918,15 @@
             entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
                            executable, packed_stat)
         else:
-            entry[1][0] = ('f', '', stat_value.st_size,
-                           executable, DirState.NULLSTAT)
+            # This file is not worth caching the sha1. Either it is too new, or
+            # it is newly added. Regardless, the only things we are changing
+            # are derived from the stat, and so are not worth caching. So we do
+            # *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the
+            # updated values if there is *other* data worth saving.)
+            entry[1][0] = ('f', '', stat_value.st_size, executable,
+                           DirState.NULLSTAT)
+            worth_saving = 0
     elif minikind == c'd':
-        link_or_sha1 = None
         entry[1][0] = ('d', '', 0, False, packed_stat)
         if saved_minikind != c'd':
             # This changed from something into a directory. Make sure we
@@ -929,6 +936,10 @@
                 self._get_block_entry_index(entry[0][0], entry[0][1], 0)
             self._ensure_block(block_index, entry_index,
                                pathjoin(entry[0][0], entry[0][1]))
+        else:
+            # Any changes are derived trivially from the stat object, not worth
+            # re-writing a dirstate for just this
+            worth_saving = 0
     elif minikind == c'l':
         link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
         if self._cutoff_time is None:
@@ -940,7 +951,8 @@
         else:
             entry[1][0] = ('l', '', stat_value.st_size,
                            False, DirState.NULLSTAT)
-    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+    if worth_saving:
+        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
     return link_or_sha1
 
 

=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2011-04-08 12:28:05 +0000
+++ b/bzrlib/dirstate.py	2011-04-19 13:59:07 +0000
@@ -3205,6 +3205,7 @@
     # If we have gotten this far, that means that we need to actually
     # process this entry.
     link_or_sha1 = None
+    worth_saving = True
     if minikind == 'f':
         executable = state._is_executable(stat_value.st_mode,
                                          saved_executable)
@@ -3226,6 +3227,7 @@
         else:
             entry[1][0] = ('f', '', stat_value.st_size,
                            executable, DirState.NULLSTAT)
+            worth_saving = False
     elif minikind == 'd':
         link_or_sha1 = None
         entry[1][0] = ('d', '', 0, False, packed_stat)
@@ -3237,6 +3239,8 @@
                 state._get_block_entry_index(entry[0][0], entry[0][1], 0)
             state._ensure_block(block_index, entry_index,
                                osutils.pathjoin(entry[0][0], entry[0][1]))
+        else:
+            worth_saving = False
     elif minikind == 'l':
         link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
         if state._cutoff_time is None:
@@ -3248,7 +3252,8 @@
         else:
             entry[1][0] = ('l', '', stat_value.st_size,
                            False, DirState.NULLSTAT)
-    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
+    if worth_saving:
+        state._dirblock_state = DirState.IN_MEMORY_MODIFIED
     return link_or_sha1
 
 

=== modified file 'bzrlib/tests/test__dirstate_helpers.py'
--- a/bzrlib/tests/test__dirstate_helpers.py	2011-01-10 22:20:12 +0000
+++ b/bzrlib/tests/test__dirstate_helpers.py	2011-04-19 13:59:07 +0000
@@ -871,10 +871,12 @@
                                           stat_value=stat_value)
         self.assertEqual(None, link_or_sha1)
 
-        # The dirblock entry should not have cached the file's sha1 (too new)
+        # The dirblock entry should not have computed or cached the file's
+        # sha1, but it did update the files' st_size. However, this is not
+        # worth writing a dirstate file for, so we leave the state UNMODIFIED
         self.assertEqual(('f', '', 14, False, dirstate.DirState.NULLSTAT),
                          entry[1][0])
-        self.assertEqual(dirstate.DirState.IN_MEMORY_MODIFIED,
+        self.assertEqual(dirstate.DirState.IN_MEMORY_UNMODIFIED,
                          state._dirblock_state)
         mode = stat_value.st_mode
         self.assertEqual([('is_exec', mode, False)], state._log)
@@ -883,9 +885,8 @@
         self.assertEqual(dirstate.DirState.IN_MEMORY_UNMODIFIED,
                          state._dirblock_state)
 
-        # If we do it again right away, we don't know if the file has changed
-        # so we will re-read the file. Roll the clock back so the file is
-        # guaranteed to look too new.
+        # Roll the clock back so the file is guaranteed to look too new. We
+        # should still not compute the sha1.
         state.adjust_time(-10)
         del state._log[:]
 
@@ -893,7 +894,7 @@
                                           stat_value=stat_value)
         self.assertEqual([('is_exec', mode, False)], state._log)
         self.assertEqual(None, link_or_sha1)
-        self.assertEqual(dirstate.DirState.IN_MEMORY_MODIFIED,
+        self.assertEqual(dirstate.DirState.IN_MEMORY_UNMODIFIED,
                          state._dirblock_state)
         self.assertEqual(('f', '', 14, False, dirstate.DirState.NULLSTAT),
                          entry[1][0])
@@ -909,6 +910,8 @@
         self.assertEqual([('is_exec', mode, False)], state._log)
         self.assertEqual(('f', '', 14, False, dirstate.DirState.NULLSTAT),
                          entry[1][0])
+        self.assertEqual(dirstate.DirState.IN_MEMORY_UNMODIFIED,
+                         state._dirblock_state)
 
         # If the file is no longer new, and the clock has been moved forward
         # sufficiently, it will cache the sha.
@@ -1005,12 +1008,25 @@
         self.build_tree(['a/'])
         state.adjust_time(+20)
         self.assertIs(None, self.do_update_entry(state, entry, 'a'))
+        # a/ used to be a file, but is now a directory, worth saving
         self.assertEqual(dirstate.DirState.IN_MEMORY_MODIFIED,
                          state._dirblock_state)
         state.save()
         self.assertEqual(dirstate.DirState.IN_MEMORY_UNMODIFIED,
                          state._dirblock_state)
-        self.assertIs(None, self.do_update_entry(state, entry, 'a'))
+        # No changes to a/ means not worth saving.
+        self.assertIs(None, self.do_update_entry(state, entry, 'a'))
+        self.assertEqual(dirstate.DirState.IN_MEMORY_UNMODIFIED,
+                         state._dirblock_state)
+        # Change the last-modified time for the directory
+        t = time.time() - 100.0
+        os.utime('a', (t, t))
+        saved_packed_stat = entry[1][0][-1]
+        self.assertIs(None, self.do_update_entry(state, entry, 'a'))
+        # We *do* go ahead and update the information in the dirblocks, but we
+        # don't bother setting IN_MEMORY_MODIFIED because it is trivial to
+        # recompute.
+        self.assertNotEqual(saved_packed_stat, entry[1][0][-1])
         self.assertEqual(dirstate.DirState.IN_MEMORY_UNMODIFIED,
                          state._dirblock_state)
 

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2011-01-25 22:54:08 +0000
+++ b/bzrlib/tests/test_dirstate.py	2011-04-19 14:22:27 +0000
@@ -527,6 +527,19 @@
 
 class TestDirStateOnFile(TestCaseWithDirState):
 
+    def create_updated_dirstate(self):
+        self.build_tree(['a-file'])
+        tree = self.make_branch_and_tree('.')
+        tree.add(['a-file'], ['a-id'])
+        tree.commit('add a-file')
+        # Save and unlock the state, re-open it in readonly mode
+        state = dirstate.DirState.from_tree(tree, 'dirstate')
+        state.save()
+        state.unlock()
+        state = dirstate.DirState.on_file('dirstate')
+        state.lock_read()
+        return state
+
     def test_construct_with_path(self):
         tree = self.make_branch_and_tree('tree')
         state = dirstate.DirState.from_tree(tree, 'dirstate.from_tree')
@@ -561,36 +574,24 @@
             state.unlock()
 
     def test_can_save_in_read_lock(self):
-        self.build_tree(['a-file'])
-        state = dirstate.DirState.initialize('dirstate')
-        try:
-            # No stat and no sha1 sum.
-            state.add('a-file', 'a-file-id', 'file', None, '')
-            state.save()
-        finally:
-            state.unlock()
-
-        # Now open in readonly mode
-        state = dirstate.DirState.on_file('dirstate')
-        state.lock_read()
+        state = self.create_updated_dirstate()
         try:
             entry = state._get_entry(0, path_utf8='a-file')
             # The current size should be 0 (default)
             self.assertEqual(0, entry[1][0][2])
             # We should have a real entry.
             self.assertNotEqual((None, None), entry)
-            # Make sure everything is old enough
+            # Set the cutoff-time into the future, so things look cacheable
             state._sha_cutoff_time()
-            state._cutoff_time += 10
-            # Change the file length
-            self.build_tree_contents([('a-file', 'shorter')])
-            sha1sum = dirstate.update_entry(state, entry, 'a-file',
-                os.lstat('a-file'))
-            # new file, no cached sha:
-            self.assertEqual(None, sha1sum)
+            state._cutoff_time += 10.0
+            st = os.lstat('a-file')
+            sha1sum = dirstate.update_entry(state, entry, 'a-file', st)
+            # We updated the current sha1sum because the file is cacheable
+            self.assertEqual('ecc5374e9ed82ad3ea3b4d452ea995a5fd3e70e3',
+                             sha1sum)
 
             # The dirblock has been updated
-            self.assertEqual(7, entry[1][0][2])
+            self.assertEqual(st.st_size, entry[1][0][2])
             self.assertEqual(dirstate.DirState.IN_MEMORY_MODIFIED,
                              state._dirblock_state)
 
@@ -606,29 +607,24 @@
         state.lock_read()
         try:
             entry = state._get_entry(0, path_utf8='a-file')
-            self.assertEqual(7, entry[1][0][2])
+            self.assertEqual(st.st_size, entry[1][0][2])
         finally:
             state.unlock()
 
     def test_save_fails_quietly_if_locked(self):
         """If dirstate is locked, save will fail without complaining."""
-        self.build_tree(['a-file'])
-        state = dirstate.DirState.initialize('dirstate')
-        try:
-            # No stat and no sha1 sum.
-            state.add('a-file', 'a-file-id', 'file', None, '')
-            state.save()
-        finally:
-            state.unlock()
-
-        state = dirstate.DirState.on_file('dirstate')
-        state.lock_read()
+        state = self.create_updated_dirstate()
         try:
             entry = state._get_entry(0, path_utf8='a-file')
-            sha1sum = dirstate.update_entry(state, entry, 'a-file',
-                os.lstat('a-file'))
-            # No sha - too new
-            self.assertEqual(None, sha1sum)
+            # No cached sha1 yet.
+            self.assertEqual('', entry[1][0][1])
+            # Set the cutoff-time into the future, so things look cacheable
+            state._sha_cutoff_time()
+            state._cutoff_time += 10.0
+            st = os.lstat('a-file')
+            sha1sum = dirstate.update_entry(state, entry, 'a-file', st)
+            self.assertEqual('ecc5374e9ed82ad3ea3b4d452ea995a5fd3e70e3',
+                             sha1sum)
             self.assertEqual(dirstate.DirState.IN_MEMORY_MODIFIED,
                              state._dirblock_state)
 

=== modified file 'doc/en/release-notes/bzr-2.4.txt'
--- a/doc/en/release-notes/bzr-2.4.txt	2011-04-19 14:19:50 +0000
+++ b/doc/en/release-notes/bzr-2.4.txt	2011-04-21 13:22:08 +0000
@@ -304,6 +304,12 @@
 * ``bzr serve`` no longer crashes when a server_started hook is installed and
   IPv6 support is available on the system. (Jelmer Vernooij, #293697)
 
+* ``bzr status`` will not rewrite the dirstate file if it only has
+  'trivial' changes. (Currently limited to dir updates and newly-added
+  files changing state.) This saves a bit of time for regular operations.
+  eg. ``bzr status`` in a 100k tree takes 1.4s to compute the status, but 1s
+  to re-save the dirstate file. (John Arbash Meinel, #765881)
+
 * ``bzr tags`` will no longer choke on branches with ghost revisions in
   their mainline and tags on revisions not in the branch ancestry. 
   (Jelmer Vernooij, #397556)




More information about the bazaar-commits mailing list