Rev 5775: (jameinel) Change ``bzrlib.transform.build_tree`` to pass the sha1 of newly created files, in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Fri Apr 8 18:04:22 UTC 2011


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5775 [merge]
revision-id: pqm at pqm.ubuntu.com-20110408180413-fry1x3hu5trcytxf
parent: pqm at pqm.ubuntu.com-20110408164023-4t8mlpiha78ql802
parent: john at arbash-meinel.com-20110408122805-qdo9uc2dtc9mzoic
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Fri 2011-04-08 18:04:13 +0000
message:
  (jameinel) Change ``bzrlib.transform.build_tree`` to pass the sha1 of newly
   created files. This avoids re-hashing content on the next 'bzr st'. Saving
   ~30s on a gcc-sized tree. (John A Meinel)
modified:
  bzrlib/tests/test_transform.py test_transaction.py-20060105172520-b3ffb3946550e6c4
  bzrlib/transform.py            transform.py-20060105172343-dd99e54394d91687
  doc/en/release-notes/bzr-2.4.txt bzr2.4.txt-20110114053217-k7ym9jfz243fddjm-1
=== modified file 'bzrlib/tests/test_transform.py'
--- a/bzrlib/tests/test_transform.py	2011-04-05 13:25:50 +0000
+++ b/bzrlib/tests/test_transform.py	2011-04-05 13:37:00 +0000
@@ -1970,6 +1970,18 @@
         self.addCleanup(target.unlock)
         self.assertEqual([], list(target.iter_changes(revision_tree)))
 
+    def test_build_tree_accelerator_tree_observes_sha1(self):
+        source = self.create_ab_tree()
+        sha1 = osutils.sha_string('A')
+        target = self.make_branch_and_tree('target')
+        target.lock_write()
+        self.addCleanup(target.unlock)
+        state = target.current_dirstate()
+        state._cutoff_time = time.time() + 60
+        build_tree(source.basis_tree(), target, source)
+        entry = state._get_entry(0, path_utf8='file1')
+        self.assertEqual(sha1, entry[1][0][1])
+
     def test_build_tree_accelerator_tree_missing_file(self):
         source = self.create_ab_tree()
         os.unlink('source/file1')
@@ -2133,6 +2145,42 @@
         self.assertEqual('file.moved', target.id2path('lower-id'))
         self.assertEqual('FILE', target.id2path('upper-id'))
 
+    def test_build_tree_observes_sha(self):
+        source = self.make_branch_and_tree('source')
+        self.build_tree(['source/file1', 'source/dir/', 'source/dir/file2'])
+        source.add(['file1', 'dir', 'dir/file2'],
+                   ['file1-id', 'dir-id', 'file2-id'])
+        source.commit('new files')
+        target = self.make_branch_and_tree('target')
+        target.lock_write()
+        self.addCleanup(target.unlock)
+        # We make use of the fact that DirState caches its cutoff time. So we
+        # set the 'safe' time to one minute in the future.
+        state = target.current_dirstate()
+        state._cutoff_time = time.time() + 60
+        build_tree(source.basis_tree(), target)
+        entry1_sha = osutils.sha_file_by_name('source/file1')
+        entry2_sha = osutils.sha_file_by_name('source/dir/file2')
+        # entry[1] is the state information, entry[1][0] is the state of the
+        # working tree, entry[1][0][1] is the sha value for the current working
+        # tree
+        entry1 = state._get_entry(0, path_utf8='file1')
+        self.assertEqual(entry1_sha, entry1[1][0][1])
+        # The 'size' field must also be set.
+        self.assertEqual(25, entry1[1][0][2])
+        entry1_state = entry1[1][0]
+        entry2 = state._get_entry(0, path_utf8='dir/file2')
+        self.assertEqual(entry2_sha, entry2[1][0][1])
+        self.assertEqual(29, entry2[1][0][2])
+        entry2_state = entry2[1][0]
+        # Now, make sure that we don't have to re-read the content. The
+        # packed_stat should match exactly.
+        self.assertEqual(entry1_sha, target.get_file_sha1('file1-id', 'file1'))
+        self.assertEqual(entry2_sha,
+                         target.get_file_sha1('file2-id', 'dir/file2'))
+        self.assertEqual(entry1_state, entry1[1][0])
+        self.assertEqual(entry2_state, entry2[1][0])
+
 
 class TestCommitTransform(tests.TestCaseWithTransport):
 

=== modified file 'bzrlib/transform.py'
--- a/bzrlib/transform.py	2011-04-07 10:36:24 +0000
+++ b/bzrlib/transform.py	2011-04-08 12:28:05 +0000
@@ -2541,7 +2541,7 @@
                     executable = tree.is_executable(file_id, tree_path)
                     if executable:
                         tt.set_executability(executable, trans_id)
-                    trans_data = (trans_id, tree_path)
+                    trans_data = (trans_id, tree_path, entry.text_sha1)
                     deferred_contents.append((file_id, trans_data))
                 else:
                     file_trans_id[file_id] = new_by_entry(tt, entry, parent_id,
@@ -2592,10 +2592,11 @@
         unchanged = dict(unchanged)
         new_desired_files = []
         count = 0
-        for file_id, (trans_id, tree_path) in desired_files:
+        for file_id, (trans_id, tree_path, text_sha1) in desired_files:
             accelerator_path = unchanged.get(file_id)
             if accelerator_path is None:
-                new_desired_files.append((file_id, (trans_id, tree_path)))
+                new_desired_files.append((file_id,
+                    (trans_id, tree_path, text_sha1)))
                 continue
             pb.update('Adding file contents', count + offset, total)
             if hardlink:
@@ -2608,7 +2609,7 @@
                     contents = filtered_output_bytes(contents, filters,
                         ContentFilterContext(tree_path, tree))
                 try:
-                    tt.create_file(contents, trans_id)
+                    tt.create_file(contents, trans_id, sha1=text_sha1)
                 finally:
                     try:
                         contents.close()
@@ -2617,13 +2618,13 @@
                         pass
             count += 1
         offset += count
-    for count, ((trans_id, tree_path), contents) in enumerate(
+    for count, ((trans_id, tree_path, text_sha1), contents) in enumerate(
             tree.iter_files_bytes(new_desired_files)):
         if wt.supports_content_filtering():
             filters = wt._content_filter_stack(tree_path)
             contents = filtered_output_bytes(contents, filters,
                 ContentFilterContext(tree_path, tree))
-        tt.create_file(contents, trans_id)
+        tt.create_file(contents, trans_id, sha1=text_sha1)
         pb.update('Adding file contents', count + offset, total)
 
 

=== modified file 'doc/en/release-notes/bzr-2.4.txt'
--- a/doc/en/release-notes/bzr-2.4.txt	2011-04-08 10:54:03 +0000
+++ b/doc/en/release-notes/bzr-2.4.txt	2011-04-08 12:28:05 +0000
@@ -26,6 +26,12 @@
 .. Improvements to existing commands, especially improved performance 
    or memory usage, or better results.
 
+* When building a new WorkingTree (such as during ``bzr co`` or
+  ``bzr branch``) we now properly store the stat and hash of files that
+  are old enough. This saves a fair amount of time on the first
+  ``bzr status`` (on a 500MB tree, it saves about 30+s).
+  (John Arbash Meinel, #740932)
+
 * Resolve ``lp:FOO`` urls locally rather than doing an XMLRPC request if
   the user has done ``bzr launchpad-login``. The bzr+ssh URLs were already
   being handed off to the remote server anyway (xmlrpc has been mapping




More information about the bazaar-commits mailing list