Rev 5755: Change the create_file api to allow it to take a sha1 of the content. in http://bazaar.launchpad.net/~jameinel/bzr/2.4-transform-cache-sha-740932
John Arbash Meinel
john at arbash-meinel.com
Mon Apr 4 14:29:30 UTC 2011
At http://bazaar.launchpad.net/~jameinel/bzr/2.4-transform-cache-sha-740932
------------------------------------------------------------
revno: 5755
revision-id: john at arbash-meinel.com-20110404142922-06aq2708u4f1r2by
parent: john at arbash-meinel.com-20110404133854-78q3ruqlrq2x3tff
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-transform-cache-sha-740932
timestamp: Mon 2011-04-04 16:29:22 +0200
message:
Change the create_file api to allow it to take a sha1 of the content.
That way we don't have to compute the sha1 value while creating the content,
since we normally already have the sha1. build_tree needs to be updated.
-------------- next part --------------
=== modified file 'bzrlib/tests/test_transform.py'
--- a/bzrlib/tests/test_transform.py 2011-04-04 12:34:13 +0000
+++ b/bzrlib/tests/test_transform.py 2011-04-04 14:29:22 +0000
@@ -161,6 +161,48 @@
transform.finalize()
transform.finalize()
+ def test_create_file_caches_sha1(self):
+ trans, root = self.get_transform()
+ self.wt.lock_tree_write()
+ self.addCleanup(self.wt.unlock)
+ content = ['just some content\n']
+ sha1 = osutils.sha_strings(content)
+ trans_id = trans.create_path('file1', root)
+ # Roll back the clock
+ transform._creation_mtime = creation_mtime = time.time() - 20.0
+ trans.create_file(content, trans_id, sha1=sha1)
+ st_val = osutils.lstat(trans._limbo_name(trans_id))
+ o_sha1, o_st_val = trans._observed_sha1s[trans_id]
+ self.assertEqual(o_sha1, sha1)
+ self.assertEqualStat(o_st_val, st_val)
+
+ def test__apply_insertions_updates_sha1(self):
+ trans, root = self.get_transform()
+ self.wt.lock_tree_write()
+ self.addCleanup(self.wt.unlock)
+ content = ['just some content\n']
+ sha1 = osutils.sha_strings(content)
+ trans_id = trans.create_path('file1', root)
+ # Roll back the clock
+ transform._creation_mtime = creation_mtime = time.time() - 20.0
+ trans.create_file(content, trans_id, sha1=sha1)
+ st_val = osutils.lstat(trans._limbo_name(trans_id))
+ o_sha1, o_st_val = trans._observed_sha1s[trans_id]
+ self.assertEqual(o_sha1, sha1)
+ self.assertEqualStat(o_st_val, st_val)
+ creation_mtime += 10.0
+ # We fake a time difference from when the file was created until now it
+ # is being renamed by using os.utime. Note that the change we actually
+ # want to see is the real ctime change from 'os.rename()', but as long
+ # as we observe a new stat value, we should be fine.
+ os.utime(trans._limbo_name(trans_id), (creation_mtime, creation_mtime))
+ trans.apply()
+ new_st_val = osutils.lstat(self.wt.abspath('file1'))
+ o_sha1, o_st_val = trans._observed_sha1s[trans_id]
+ self.assertEqual(o_sha1, sha1)
+ self.assertEqualStat(o_st_val, new_st_val)
+ self.assertNotEqual(st_val.st_mtime, new_st_val.st_mtime)
+
def test_create_files_same_timestamp(self):
transform, root = self.get_transform()
self.wt.lock_tree_write()
=== modified file 'bzrlib/transform.py'
--- a/bzrlib/transform.py 2011-04-04 13:37:25 +0000
+++ b/bzrlib/transform.py 2011-04-04 14:29:22 +0000
@@ -19,7 +19,6 @@
from stat import S_ISREG, S_IEXEC
import time
-import bzrlib
from bzrlib import (
errors,
lazy_import,
@@ -1251,16 +1250,19 @@
descendants.update(self._limbo_descendants(descendant))
return descendants
- def create_file(self, contents, trans_id, mode_id=None):
+ def create_file(self, contents, trans_id, mode_id=None, sha1=None):
"""Schedule creation of a new file.
- See also new_file.
-
- Contents is an iterator of strings, all of which will be written
- to the target destination.
-
- New file takes the permissions of any existing file with that id,
- unless mode_id is specified.
+ :seealso: new_file.
+
+ :param contents: an iterator of strings, all of which will be written
+ to the target destination.
+ :param trans_id: TreeTransform handle
+ :param mode_id: If not None, force the mode of the target file to match
+ the mode of the object referenced by mode_id.
+ Otherwise, we will try to preserve mode bits of an existing file.
+ :param sha1: If the sha1 of this content is already known, pass it in.
+ We can use it to prevent future sha1 computations.
"""
name = self._limbo_name(trans_id)
f = open(name, 'wb')
@@ -1273,18 +1275,7 @@
f.close()
os.unlink(name)
raise
- if contents.__class__ is list:
- sha_digest = osutils.sha_strings(contents)
- f.writelines(contents)
- else:
- sha_value = osutils.sha()
- def observe_sha1(contents):
- sha_value_update = sha_value.update
- for content in contents:
- sha_value_update(content)
- yield content
- f.writelines(observe_sha1(contents))
- sha_digest = sha_value.hexdigest()
+ f.writelines(contents)
finally:
f.close()
self._set_mtime(name)
@@ -1292,7 +1283,8 @@
# It is unfortunate we have to use lstat instead of fstat, but we just
# used utime and chmod on the file, so we need the accurate final
# details.
- self._observed_sha1s[trans_id] = (sha_digest, osutils.lstat(name))
+ if sha1 is not None:
+ self._observed_sha1s[trans_id] = (sha1, osutils.lstat(name))
def _read_file_chunks(self, trans_id):
cur_file = open(self._limbo_name(trans_id), 'rb')
@@ -1855,6 +1847,10 @@
modified_paths.append(full_path)
if trans_id in self._new_executability:
self._set_executability(path, trans_id)
+ if trans_id in self._observed_sha1s:
+ o_sha1, o_st_val = self._observed_sha1s[trans_id]
+ st = osutils.lstat(full_path)
+ self._observed_sha1s[trans_id] = (o_sha1, st)
finally:
child_pb.finished()
self._new_contents.clear()
@@ -2547,7 +2543,7 @@
executable = tree.is_executable(file_id, tree_path)
if executable:
tt.set_executability(executable, trans_id)
- trans_data = (trans_id, tree_path)
+ trans_data = (trans_id, tree_path, entry.text_sha1)
deferred_contents.append((file_id, trans_data))
else:
file_trans_id[file_id] = new_by_entry(tt, entry, parent_id,
@@ -2598,10 +2594,11 @@
unchanged = dict(unchanged)
new_desired_files = []
count = 0
- for file_id, (trans_id, tree_path) in desired_files:
+ for file_id, (trans_id, tree_path, text_sha1) in desired_files:
accelerator_path = unchanged.get(file_id)
if accelerator_path is None:
- new_desired_files.append((file_id, (trans_id, tree_path)))
+ new_desired_files.append((file_id,
+ (trans_id, tree_path, text_sha1)))
continue
pb.update('Adding file contents', count + offset, total)
if hardlink:
@@ -2623,7 +2620,7 @@
pass
count += 1
offset += count
- for count, ((trans_id, tree_path), contents) in enumerate(
+ for count, ((trans_id, tree_path, text_sha1), contents) in enumerate(
tree.iter_files_bytes(new_desired_files)):
if wt.supports_content_filtering():
filters = wt._content_filter_stack(tree_path)
More information about the bazaar-commits
mailing list