Rev 2748: Merge double-sha avoidance patch. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Mon Sep 3 04:13:25 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 2748
revision-id: robertc at robertcollins.net-20070903031314-2l09xjk51p9f3w63
parent: robertc at robertcollins.net-20070902213554-d9lju6ujw3ydz00i
parent: robertc at robertcollins.net-20070903030326-xam93hxlthc6l37w
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2007-09-03 13:13:14 +1000
message:
Merge double-sha avoidance patch.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/fetch.py fetch.py-20050818234941-26fea6105696365d
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
bzrlib/tests/test_weave.py testknit.py-20050627023648-9833cc5562ffb785
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
bzrlib/weave.py knit.py-20050627021749-759c29984154256b
------------------------------------------------------------
revno: 2592.1.25.2.7.1.28.1.6.1.3.1.9.2.1.3.74.1.33
revision-id: robertc at robertcollins.net-20070903030326-xam93hxlthc6l37w
parent: robertc at robertcollins.net-20070903025858-k2pxq3qz6ulhhtgq
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit
timestamp: Mon 2007-09-03 13:03:26 +1000
message:
Don't double-calculate the text sha1 during commit.
modified:
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
------------------------------------------------------------
revno: 2592.1.25.2.7.1.28.1.6.1.3.1.9.2.1.3.74.1.32
revision-id: robertc at robertcollins.net-20070903025858-k2pxq3qz6ulhhtgq
parent: pqm at pqm.ubuntu.com-20070901160444-hcr66zejwyy0jezc
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit
timestamp: Mon 2007-09-03 12:58:58 +1000
message:
* The ``add_lines`` methods on ``VersionedFile`` implementations has changed
its return value to include the sha1 and length of the inserted text. This
allows the avoidance of double-sha1 calculations during commit.
(Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/fetch.py fetch.py-20050818234941-26fea6105696365d
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
bzrlib/tests/test_weave.py testknit.py-20050627023648-9833cc5562ffb785
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
bzrlib/weave.py knit.py-20050627021749-759c29984154256b
=== modified file 'NEWS'
--- a/NEWS 2007-09-02 21:35:54 +0000
+++ b/NEWS 2007-09-03 03:13:14 +0000
@@ -115,6 +115,11 @@
* ``Branch.append_revision`` is removed altogether; please use
``Branch.set_last_revision_info`` instead. (Martin Pool)
+ * The ``add_lines`` methods on ``VersionedFile`` implementations has changed
+ its return value to include the sha1 and length of the inserted text. This
+ allows the avoidance of double-sha1 calculations during commit.
+ (Robert Collins)
+
* ``Transport.should_cache`` has been removed. It was not called in the
previous release. (Martin Pool)
=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py 2007-08-28 22:19:15 +0000
+++ b/bzrlib/fetch.py 2007-09-03 03:13:14 +0000
@@ -350,7 +350,7 @@
if root_id not in versionedfile:
versionedfile[root_id] = to_store.get_weave_or_empty(root_id,
self.target.get_transaction())
- parent_texts[root_id] = versionedfile[root_id].add_lines(
+ _, _, parent_texts[root_id] = versionedfile[root_id].add_lines(
revision_id, parents, [], parent_texts)
def regenerate_inventory(self, revs):
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-09-02 21:35:54 +0000
+++ b/bzrlib/knit.py 2007-09-03 03:13:14 +0000
@@ -907,6 +907,7 @@
delta = False
digest = sha_strings(lines)
+ text_length = sum(map(len, lines))
options = []
if lines:
if lines[-1][-1] != '\n':
@@ -936,7 +937,7 @@
access_memo = self._data.add_record(version_id, digest, store_lines)
self._index.add_version(version_id, options, access_memo, parents)
- return lines
+ return digest, text_length, lines
def check(self, progress_bar=None):
"""See VersionedFile.check()."""
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-08-31 01:14:00 +0000
+++ b/bzrlib/repository.py 2007-09-03 03:13:14 +0000
@@ -2448,18 +2448,15 @@
and text_sha1 == file_parents.values()[0].text_sha1
and text_size == file_parents.values()[0].text_size):
previous_ie = file_parents.values()[0]
- versionedfile = self.repository.weave_store.get_weave(file_id,
+ versionedfile = self.repository.weave_store.get_weave(file_id,
self.repository.get_transaction())
- versionedfile.clone_text(self._new_revision_id,
+ versionedfile.clone_text(self._new_revision_id,
previous_ie.revision, file_parents.keys())
return text_sha1, text_size
else:
new_lines = get_content_byte_lines()
- # TODO: Rather than invoking sha_strings here, _add_text_to_weave
- # should return the SHA1 and size
- self._add_text_to_weave(file_id, new_lines, file_parents.keys())
- return osutils.sha_strings(new_lines), \
- sum(map(len, new_lines))
+ return self._add_text_to_weave(file_id, new_lines,
+ file_parents.keys())
def modified_link(self, file_id, file_parents, link_target):
"""Record the presence of a symbolic link.
@@ -2473,8 +2470,10 @@
def _add_text_to_weave(self, file_id, new_lines, parents):
versionedfile = self.repository.weave_store.get_weave_or_empty(
file_id, self.repository.get_transaction())
- versionedfile.add_lines(self._new_revision_id, parents, new_lines)
+ result = versionedfile.add_lines(
+ self._new_revision_id, parents, new_lines)[0:2]
versionedfile.clear_cache()
+ return result
class _CommitBuilder(CommitBuilder):
=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py 2007-09-02 21:35:54 +0000
+++ b/bzrlib/tests/test_versionedfile.py 2007-09-03 03:13:14 +0000
@@ -81,18 +81,14 @@
def test_adds_with_parent_texts(self):
f = self.get_file()
parent_texts = {}
- parent_texts['r0'] = f.add_lines('r0', [], ['a\n', 'b\n'])
+ _, _, parent_texts['r0'] = f.add_lines('r0', [], ['a\n', 'b\n'])
try:
- parent_texts['r1'] = f.add_lines_with_ghosts('r1',
- ['r0', 'ghost'],
- ['b\n', 'c\n'],
- parent_texts=parent_texts)
+ _, _, parent_texts['r1'] = f.add_lines_with_ghosts('r1',
+ ['r0', 'ghost'], ['b\n', 'c\n'], parent_texts=parent_texts)
except NotImplementedError:
# if the format doesn't support ghosts, just add normally.
- parent_texts['r1'] = f.add_lines('r1',
- ['r0'],
- ['b\n', 'c\n'],
- parent_texts=parent_texts)
+ _, _, parent_texts['r1'] = f.add_lines('r1',
+ ['r0'], ['b\n', 'c\n'], parent_texts=parent_texts)
f.add_lines('r2', ['r1'], ['c\n', 'd\n'], parent_texts=parent_texts)
self.assertNotEqual(None, parent_texts['r0'])
self.assertNotEqual(None, parent_texts['r1'])
@@ -168,6 +164,26 @@
self.assertRaises(errors.ReservedId,
vf.add_delta, 'a:', [], None, 'sha1', False, ((0, 0, 0, []),))
+ def test_add_lines_return_value(self):
+ # add_lines should return the sha1 and the text size.
+ vf = self.get_file()
+ empty_text = ('a', [])
+ sample_text_nl = ('b', ["foo\n", "bar\n"])
+ sample_text_no_nl = ('c', ["foo\n", "bar"])
+ # check results for the three cases:
+ for version, lines in (empty_text, sample_text_nl, sample_text_no_nl):
+ # the first two elements are the same for all versioned files:
+ # - the digest and the size of the text. For some versioned files
+ # additional data is returned in additional tuple elements.
+ result = vf.add_lines(version, [], lines)
+ self.assertEqual(3, len(result))
+ self.assertEqual((osutils.sha_strings(lines), sum(map(len, lines))),
+ result[0:2])
+ # parents should not affect the result:
+ lines = sample_text_nl[1]
+ self.assertEqual((osutils.sha_strings(lines), sum(map(len, lines))),
+ vf.add_lines('d', ['b', 'c'], lines)[0:2])
+
def test_get_reserved(self):
vf = self.get_file()
self.assertRaises(errors.ReservedId, vf.get_delta, 'b:')
=== modified file 'bzrlib/tests/test_weave.py'
--- a/bzrlib/tests/test_weave.py 2007-08-15 04:33:34 +0000
+++ b/bzrlib/tests/test_weave.py 2007-09-03 02:58:58 +0000
@@ -39,6 +39,7 @@
class TestBase(TestCase):
+
def check_read_write(self, k):
"""Check the weave k can be written & re-read."""
from tempfile import TemporaryFile
@@ -75,16 +76,6 @@
k = Weave()
-class StoreText(TestBase):
- """Store and retrieve a simple text."""
-
- def test_storing_text(self):
- k = Weave()
- idx = k.add_lines('text0', [], TEXT_0)
- self.assertEqual(k.get_lines(idx), TEXT_0)
- self.assertEqual(idx, 0)
-
-
class AnnotateOne(TestBase):
def runTest(self):
k = Weave()
@@ -93,20 +84,6 @@
[('text0', TEXT_0[0])])
-class StoreTwo(TestBase):
- def runTest(self):
- k = Weave()
-
- idx = k.add_lines('text0', [], TEXT_0)
- self.assertEqual(idx, 0)
-
- idx = k.add_lines('text1', [], TEXT_1)
- self.assertEqual(idx, 1)
-
- self.assertEqual(k.get_lines(0), TEXT_0)
- self.assertEqual(k.get_lines(1), TEXT_1)
-
-
class GetSha1(TestBase):
def test_get_sha1(self):
k = Weave()
@@ -133,7 +110,8 @@
class RepeatedAdd(TestBase):
"""Add the same version twice; harmless."""
- def runTest(self):
+
+ def test_duplicate_add(self):
k = Weave()
idx = k.add_lines('text0', [], TEXT_0)
idx2 = k.add_lines('text0', [], TEXT_0)
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2007-09-02 21:35:54 +0000
+++ b/bzrlib/versionedfile.py 2007-09-03 03:13:14 +0000
@@ -130,9 +130,9 @@
:param left_matching_blocks: a hint about which areas are common
between the text and its left-hand-parent. The format is
the SequenceMatcher.get_matching_blocks format.
- :return: An opaque representation of the inserted version which can be
- provided back to future add_lines calls in the parent_texts
- dictionary.
+ :return: The text sha1, the number of bytes in the text, and an opaque
+ representation of the inserted version which can be provided
+ back to future add_lines calls in the parent_texts dictionary.
"""
version_id = osutils.safe_revision_id(version_id)
parents = [osutils.safe_revision_id(v) for v in parents]
=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py 2007-08-15 06:46:33 +0000
+++ b/bzrlib/weave.py 2007-09-03 02:58:58 +0000
@@ -419,7 +419,8 @@
def _add_lines(self, version_id, parents, lines, parent_texts,
left_matching_blocks=None):
"""See VersionedFile.add_lines."""
- return self._add(version_id, lines, map(self._lookup, parents))
+ idx = self._add(version_id, lines, map(self._lookup, parents))
+ return sha_strings(lines), sum(map(len, lines)), idx
def _add(self, version_id, lines, parents, sha1=None):
"""Add a single text on top of the weave.
@@ -491,7 +492,7 @@
# another small special case: a merge, producing the same text
# as auto-merge
if lines == basis_lines:
- return new_version
+ return new_version
# add a sentinel, because we can also match against the final line
basis_lineno.append(len(self._weave))
More information about the bazaar-commits
mailing list