Rev 2748: Merge double-sha avoidance patch. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Mon Sep 3 04:13:25 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2748
revision-id: robertc at robertcollins.net-20070903031314-2l09xjk51p9f3w63
parent: robertc at robertcollins.net-20070902213554-d9lju6ujw3ydz00i
parent: robertc at robertcollins.net-20070903030326-xam93hxlthc6l37w
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2007-09-03 13:13:14 +1000
message:
  Merge double-sha avoidance patch.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/fetch.py                fetch.py-20050818234941-26fea6105696365d
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/tests/test_weave.py     testknit.py-20050627023648-9833cc5562ffb785
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
    ------------------------------------------------------------
    revno: 2592.1.25.2.7.1.28.1.6.1.3.1.9.2.1.3.74.1.33
    revision-id: robertc at robertcollins.net-20070903030326-xam93hxlthc6l37w
    parent: robertc at robertcollins.net-20070903025858-k2pxq3qz6ulhhtgq
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit
    timestamp: Mon 2007-09-03 13:03:26 +1000
    message:
      Don't double-calculate the text sha1 during commit.
    modified:
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
    ------------------------------------------------------------
    revno: 2592.1.25.2.7.1.28.1.6.1.3.1.9.2.1.3.74.1.32
    revision-id: robertc at robertcollins.net-20070903025858-k2pxq3qz6ulhhtgq
    parent: pqm at pqm.ubuntu.com-20070901160444-hcr66zejwyy0jezc
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit
    timestamp: Mon 2007-09-03 12:58:58 +1000
    message:
       * The ``add_lines`` method on ``VersionedFile`` implementations has changed
         its return value to include the sha1 and length of the inserted text. This
         allows the avoidance of double-sha1 calculations during commit.
         (Robert Collins)
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/fetch.py                fetch.py-20050818234941-26fea6105696365d
      bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
      bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
      bzrlib/tests/test_weave.py     testknit.py-20050627023648-9833cc5562ffb785
      bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
      bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
=== modified file 'NEWS'
--- a/NEWS	2007-09-02 21:35:54 +0000
+++ b/NEWS	2007-09-03 03:13:14 +0000
@@ -115,6 +115,11 @@
    * ``Branch.append_revision`` is removed altogether; please use 
      ``Branch.set_last_revision_info`` instead.  (Martin Pool)
 
+   * The ``add_lines`` method on ``VersionedFile`` implementations has changed
+     its return value to include the sha1 and length of the inserted text. This
+     allows the avoidance of double-sha1 calculations during commit.
+     (Robert Collins)
+
    * ``Transport.should_cache`` has been removed.  It was not called in the
      previous release.  (Martin Pool)
 

=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py	2007-08-28 22:19:15 +0000
+++ b/bzrlib/fetch.py	2007-09-03 03:13:14 +0000
@@ -350,7 +350,7 @@
             if root_id not in versionedfile:
                 versionedfile[root_id] = to_store.get_weave_or_empty(root_id, 
                     self.target.get_transaction())
-            parent_texts[root_id] = versionedfile[root_id].add_lines(
+            _, _, parent_texts[root_id] = versionedfile[root_id].add_lines(
                 revision_id, parents, [], parent_texts)
 
     def regenerate_inventory(self, revs):

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-09-02 21:35:54 +0000
+++ b/bzrlib/knit.py	2007-09-03 03:13:14 +0000
@@ -907,6 +907,7 @@
             delta = False
 
         digest = sha_strings(lines)
+        text_length = sum(map(len, lines))
         options = []
         if lines:
             if lines[-1][-1] != '\n':
@@ -936,7 +937,7 @@
 
         access_memo = self._data.add_record(version_id, digest, store_lines)
         self._index.add_version(version_id, options, access_memo, parents)
-        return lines
+        return digest, text_length, lines
 
     def check(self, progress_bar=None):
         """See VersionedFile.check()."""

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-08-31 01:14:00 +0000
+++ b/bzrlib/repository.py	2007-09-03 03:13:14 +0000
@@ -2448,18 +2448,15 @@
             and text_sha1 == file_parents.values()[0].text_sha1
             and text_size == file_parents.values()[0].text_size):
             previous_ie = file_parents.values()[0]
-            versionedfile = self.repository.weave_store.get_weave(file_id, 
+            versionedfile = self.repository.weave_store.get_weave(file_id,
                 self.repository.get_transaction())
-            versionedfile.clone_text(self._new_revision_id, 
+            versionedfile.clone_text(self._new_revision_id,
                 previous_ie.revision, file_parents.keys())
             return text_sha1, text_size
         else:
             new_lines = get_content_byte_lines()
-            # TODO: Rather than invoking sha_strings here, _add_text_to_weave
-            # should return the SHA1 and size
-            self._add_text_to_weave(file_id, new_lines, file_parents.keys())
-            return osutils.sha_strings(new_lines), \
-                sum(map(len, new_lines))
+            return self._add_text_to_weave(file_id, new_lines,
+                file_parents.keys())
 
     def modified_link(self, file_id, file_parents, link_target):
         """Record the presence of a symbolic link.
@@ -2473,8 +2470,10 @@
     def _add_text_to_weave(self, file_id, new_lines, parents):
         versionedfile = self.repository.weave_store.get_weave_or_empty(
             file_id, self.repository.get_transaction())
-        versionedfile.add_lines(self._new_revision_id, parents, new_lines)
+        result = versionedfile.add_lines(
+            self._new_revision_id, parents, new_lines)[0:2]
         versionedfile.clear_cache()
+        return result
 
 
 class _CommitBuilder(CommitBuilder):

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2007-09-02 21:35:54 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2007-09-03 03:13:14 +0000
@@ -81,18 +81,14 @@
     def test_adds_with_parent_texts(self):
         f = self.get_file()
         parent_texts = {}
-        parent_texts['r0'] = f.add_lines('r0', [], ['a\n', 'b\n'])
+        _, _, parent_texts['r0'] = f.add_lines('r0', [], ['a\n', 'b\n'])
         try:
-            parent_texts['r1'] = f.add_lines_with_ghosts('r1',
-                                                         ['r0', 'ghost'], 
-                                                         ['b\n', 'c\n'],
-                                                         parent_texts=parent_texts)
+            _, _, parent_texts['r1'] = f.add_lines_with_ghosts('r1',
+                ['r0', 'ghost'], ['b\n', 'c\n'], parent_texts=parent_texts)
         except NotImplementedError:
             # if the format doesn't support ghosts, just add normally.
-            parent_texts['r1'] = f.add_lines('r1',
-                                             ['r0'], 
-                                             ['b\n', 'c\n'],
-                                             parent_texts=parent_texts)
+            _, _, parent_texts['r1'] = f.add_lines('r1',
+                ['r0'], ['b\n', 'c\n'], parent_texts=parent_texts)
         f.add_lines('r2', ['r1'], ['c\n', 'd\n'], parent_texts=parent_texts)
         self.assertNotEqual(None, parent_texts['r0'])
         self.assertNotEqual(None, parent_texts['r1'])
@@ -168,6 +164,26 @@
         self.assertRaises(errors.ReservedId,
             vf.add_delta, 'a:', [], None, 'sha1', False, ((0, 0, 0, []),))
 
+    def test_add_lines_return_value(self):
+        # add_lines should return the sha1 and the text size.
+        vf = self.get_file()
+        empty_text = ('a', [])
+        sample_text_nl = ('b', ["foo\n", "bar\n"])
+        sample_text_no_nl = ('c', ["foo\n", "bar"])
+        # check results for the three cases:
+        for version, lines in (empty_text, sample_text_nl, sample_text_no_nl):
+            # the first two elements are the same for all versioned files:
+            # - the digest and the size of the text. For some versioned files
+            #   additional data is returned in additional tuple elements.
+            result = vf.add_lines(version, [], lines)
+            self.assertEqual(3, len(result))
+            self.assertEqual((osutils.sha_strings(lines), sum(map(len, lines))),
+                result[0:2])
+        # parents should not affect the result:
+        lines = sample_text_nl[1]
+        self.assertEqual((osutils.sha_strings(lines), sum(map(len, lines))),
+            vf.add_lines('d', ['b', 'c'], lines)[0:2])
+
     def test_get_reserved(self):
         vf = self.get_file()
         self.assertRaises(errors.ReservedId, vf.get_delta, 'b:')

=== modified file 'bzrlib/tests/test_weave.py'
--- a/bzrlib/tests/test_weave.py	2007-08-15 04:33:34 +0000
+++ b/bzrlib/tests/test_weave.py	2007-09-03 02:58:58 +0000
@@ -39,6 +39,7 @@
 
 
 class TestBase(TestCase):
+
     def check_read_write(self, k):
         """Check the weave k can be written & re-read."""
         from tempfile import TemporaryFile
@@ -75,16 +76,6 @@
         k = Weave()
 
 
-class StoreText(TestBase):
-    """Store and retrieve a simple text."""
-
-    def test_storing_text(self):
-        k = Weave()
-        idx = k.add_lines('text0', [], TEXT_0)
-        self.assertEqual(k.get_lines(idx), TEXT_0)
-        self.assertEqual(idx, 0)
-
-
 class AnnotateOne(TestBase):
     def runTest(self):
         k = Weave()
@@ -93,20 +84,6 @@
                          [('text0', TEXT_0[0])])
 
 
-class StoreTwo(TestBase):
-    def runTest(self):
-        k = Weave()
-
-        idx = k.add_lines('text0', [], TEXT_0)
-        self.assertEqual(idx, 0)
-
-        idx = k.add_lines('text1', [], TEXT_1)
-        self.assertEqual(idx, 1)
-
-        self.assertEqual(k.get_lines(0), TEXT_0)
-        self.assertEqual(k.get_lines(1), TEXT_1)
-
-
 class GetSha1(TestBase):
     def test_get_sha1(self):
         k = Weave()
@@ -133,7 +110,8 @@
 
 class RepeatedAdd(TestBase):
     """Add the same version twice; harmless."""
-    def runTest(self):
+
+    def test_duplicate_add(self):
         k = Weave()
         idx = k.add_lines('text0', [], TEXT_0)
         idx2 = k.add_lines('text0', [], TEXT_0)

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2007-09-02 21:35:54 +0000
+++ b/bzrlib/versionedfile.py	2007-09-03 03:13:14 +0000
@@ -130,9 +130,9 @@
         :param left_matching_blocks: a hint about which areas are common
             between the text and its left-hand-parent.  The format is
             the SequenceMatcher.get_matching_blocks format.
-        :return: An opaque representation of the inserted version which can be
-                 provided back to future add_lines calls in the parent_texts
-                 dictionary.
+        :return: The text sha1, the number of bytes in the text, and an opaque
+                 representation of the inserted version which can be provided
+                 back to future add_lines calls in the parent_texts dictionary.
         """
         version_id = osutils.safe_revision_id(version_id)
         parents = [osutils.safe_revision_id(v) for v in parents]

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2007-08-15 06:46:33 +0000
+++ b/bzrlib/weave.py	2007-09-03 02:58:58 +0000
@@ -419,7 +419,8 @@
     def _add_lines(self, version_id, parents, lines, parent_texts,
                    left_matching_blocks=None):
         """See VersionedFile.add_lines."""
-        return self._add(version_id, lines, map(self._lookup, parents))
+        idx = self._add(version_id, lines, map(self._lookup, parents))
+        return sha_strings(lines), sum(map(len, lines)), idx
 
     def _add(self, version_id, lines, parents, sha1=None):
         """Add a single text on top of the weave.
@@ -491,7 +492,7 @@
         # another small special case: a merge, producing the same text
         # as auto-merge
         if lines == basis_lines:
-            return new_version            
+            return new_version
 
         # add a sentinel, because we can also match against the final line
         basis_lineno.append(len(self._weave))



More information about the bazaar-commits mailing list