Rev 2791: (robertc) Don't double-calculate the text sha1 during commit. (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Mon Sep 3 22:19:11 BST 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2791
revision-id: pqm at pqm.ubuntu.com-20070903211907-igj2uj83hz1yyqs9
parent: pqm at pqm.ubuntu.com-20070903130729-qdcrag0a7vcpzfgm
parent: robertc at robertcollins.net-20070903202305-yj4obhvvw3j7t4jc
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2007-09-03 22:19:07 +0100
message:
  (robertc) Don't double-calculate the text sha1 during commit. (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/fetch.py                fetch.py-20050818234941-26fea6105696365d
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/tests/test_weave.py     testknit.py-20050627023648-9833cc5562ffb785
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
    ------------------------------------------------------------
    revno: 2776.1.3
    merged: robertc at robertcollins.net-20070903202305-yj4obhvvw3j7t4jc
    parent: robertc at robertcollins.net-20070903030326-xam93hxlthc6l37w
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit
    timestamp: Tue 2007-09-04 06:23:05 +1000
    message:
      Missed bundles in the return value conversion of vf.add_lines.
    ------------------------------------------------------------
    revno: 2776.1.2
    merged: robertc at robertcollins.net-20070903030326-xam93hxlthc6l37w
    parent: robertc at robertcollins.net-20070903025858-k2pxq3qz6ulhhtgq
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit
    timestamp: Mon 2007-09-03 13:03:26 +1000
    message:
      Don't double-calculate the text sha1 during commit.
    ------------------------------------------------------------
    revno: 2776.1.1
    merged: robertc at robertcollins.net-20070903025858-k2pxq3qz6ulhhtgq
    parent: pqm at pqm.ubuntu.com-20070901160444-hcr66zejwyy0jezc
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit
    timestamp: Mon 2007-09-03 12:58:58 +1000
    message:
       * The ``add_lines`` methods on ``VersionedFile`` implementations has changed
         its return value to include the sha1 and length of the inserted text. This
         allows the avoidance of double-sha1 calculations during commit.
         (Robert Collins)
=== modified file 'NEWS'
--- a/NEWS	2007-09-03 12:32:14 +0000
+++ b/NEWS	2007-09-03 21:19:07 +0000
@@ -161,6 +161,11 @@
    * ``Branch.append_revision`` is removed altogether; please use 
      ``Branch.set_last_revision_info`` instead.  (Martin Pool)
 
+   * The ``add_lines`` methods on ``VersionedFile`` implementations has changed
+     its return value to include the sha1 and length of the inserted text. This
+     allows the avoidance of double-sha1 calculations during commit.
+     (Robert Collins)
+
    * ``Transport.should_cache`` has been removed.  It was not called in the
      previous release.  (Martin Pool)
 

=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py	2007-08-22 22:30:11 +0000
+++ b/bzrlib/fetch.py	2007-09-03 02:58:58 +0000
@@ -351,7 +351,7 @@
             if root_id not in versionedfile:
                 versionedfile[root_id] = to_store.get_weave_or_empty(root_id, 
                     self.target.get_transaction())
-            parent_texts[root_id] = versionedfile[root_id].add_lines(
+            _, _, parent_texts[root_id] = versionedfile[root_id].add_lines(
                 revision_id, parents, [], parent_texts)
 
     def regenerate_inventory(self, revs):

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-09-02 22:38:54 +0000
+++ b/bzrlib/knit.py	2007-09-03 21:19:07 +0000
@@ -915,6 +915,7 @@
             delta = False
 
         digest = sha_strings(lines)
+        text_length = sum(map(len, lines))
         options = []
         if lines:
             if lines[-1][-1] != '\n':
@@ -944,7 +945,7 @@
 
         access_memo = self._data.add_record(version_id, digest, store_lines)
         self._index.add_version(version_id, options, access_memo, parents)
-        return lines
+        return digest, text_length, lines
 
     def check(self, progress_bar=None):
         """See VersionedFile.check()."""

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-08-28 01:58:42 +0000
+++ b/bzrlib/repository.py	2007-09-03 03:03:26 +0000
@@ -2260,18 +2260,15 @@
             and text_sha1 == file_parents.values()[0].text_sha1
             and text_size == file_parents.values()[0].text_size):
             previous_ie = file_parents.values()[0]
-            versionedfile = self.repository.weave_store.get_weave(file_id, 
+            versionedfile = self.repository.weave_store.get_weave(file_id,
                 self.repository.get_transaction())
-            versionedfile.clone_text(self._new_revision_id, 
+            versionedfile.clone_text(self._new_revision_id,
                 previous_ie.revision, file_parents.keys())
             return text_sha1, text_size
         else:
             new_lines = get_content_byte_lines()
-            # TODO: Rather than invoking sha_strings here, _add_text_to_weave
-            # should return the SHA1 and size
-            self._add_text_to_weave(file_id, new_lines, file_parents.keys())
-            return osutils.sha_strings(new_lines), \
-                sum(map(len, new_lines))
+            return self._add_text_to_weave(file_id, new_lines,
+                file_parents.keys())
 
     def modified_link(self, file_id, file_parents, link_target):
         """Record the presence of a symbolic link.
@@ -2285,8 +2282,10 @@
     def _add_text_to_weave(self, file_id, new_lines, parents):
         versionedfile = self.repository.weave_store.get_weave_or_empty(
             file_id, self.repository.get_transaction())
-        versionedfile.add_lines(self._new_revision_id, parents, new_lines)
+        result = versionedfile.add_lines(
+            self._new_revision_id, parents, new_lines)[0:2]
         versionedfile.clear_cache()
+        return result
 
 
 class _CommitBuilder(CommitBuilder):

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2007-09-02 22:38:54 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2007-09-03 21:19:07 +0000
@@ -84,18 +84,14 @@
     def test_adds_with_parent_texts(self):
         f = self.get_file()
         parent_texts = {}
-        parent_texts['r0'] = f.add_lines('r0', [], ['a\n', 'b\n'])
+        _, _, parent_texts['r0'] = f.add_lines('r0', [], ['a\n', 'b\n'])
         try:
-            parent_texts['r1'] = f.add_lines_with_ghosts('r1',
-                                                         ['r0', 'ghost'], 
-                                                         ['b\n', 'c\n'],
-                                                         parent_texts=parent_texts)
+            _, _, parent_texts['r1'] = f.add_lines_with_ghosts('r1',
+                ['r0', 'ghost'], ['b\n', 'c\n'], parent_texts=parent_texts)
         except NotImplementedError:
             # if the format doesn't support ghosts, just add normally.
-            parent_texts['r1'] = f.add_lines('r1',
-                                             ['r0'], 
-                                             ['b\n', 'c\n'],
-                                             parent_texts=parent_texts)
+            _, _, parent_texts['r1'] = f.add_lines('r1',
+                ['r0'], ['b\n', 'c\n'], parent_texts=parent_texts)
         f.add_lines('r2', ['r1'], ['c\n', 'd\n'], parent_texts=parent_texts)
         self.assertNotEqual(None, parent_texts['r0'])
         self.assertNotEqual(None, parent_texts['r1'])
@@ -171,6 +167,26 @@
         self.assertRaises(errors.ReservedId,
             vf.add_delta, 'a:', [], None, 'sha1', False, ((0, 0, 0, []),))
 
+    def test_add_lines_return_value(self):
+        # add_lines should return the sha1 and the text size.
+        vf = self.get_file()
+        empty_text = ('a', [])
+        sample_text_nl = ('b', ["foo\n", "bar\n"])
+        sample_text_no_nl = ('c', ["foo\n", "bar"])
+        # check results for the three cases:
+        for version, lines in (empty_text, sample_text_nl, sample_text_no_nl):
+            # the first two elements are the same for all versioned files:
+            # - the digest and the size of the text. For some versioned files
+            #   additional data is returned in additional tuple elements.
+            result = vf.add_lines(version, [], lines)
+            self.assertEqual(3, len(result))
+            self.assertEqual((osutils.sha_strings(lines), sum(map(len, lines))),
+                result[0:2])
+        # parents should not affect the result:
+        lines = sample_text_nl[1]
+        self.assertEqual((osutils.sha_strings(lines), sum(map(len, lines))),
+            vf.add_lines('d', ['b', 'c'], lines)[0:2])
+
     def test_get_reserved(self):
         vf = self.get_file()
         self.assertRaises(errors.ReservedId, vf.get_delta, 'b:')

=== modified file 'bzrlib/tests/test_weave.py'
--- a/bzrlib/tests/test_weave.py	2007-08-15 04:33:34 +0000
+++ b/bzrlib/tests/test_weave.py	2007-09-03 02:58:58 +0000
@@ -39,6 +39,7 @@
 
 
 class TestBase(TestCase):
+
     def check_read_write(self, k):
         """Check the weave k can be written & re-read."""
         from tempfile import TemporaryFile
@@ -75,16 +76,6 @@
         k = Weave()
 
 
-class StoreText(TestBase):
-    """Store and retrieve a simple text."""
-
-    def test_storing_text(self):
-        k = Weave()
-        idx = k.add_lines('text0', [], TEXT_0)
-        self.assertEqual(k.get_lines(idx), TEXT_0)
-        self.assertEqual(idx, 0)
-
-
 class AnnotateOne(TestBase):
     def runTest(self):
         k = Weave()
@@ -93,20 +84,6 @@
                          [('text0', TEXT_0[0])])
 
 
-class StoreTwo(TestBase):
-    def runTest(self):
-        k = Weave()
-
-        idx = k.add_lines('text0', [], TEXT_0)
-        self.assertEqual(idx, 0)
-
-        idx = k.add_lines('text1', [], TEXT_1)
-        self.assertEqual(idx, 1)
-
-        self.assertEqual(k.get_lines(0), TEXT_0)
-        self.assertEqual(k.get_lines(1), TEXT_1)
-
-
 class GetSha1(TestBase):
     def test_get_sha1(self):
         k = Weave()
@@ -133,7 +110,8 @@
 
 class RepeatedAdd(TestBase):
     """Add the same version twice; harmless."""
-    def runTest(self):
+
+    def test_duplicate_add(self):
         k = Weave()
         idx = k.add_lines('text0', [], TEXT_0)
         idx2 = k.add_lines('text0', [], TEXT_0)

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2007-08-30 08:11:54 +0000
+++ b/bzrlib/versionedfile.py	2007-09-03 20:23:05 +0000
@@ -130,9 +130,9 @@
         :param left_matching_blocks: a hint about which areas are common
             between the text and its left-hand-parent.  The format is
             the SequenceMatcher.get_matching_blocks format.
-        :return: An opaque representation of the inserted version which can be
-                 provided back to future add_lines calls in the parent_texts
-                 dictionary.
+        :return: The text sha1, the number of bytes in the text, and an opaque
+                 representation of the inserted version which can be provided
+                 back to future add_lines calls in the parent_texts dictionary.
         """
         version_id = osutils.safe_revision_id(version_id)
         parents = [osutils.safe_revision_id(v) for v in parents]
@@ -318,7 +318,7 @@
                     mpvf.get_diff(parent_ids[0]).num_lines()))
             else:
                 left_matching_blocks = None
-            version_text = self.add_lines(version, parent_ids, lines,
+            _, _, version_text = self.add_lines(version, parent_ids, lines,
                 vf_parents, left_matching_blocks=left_matching_blocks)
             vf_parents[version] = version_text
         for (version, parent_ids, expected_sha1, mpdiff), sha1 in\
@@ -687,7 +687,7 @@
             # deltas = self.source.get_deltas(order)
             for index, version in enumerate(order):
                 pb.update('Converting versioned data', index, len(order))
-                parent_text = target.add_lines(version,
+                _, _, parent_text = target.add_lines(version,
                                                self.source.get_parents(version),
                                                self.source.get_lines(version),
                                                parent_texts=parent_texts)

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2007-08-15 06:46:33 +0000
+++ b/bzrlib/weave.py	2007-09-03 02:58:58 +0000
@@ -419,7 +419,8 @@
     def _add_lines(self, version_id, parents, lines, parent_texts,
                    left_matching_blocks=None):
         """See VersionedFile.add_lines."""
-        return self._add(version_id, lines, map(self._lookup, parents))
+        idx = self._add(version_id, lines, map(self._lookup, parents))
+        return sha_strings(lines), sum(map(len, lines)), idx
 
     def _add(self, version_id, lines, parents, sha1=None):
         """Add a single text on top of the weave.
@@ -491,7 +492,7 @@
         # another small special case: a merge, producing the same text
         # as auto-merge
         if lines == basis_lines:
-            return new_version            
+            return new_version
 
         # add a sentinel, because we can also match against the final line
         basis_lineno.append(len(self._weave))




More information about the bazaar-commits mailing list