Rev 3850: Part of bug #300289, stop requiring plain fulltexts for revisions. in http://bzr.arbash-meinel.com/branches/bzr/1.10-dev/revision_stream

John Arbash Meinel john at arbash-meinel.com
Tue Nov 25 20:40:17 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.10-dev/revision_stream

------------------------------------------------------------
revno: 3850
revision-id: john at arbash-meinel.com-20081125203957-6ru90u67o1x07prb
parent: pqm at pqm.ubuntu.com-20081125152232-c22rycit2dfzm11f
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: revision_stream
timestamp: Tue 2008-11-25 14:39:57 -0600
message:
  Part of bug #300289, stop requiring plain fulltexts for revisions.
  
  The generic fetch code was always extracting the fulltext for revisions as part of
  the fix for bug #261339, however insert_record_stream is currently perfectly
  capable of upcasting to a fulltext on-the-fly.
  So we restore the behavior of copying knit-ft-gz without extracting it,
  and add another test that ensures if the source has a delta it gets expanded
  to a fulltext in the target.
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS	2008-11-21 20:24:15 +0000
+++ b/NEWS	2008-11-25 20:39:57 +0000
@@ -20,6 +20,12 @@
 
   IMPROVEMENTS:
 
+    * The generic fetch code can once again copy revisions and signatures
+      without extracting them completely to fulltexts and then serializing
+      them back down into byte strings. This is a significant performance
+      improvement when fetching from a stacked branch.
+      (John Arbash Meinel, #300289)
+
     * When making a large readv() request over ``bzr+ssh``, break up the
       request into more manageable chunks. Because the RPC is not yet able
       to stream, this helps keep us from buffering too much information at

=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py	2008-11-21 20:24:15 +0000
+++ b/bzrlib/fetch.py	2008-11-25 20:39:57 +0000
@@ -256,22 +256,19 @@
         to_sf.insert_record_stream(filter_absent(from_sf.get_record_stream(
             [(rev_id,) for rev_id in revs],
             self.to_repository._fetch_order,
-            True)))
-        # Bug #261339, some knit repositories accidentally had deltas in their
-        # revision stream, when you weren't ever supposed to have deltas.
-        # So we now *force* fulltext copying for signatures and revisions
+            not self.to_repository._fetch_uses_deltas)))
         self._fetch_just_revision_texts(revs)
 
     def _fetch_just_revision_texts(self, version_ids):
         to_rf = self.to_repository.revisions
         from_rf = self.from_repository.revisions
+        # If a revision has a delta, this is actually expanded inside the
+        # insert_record_stream code now, which is an alternate fix for
+        # bug #261339
         to_rf.insert_record_stream(from_rf.get_record_stream(
             [(rev_id,) for rev_id in version_ids],
             self.to_repository._fetch_order,
-            True))
-        # Bug #261339, some knit repositories accidentally had deltas in their
-        # revision stream, when you weren't ever supposed to have deltas.
-        # So we now *force* fulltext copying for signatures and revisions
+            not self.to_repository._fetch_uses_deltas))
 
     def _generate_root_texts(self, revs):
         """This will be called by __fetch between fetching weave texts and

=== modified file 'bzrlib/tests/test_fetch.py'
--- a/bzrlib/tests/test_fetch.py	2008-08-29 02:05:22 +0000
+++ b/bzrlib/tests/test_fetch.py	2008-11-25 20:39:57 +0000
@@ -374,12 +374,8 @@
         self.assertEqual(('get_record_stream', [('rev-one',)],
                           target._fetch_order, False),
                          self.find_get_record_stream(source.inventories.calls))
-        # Because of bugs in the old fetch code, revisions could accidentally
-        # have deltas present in knits. However, it was never intended, so we
-        # always for include_delta_closure=True, to make sure we get fulltexts.
-        # bug #261339
         self.assertEqual(('get_record_stream', [('rev-one',)],
-                          target._fetch_order, True),
+                          target._fetch_order, False),
                          self.find_get_record_stream(source.revisions.calls))
         # XXX: Signatures is special, and slightly broken. The
         # standard item_keys_introduced_by actually does a lookup for every
@@ -390,7 +386,7 @@
         # we care about.
         signature_calls = source.signatures.calls[-1:]
         self.assertEqual(('get_record_stream', [('rev-one',)],
-                          target._fetch_order, True),
+                          target._fetch_order, False),
                          self.find_get_record_stream(signature_calls))
 
     def test_fetch_no_deltas_with_delta_closure(self):
@@ -432,6 +428,37 @@
                           target._fetch_order, True),
                          self.find_get_record_stream(signature_calls))
 
+    def test_fetch_revisions_with_deltas_into_pack(self):
+        # See BUG #261339, dev versions of bzr could accidentally create deltas
+        # in revision texts in knit branches (when fetching from packs). So we
+        # ensure that *if* a knit repository has a delta in revisions, that it
+        # gets properly expanded back into a fulltext when stored in the pack
+        # file.
+        tree = self.make_branch_and_tree('source', format='dirstate')
+        target = self.make_repository('target', format='pack-0.92')
+        self.build_tree(['source/file'])
+        tree.set_root_id('root-id')
+        tree.add('file', 'file-id')
+        tree.commit('one', rev_id='rev-one')
+        # Hack the KVF for revisions so that it "accidentally" allows a delta
+        tree.branch.repository.revisions._max_delta_chain = 200
+        tree.commit('two', rev_id='rev-two')
+        source = tree.branch.repository
+        # Ensure that we stored a delta
+        source.lock_read()
+        self.addCleanup(source.unlock)
+        record = source.revisions.get_record_stream([('rev-two',)],
+            'unordered', False).next()
+        self.assertEqual('knit-delta-gz', record.storage_kind)
+        target.fetch(tree.branch.repository, revision_id='rev-two')
+        # The record should get expanded back to a fulltext
+        target.lock_read()
+        self.addCleanup(target.unlock)
+        record = target.revisions.get_record_stream([('rev-two',)],
+            'unordered', False).next()
+        self.assertEqual('knit-ft-gz', record.storage_kind)
+
+
 
 class Test1To2Fetch(TestCaseWithTransport):
     """Tests for Model1To2 failure modes"""



More information about the bazaar-commits mailing list