Rev 3568: Hack together a bunch of changes to allow inserting texts in reverse topological order during the generic fetch. in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/reordered

John Arbash Meinel john at arbash-meinel.com
Fri Jul 25 20:42:51 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/reordered

------------------------------------------------------------
revno: 3568
revision-id: john at arbash-meinel.com-20080725194239-7phiux160is29uth
parent: pqm at pqm.ubuntu.com-20080721151553-11iasd1407hkznk1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: reordered
timestamp: Fri 2008-07-25 14:42:39 -0500
message:
  Hack together a bunch of changes to allow inserting texts in reverse topological order during the generic fetch.
  
  The biggest thing is that we can't compact the lines into byte-strings, as doing
  so grossly bloats memory. (_get_content_maps() shares line strings between
  revisions of the same text; ''.join() allocates a new string for every revision
  and doesn't let the shared memory be freed.)
  Also, GenericRepoFetcher should request fulltexts rather than requesting deltas.
  Otherwise we have to unpack the deltas on the other side, which requires building
  up from the delta chain, etc., and that is very inefficient (see the sketch after
  the fetch.py diff below).
  The one thing we *could* do is keep a text cache.
  But again, going back to a single string bloats memory versus, say, lines or even
  chunks.
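
As a rough, standalone illustration of the memory point above (plain Python, not
bzrlib code; the names are made up): kept as lists of line strings, revisions of
the same file share their unchanged line objects, while ''.join() materializes an
independent fulltext per revision.

# Standalone sketch, not bzrlib code: why lists of shared line strings stay
# cheap while joined fulltexts do not.
base_lines = ['line %d\n' % i for i in range(1000)]

rev_a = base_lines                        # revision A: the base text
rev_b = base_lines[:-1] + ['changed\n']   # revision B: one line differs

# As lists, the two revisions share 999 of their 1000 line objects, so
# revision B only costs one extra list plus one new line string.
shared = sum(1 for a, b in zip(rev_a, rev_b) if a is b)
print('shared line objects: %d' % shared)   # -> 999

# ''.join() builds an independent full string per revision, so holding many
# revisions this way costs the sum of their fulltext sizes, and the shared
# line strings cannot be reclaimed while something still references them.
full_a = ''.join(rev_a)
full_b = ''.join(rev_b)
print('joined bytes held: %d' % (len(full_a) + len(full_b)))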
-------------- next part --------------
=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py	2008-06-25 10:06:48 +0000
+++ b/bzrlib/fetch.py	2008-07-25 19:42:39 +0000
@@ -179,7 +179,7 @@
                     to_texts = self.to_repository.texts
                     from_texts = self.from_repository.texts
                     to_texts.insert_record_stream(from_texts.get_record_stream(
-                        text_keys, 'topological', False))
+                        text_keys, 'topological', True))
                     # Cause an error if a text occurs after we have done the
                     # copy.
                     text_keys = None
@@ -239,7 +239,7 @@
             # corrupt.
             to_weave.insert_record_stream(from_weave.get_record_stream(
                 [(rev_id,) for rev_id in revs],
-                'topological', False))
+                'topological', True))
         finally:
             child_pb.finished()
 
@@ -261,29 +261,22 @@
     """
 
     def _fetch_revision_texts(self, revs, pb):
-        """Fetch revision object texts"""
-        count = 0
-        total = len(revs)
-        for rev in revs:
-            pb.update('copying revisions', count, total)
-            try:
-                sig_text = self.from_repository.get_signature_text(rev)
-                self.to_repository.add_signature_text(rev, sig_text)
-            except errors.NoSuchRevision:
-                # not signed.
-                pass
-            self._copy_revision(rev)
-            count += 1
-        # fixup inventory if needed: 
-        # this is expensive because we have no inverse index to current ghosts.
-        # but on local disk its a few seconds and sftp push is already insane.
-        # so we just-do-it.
-        # FIXME: repository should inform if this is needed.
+        # may need to be an InterRevisionStore call here.
+        to_sf = self.to_repository.signatures
+        from_sf = self.from_repository.signatures
+        # A missing signature is just skipped.
+        to_sf.insert_record_stream(filter_absent(from_sf.get_record_stream(
+            [(rev_id,) for rev_id in revs],
+            'unordered', False)))
+        self._fetch_just_revision_texts(revs)
         self.to_repository.reconcile()
 
-    def _copy_revision(self, rev_id):
-        rev = self.from_repository.get_revision(rev_id)
-        self.to_repository.add_revision(rev_id, rev)
+    def _fetch_just_revision_texts(self, version_ids):
+        to_rf = self.to_repository.revisions
+        from_rf = self.from_repository.revisions
+        to_rf.insert_record_stream(from_rf.get_record_stream(
+            [(rev_id,) for rev_id in version_ids],
+            'topological', False))
 
 
 class KnitRepoFetcher(RepoFetcher):

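For the fetch.py hunks above: the third positional argument to get_record_stream()
is the include_delta_closure flag, so flipping False to True asks the source to
send enough data for every record to be expanded to a fulltext on the receiving
side, instead of leaving the target to rebuild texts from a delta chain it may not
have. A minimal sketch of the call shape (the get_record_stream and
insert_record_stream names are bzrlib's; the surrounding function and repository
variables are hypothetical):

# Sketch only, assuming already-locked source/target Repository objects and
# text_keys as an iterable of (file_id, revision_id) tuples.
def copy_texts(source_repo, target_repo, text_keys):
    from_texts = source_repo.texts
    to_texts = target_repo.texts
    # 'topological' asks for a parents-before-children ordering; the final
    # True is include_delta_closure, i.e. "include whatever is needed so the
    # receiver can reconstruct fulltexts without chasing deltas back into
    # the source".
    stream = from_texts.get_record_stream(text_keys, 'topological', True)
    to_texts.insert_record_stream(stream)

The signature copy in _fetch_revision_texts follows the same pattern, with
'unordered' ordering and filter_absent() so that revisions that were simply never
signed are skipped rather than treated as errors.
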
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-07-18 03:07:07 +0000
+++ b/bzrlib/knit.py	2008-07-25 19:42:39 +0000
@@ -1208,7 +1208,7 @@
                 needed_from_fallback - absent_keys)
             for key in present_keys:
                 yield FulltextContentFactory(key, global_map[key], None,
-                    ''.join(text_map[key]))
+                    text_map.pop(key))
         else:
             for source, keys in source_keys:
                 if source is parent_maps[0]:

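With the knit.py change above, the FulltextContentFactory is handed the line list
still sitting in text_map, so a record's 'fulltext' content can now arrive as a
list of lines rather than one joined string; consumers have to tolerate both
shapes, which is what the repository.py hunk below does. A hypothetical helper
showing that defensive pattern (not bzrlib code):

# Hypothetical helper, not in bzrlib: normalise whatever shape a record's
# fulltext comes in, joining only at the point a single string is actually
# required, so shared line strings stay shared everywhere else.
def as_fulltext_string(record):
    content = record.get_bytes_as('fulltext')
    if isinstance(content, list):
        return ''.join(content)
    return content
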
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-07-16 14:23:05 +0000
+++ b/bzrlib/repository.py	2008-07-25 19:42:39 +0000
@@ -1532,7 +1532,11 @@
         texts = {}
         for record in stream:
             if record.storage_kind != 'absent':
-                texts[record.key] = record.get_bytes_as('fulltext')
+                bytes = record.get_bytes_as('fulltext')
+                if isinstance(bytes, list):
+                    texts[record.key] = ''.join(bytes)
+                else:
+                    texts[record.key] = bytes
             else:
                 raise errors.NoSuchRevision(self, record.key)
         for key in keys:


