Rev 3568: Fetching data between repositories that have the same model but no optimised fetcher will not reserialise all the revisions, increasing performance. In http://people.ubuntu.com/~robertc/baz2.0/btree-graphindex

Robert Collins robertc at robertcollins.net
Mon Jul 28 09:39:29 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/btree-graphindex

------------------------------------------------------------
revno: 3568
revision-id: robertc at robertcollins.net-20080728083915-7p1w6504b661e31g
parent: robertc at robertcollins.net-20080728060842-uayjy217i066dvct
committer: Robert Collins <robertc at robertcollins.net>
branch nick: btree-graphindex
timestamp: Mon 2008-07-28 18:39:15 +1000
message:
   * Fetching data between repositories that have the same model but no 
     optimised fetcher will not reserialise all the revisions, increasing
     performance. (Robert Collins, John Arbash Meinel)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/fetch.py                fetch.py-20050818234941-26fea6105696365d
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
=== modified file 'NEWS'
--- a/NEWS	2008-07-28 06:08:42 +0000
+++ b/NEWS	2008-07-28 08:39:15 +0000
@@ -28,6 +28,9 @@
       Anywhere from 2x-6x faster depending on the size of the tree (bigger
       trees, bigger benefit.) (John Arbash Meinel)
 
+    * Fetching data between repositories that have the same model but no 
+      optimised fetcher will not reserialise all the revisions, increasing
+      performance. (Robert Collins, John Arbash Meinel)
 
   BUG FIXES:
 

=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py	2008-07-28 05:09:54 +0000
+++ b/bzrlib/fetch.py	2008-07-28 08:39:15 +0000
@@ -245,57 +245,6 @@
         finally:
             child_pb.finished()
 
-    def _generate_root_texts(self, revs):
-        """This will be called by __fetch between fetching weave texts and
-        fetching the inventory weave.
-
-        Subclasses should override this if they need to generate root texts
-        after fetching weave texts.
-        """
-        pass
-
-
-class GenericRepoFetcher(RepoFetcher):
-    """This is a generic repo to repo fetcher.
-
-    This makes minimal assumptions about repo layout and contents.
-    It triggers a reconciliation after fetching to ensure integrity.
-    """
-
-    def _fetch_revision_texts(self, revs, pb):
-        """Fetch revision object texts"""
-        count = 0
-        total = len(revs)
-        for rev in revs:
-            pb.update('copying revisions', count, total)
-            try:
-                sig_text = self.from_repository.get_signature_text(rev)
-                self.to_repository.add_signature_text(rev, sig_text)
-            except errors.NoSuchRevision:
-                # not signed.
-                pass
-            self._copy_revision(rev)
-            count += 1
-        # fixup inventory if needed: 
-        # this is expensive because we have no inverse index to current ghosts.
-        # but on local disk its a few seconds and sftp push is already insane.
-        # so we just-do-it.
-        # FIXME: repository should inform if this is needed.
-        self.to_repository.reconcile()
-
-    def _copy_revision(self, rev_id):
-        rev = self.from_repository.get_revision(rev_id)
-        self.to_repository.add_revision(rev_id, rev)
-
-
-class KnitRepoFetcher(RepoFetcher):
-    """This is a knit format repository specific fetcher.
-
-    This differs from the GenericRepoFetcher by not doing a 
-    reconciliation after copying, and using knit joining to
-    copy revision texts.
-    """
-
     def _fetch_revision_texts(self, revs, pb):
         # may need to be a InterRevisionStore call here.
         to_sf = self.to_repository.signatures
@@ -315,6 +264,26 @@
             self.to_repository._fetch_order,
             self.to_repository._fetch_uses_deltas))
 
+    def _generate_root_texts(self, revs):
+        """This will be called by __fetch between fetching weave texts and
+        fetching the inventory weave.
+
+        Subclasses should override this if they need to generate root texts
+        after fetching weave texts.
+        """
+        pass
+
+
+class GenericRepoFetcher(RepoFetcher):
+    """This is a generic repo to repo fetcher.
+
+    This triggers a reconciliation after fetching to ensure integrity.
+    """
+
+    def _fetch_revision_texts(self, revs, pb):
+        RepoFetcher._fetch_revision_texts(self, revs, pb)
+        self.to_repository.reconcile()
+
 
 class Inter1and2Helper(object):
     """Helper for operations that convert data from model 1 and 2
@@ -424,13 +393,13 @@
             self.target.add_revision(revision.revision_id, revision)
 
 
-class Model1toKnit2Fetcher(GenericRepoFetcher):
+class Model1toKnit2Fetcher(RepoFetcher):
     """Fetch from a Model1 repository into a Knit2 repository
     """
     def __init__(self, to_repository, from_repository, last_revision=None,
                  pb=None, find_ghosts=True):
         self.helper = Inter1and2Helper(from_repository, to_repository)
-        GenericRepoFetcher.__init__(self, to_repository, from_repository,
+        RepoFetcher.__init__(self, to_repository, from_repository,
             last_revision, pb, find_ghosts)
 
     def _generate_root_texts(self, revs):
@@ -439,17 +408,38 @@
     def _fetch_inventory_weave(self, revs, pb):
         self.helper.regenerate_inventory(revs)
 
+    def _fetch_revision_texts(self, revs, pb):
+        """Fetch revision object texts"""
+        count = 0
+        total = len(revs)
+        for rev in revs:
+            pb.update('copying revisions', count, total)
+            try:
+                sig_text = self.from_repository.get_signature_text(rev)
+                self.to_repository.add_signature_text(rev, sig_text)
+            except errors.NoSuchRevision:
+                # not signed.
+                pass
+            self._copy_revision(rev)
+            count += 1
+        # fixup inventory if needed: 
+        # this is expensive because we have no inverse index to current ghosts.
+        # but on local disk its a few seconds and sftp push is already insane.
+        # so we just-do-it.
+        # FIXME: repository should inform if this is needed.
+        self.to_repository.reconcile()
+
     def _copy_revision(self, rev):
         self.helper.fetch_revisions([rev])
 
 
-class Knit1to2Fetcher(KnitRepoFetcher):
+class Knit1to2Fetcher(RepoFetcher):
     """Fetch from a Knit1 repository into a Knit2 repository"""
 
-    def __init__(self, to_repository, from_repository, last_revision=None, 
+    def __init__(self, to_repository, from_repository, last_revision=None,
                  pb=None, find_ghosts=True):
         self.helper = Inter1and2Helper(from_repository, to_repository)
-        KnitRepoFetcher.__init__(self, to_repository, from_repository,
+        RepoFetcher.__init__(self, to_repository, from_repository,
             last_revision, pb, find_ghosts)
 
     def _generate_root_texts(self, revs):

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-07-28 06:08:42 +0000
+++ b/bzrlib/repository.py	2008-07-28 08:39:15 +0000
@@ -2658,10 +2658,10 @@
     @needs_write_lock
     def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         """See InterRepository.fetch()."""
-        from bzrlib.fetch import KnitRepoFetcher
+        from bzrlib.fetch import RepoFetcher
         mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
                self.source, self.source._format, self.target, self.target._format)
-        f = KnitRepoFetcher(to_repository=self.target,
+        f = RepoFetcher(to_repository=self.target,
                             from_repository=self.source,
                             last_revision=revision_id,
                             pb=pb, find_ghosts=find_ghosts)
@@ -2729,8 +2729,8 @@
     def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         """See InterRepository.fetch()."""
         if len(self.source._fallback_repositories) > 0:
-            from bzrlib.fetch import KnitRepoFetcher
-            fetcher = KnitRepoFetcher(self.target, self.source, revision_id,
+            from bzrlib.fetch import RepoFetcher
+            fetcher = RepoFetcher(self.target, self.source, revision_id,
                                       pb, find_ghosts)
             return fetcher.count_copied, fetcher.failed_revisions
         from bzrlib.repofmt.pack_repo import Packer




More information about the bazaar-commits mailing list