Rev 2950: (robertc) Stop reading all history during fetch operations on packs. (Robert Collins, #88319) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Tue Oct 30 20:34:36 GMT 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2950
revision-id: pqm at pqm.ubuntu.com-20071030203434-6ffpvdncd12ncx3d
parent: pqm at pqm.ubuntu.com-20071029221703-zy7q7a0ehfvpybtn
parent: robertc at robertcollins.net-20071030194231-07hv0tjz76xzfoom
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2007-10-30 20:34:34 +0000
message:
  (robertc) Stop reading all history during fetch operations on packs. (Robert Collins, #88319)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/errors.py               errors.py-20050309040759-20512168c4e14fbd
  bzrlib/repofmt/knitrepo.py     knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repofmt/weaverepo.py    presplitout.py-20070125045333-wfav3tsh73oxu3zk-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/interrepository_implementations/__init__.py __init__.py-20060220054744-baf49a1f88f17b1a
  bzrlib/tests/interrepository_implementations/test_interrepository.py test_interrepository.py-20060220061411-1ec13fa99e5e3eee
  bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
    ------------------------------------------------------------
    revno: 2949.1.4
    merged: robertc at robertcollins.net-20071030194231-07hv0tjz76xzfoom
    parent: robertc at robertcollins.net-20071030183733-cygr23zc5sycopbg
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: fetch
    timestamp: Wed 2007-10-31 06:42:31 +1100
    message:
      Use a list comprehension not a generator.
    ------------------------------------------------------------
    revno: 2949.1.3
    merged: robertc at robertcollins.net-20071030183733-cygr23zc5sycopbg
    parent: robertc at robertcollins.net-20071030173911-ams9jk9pw8uein0j
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: fetch
    timestamp: Wed 2007-10-31 05:37:33 +1100
    message:
      Review feedback.
    ------------------------------------------------------------
    revno: 2949.1.2
    merged: robertc at robertcollins.net-20071030173911-ams9jk9pw8uein0j
    parent: robertc at robertcollins.net-20071029235541-41o2q61c7hwagcol
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: fetch
    timestamp: Wed 2007-10-31 04:39:11 +1100
    message:
      * Fetch with pack repositories will no longer read the entire history graph.
        (Robert Collins, #88319)
    ------------------------------------------------------------
    revno: 2949.1.1
    merged: robertc at robertcollins.net-20071029235541-41o2q61c7hwagcol
    parent: pqm at pqm.ubuntu.com-20071029221703-zy7q7a0ehfvpybtn
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: fetch
    timestamp: Tue 2007-10-30 10:55:41 +1100
    message:
      Change Repository.fetch to provide a find_ghosts parameter which triggers ghost filling.
=== modified file 'NEWS'
--- a/NEWS	2007-10-29 04:05:13 +0000
+++ b/NEWS	2007-10-30 17:39:11 +0000
@@ -68,6 +68,9 @@
      changed to determine if the tree is unchanged rather than recalculating
      it at the end of the commit process. (Robert Collins)
 
+   * Fetch with pack repositories will no longer read the entire history graph.
+     (Robert Collins, #88319)
+
    * Inventory serialisation no longer double-sha's the content.
      (Robert Collins)
 

=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py	2007-10-25 09:20:51 +0000
+++ b/bzrlib/errors.py	2007-10-30 17:39:11 +0000
@@ -1252,7 +1252,7 @@
 
 class WeaveParentMismatch(WeaveError):
 
-    _fmt = "Parents are mismatched between two revisions."
+    _fmt = "Parents are mismatched between two revisions. %(message)s"
     
 
 class WeaveInvalidChecksum(WeaveError):

=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2007-10-17 09:39:41 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2007-10-30 17:39:11 +0000
@@ -323,6 +323,8 @@
     # Set this attribute in derived clases to control the _serializer that the
     # repository objects will have passed to their constructor.
     _serializer = xml5.serializer_v5
+    # Knit based repositories handle ghosts reasonably well.
+    supports_ghosts = True
 
     def _get_control_store(self, repo_transport, control_files):
         """Return the control store for this repository."""

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-10-29 21:00:22 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-10-30 17:39:11 +0000
@@ -1519,6 +1519,7 @@
             self._pack_collection.reset()
             # XXX: Better to do an in-memory merge when acquiring a new lock -
             # factor out code from _save_pack_names.
+            self._pack_collection.ensure_loaded()
 
     def _start_write_group(self):
         self._pack_collection._start_write_group()

=== modified file 'bzrlib/repofmt/weaverepo.py'
--- a/bzrlib/repofmt/weaverepo.py	2007-10-17 09:39:41 +0000
+++ b/bzrlib/repofmt/weaverepo.py	2007-10-30 17:39:11 +0000
@@ -319,13 +319,10 @@
 
     rich_root_data = False
     supports_tree_reference = False
+    supports_ghosts = False
 
     def initialize(self, a_bzrdir, shared=False, _internal=False):
-        """Create a weave repository.
-        
-        TODO: when creating split out bzr branch formats, move this to a common
-        base for Format5, Format6. or something like that.
-        """
+        """Create a weave repository."""
         if shared:
             raise errors.IncompatibleFormat(self, a_bzrdir._format)
 
@@ -519,6 +516,7 @@
     """
 
     _versionedfile_class = weave.WeaveFile
+    supports_ghosts = False
 
     def _get_control_store(self, repo_transport, control_files):
         """Return the control store for this repository."""

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-10-28 23:47:31 +0000
+++ b/bzrlib/repository.py	2007-10-30 19:42:31 +0000
@@ -840,10 +840,13 @@
         extent possible considering file system caching etc).
         """
 
-    def fetch(self, source, revision_id=None, pb=None):
+    def fetch(self, source, revision_id=None, pb=None, find_ghosts=False):
         """Fetch the content required to construct revision_id from source.
 
         If revision_id is None all content is copied.
+        :param find_ghosts: Find and copy revisions in the source that are
+            ghosts in the target (and not reachable directly by walking out to
+            the first-present revision in target from revision_id).
         """
         # fast path same-url fetch operations
         if self.has_same_location(source):
@@ -855,7 +858,7 @@
             return 0, []
         inter = InterRepository.get(source, self)
         try:
-            return inter.fetch(revision_id=revision_id, pb=pb)
+            return inter.fetch(revision_id=revision_id, pb=pb, find_ghosts=find_ghosts)
         except NotImplementedError:
             raise errors.IncompatibleRepositories(source, self)
 
@@ -1771,6 +1774,10 @@
     parameterisation.
     """
 
+    # Set to True or False in derived classes. True indicates that the format
+    # supports ghosts gracefully.
+    supports_ghosts = None
+
     def __str__(self):
         return "<%s>" % self.__class__.__name__
 
@@ -2006,7 +2013,7 @@
     def copy_content(self, revision_id=None):
         raise NotImplementedError(self.copy_content)
 
-    def fetch(self, revision_id=None, pb=None):
+    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         """Fetch the content required to construct revision_id.
 
         The content is copied from self.source to self.target.
@@ -2098,7 +2105,7 @@
         self.target.fetch(self.source, revision_id=revision_id)
 
     @needs_write_lock
-    def fetch(self, revision_id=None, pb=None):
+    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         """See InterRepository.fetch()."""
         from bzrlib.fetch import GenericRepoFetcher
         mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
@@ -2177,7 +2184,7 @@
             self.target.fetch(self.source, revision_id=revision_id)
 
     @needs_write_lock
-    def fetch(self, revision_id=None, pb=None):
+    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         """See InterRepository.fetch()."""
         from bzrlib.fetch import GenericRepoFetcher
         mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
@@ -2255,7 +2262,7 @@
         return are_knits and InterRepository._same_model(source, target)
 
     @needs_write_lock
-    def fetch(self, revision_id=None, pb=None):
+    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         """See InterRepository.fetch()."""
         from bzrlib.fetch import KnitRepoFetcher
         mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
@@ -2322,7 +2329,7 @@
         return are_packs and InterRepository._same_model(source, target)
 
     @needs_write_lock
-    def fetch(self, revision_id=None, pb=None):
+    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         """See InterRepository.fetch()."""
         mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
                self.source, self.source._format, self.target, self.target._format)
@@ -2346,7 +2353,8 @@
             return
         else:
             try:
-                revision_ids = self.missing_revision_ids(revision_id)
+                revision_ids = self.missing_revision_ids(revision_id,
+                    find_ghosts=find_ghosts)
             except errors.NoSuchRevision:
                 raise errors.InstallFailed([revision_id])
         packs = self.source._pack_collection.all_packs()
@@ -2364,9 +2372,32 @@
             return 0
 
     @needs_read_lock
-    def missing_revision_ids(self, revision_id=None):
-        """See InterRepository.missing_revision_ids()."""
-        if revision_id is not None:
+    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
+        """See InterRepository.missing_revision_ids().
+        
+        :param find_ghosts: Find ghosts throughough the ancestry of
+            revision_id.
+        """
+        if not find_ghosts and revision_id is not None:
+            graph = self.source.get_graph()
+            missing_revs = set()
+            searcher = graph._make_breadth_first_searcher([revision_id])
+            target_index = \
+                self.target._pack_collection.revision_index.combined_index
+            null_set = frozenset([_mod_revision.NULL_REVISION])
+            while True:
+                try:
+                    next_revs = set(searcher.next())
+                except StopIteration:
+                    break
+                next_revs.difference_update(null_set)
+                target_keys = [(key,) for key in next_revs]
+                have_revs = frozenset(node[1][0] for node in
+                    target_index.iter_entries(target_keys))
+                missing_revs.update(next_revs - have_revs)
+                searcher.stop_searching_any(have_revs)
+            return missing_revs
+        elif revision_id is not None:
             source_ids = self.source.get_ancestry(revision_id)
             assert source_ids[0] is None
             source_ids.pop(0)
@@ -2394,7 +2425,7 @@
             return False
 
     @needs_write_lock
-    def fetch(self, revision_id=None, pb=None):
+    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         """See InterRepository.fetch()."""
         from bzrlib.fetch import Model1toKnit2Fetcher
         f = Model1toKnit2Fetcher(to_repository=self.target,
@@ -2448,7 +2479,7 @@
             return False
 
     @needs_write_lock
-    def fetch(self, revision_id=None, pb=None):
+    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         """See InterRepository.fetch()."""
         from bzrlib.fetch import Knit1to2Fetcher
         mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
@@ -2481,7 +2512,7 @@
         return real_source._format == target._format
 
     @needs_write_lock
-    def fetch(self, revision_id=None, pb=None):
+    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         """See InterRepository.fetch()."""
         from bzrlib.fetch import RemoteToOtherFetcher
         mutter("Using fetch logic to copy between %s(remote) and %s(%s)",
@@ -2521,7 +2552,7 @@
         self._ensure_real_inter()
         self._real_inter.copy_content(revision_id=revision_id)
 
-    def fetch(self, revision_id=None, pb=None):
+    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
         self._ensure_real_inter()
         self._real_inter.fetch(revision_id=revision_id, pb=pb)
 

=== modified file 'bzrlib/tests/interrepository_implementations/__init__.py'
--- a/bzrlib/tests/interrepository_implementations/__init__.py	2007-06-28 04:25:15 +0000
+++ b/bzrlib/tests/interrepository_implementations/__init__.py	2007-10-30 17:39:11 +0000
@@ -94,6 +94,9 @@
         result.append((InterModel1and2,
                        weaverepo.RepositoryFormat5(),
                        knitrepo.RepositoryFormatKnit3()))
+        result.append((InterModel1and2,
+                       knitrepo.RepositoryFormatKnit1(),
+                       knitrepo.RepositoryFormatKnit3()))
         result.append((InterKnit1and2,
                        knitrepo.RepositoryFormatKnit1(),
                        knitrepo.RepositoryFormatKnit3()))

=== modified file 'bzrlib/tests/interrepository_implementations/test_interrepository.py'
--- a/bzrlib/tests/interrepository_implementations/test_interrepository.py	2007-10-17 09:39:41 +0000
+++ b/bzrlib/tests/interrepository_implementations/test_interrepository.py	2007-10-30 17:39:11 +0000
@@ -295,52 +295,58 @@
 
 class TestCaseWithGhosts(TestCaseWithInterRepository):
 
-    def setUp(self):
-        super(TestCaseWithGhosts, self).setUp()
-        # we want two repositories at this point
+    def test_fetch_all_fixes_up_ghost(self):
+        # we want two repositories at this point:
         # one with a revision that is a ghost in the other
         # repository.
-
-        # 'ghost' is a ghost in missing_ghost and not in with_ghost_rev
-        repo = self.make_repository('with_ghost_rev')
-        repo.lock_write()
-        builder = repo.get_commit_builder(None, [], None,
-            committer="Foo Bar <foo at example.com>",
-            revision_id='ghost')
-        ie = bzrlib.inventory.InventoryDirectory('TREE_ROOT', '', None)
-        builder.record_entry_contents(ie, [], '', None,
-            ('directory', None, None, None))
-        builder.finish_inventory()
-        builder.commit("Message")
-        repo.unlock()
-         
-        repo = self.make_to_repository('missing_ghost')
-        inv = Inventory(revision_id='with_ghost')
-        inv.root.revision = 'with_ghost'
-        repo.lock_write()
-        repo.start_write_group()
-        sha1 = repo.add_inventory('with_ghost', inv, [])
-        rev = bzrlib.revision.Revision(timestamp=0,
-                                       timezone=None,
-                                       committer="Foo Bar <foo at example.com>",
-                                       message="Message",
-                                       inventory_sha1=sha1,
-                                       revision_id='with_ghost')
-        rev.parent_ids = ['ghost']
-        repo.add_revision('with_ghost', rev)
-        repo.commit_write_group()
-        repo.unlock()
-
-    def test_fetch_all_fixes_up_ghost(self):
-        # fetching from a repo with a current ghost unghosts it in referencing
-        # revisions.
-        repo = repository.Repository.open('missing_ghost')
-        rev = repo.get_revision('with_ghost')
-        from_repo = repository.Repository.open('with_ghost_rev')
-        repo.fetch(from_repo)
+        # 'ghost' is present in has_ghost, 'ghost' is absent in 'missing_ghost'.
+        # 'references' is present in both repositories, and 'tip' is present
+        # just in has_ghost.
+        # has_ghost       missing_ghost
+        #------------------------------
+        # 'ghost'             -
+        # 'references'    'references'
+        # 'tip'               -
+        # In this test we fetch 'tip' which should not fetch 'ghost'
+        has_ghost = self.make_repository('has_ghost')
+        missing_ghost = self.make_repository('missing_ghost')
+        if [True, True] != [repo._format.supports_ghosts for repo in
+            (has_ghost, missing_ghost)]:
+            raise TestNotApplicable("Need ghost support.")
+
+        def add_commit(repo, revision_id, parent_ids):
+            repo.lock_write()
+            repo.start_write_group()
+            inv = Inventory(revision_id=revision_id)
+            inv.root.revision = revision_id
+            root_id = inv.root.file_id
+            sha1 = repo.add_inventory(revision_id, inv, parent_ids)
+            vf = repo.weave_store.get_weave_or_empty(root_id,
+                repo.get_transaction())
+            vf.add_lines(revision_id, [], [])
+            rev = bzrlib.revision.Revision(timestamp=0,
+                                           timezone=None,
+                                           committer="Foo Bar <foo at example.com>",
+                                           message="Message",
+                                           inventory_sha1=sha1,
+                                           revision_id=revision_id)
+            rev.parent_ids = parent_ids
+            repo.add_revision(revision_id, rev)
+            repo.commit_write_group()
+            repo.unlock()
+        add_commit(has_ghost, 'ghost', [])
+        add_commit(has_ghost, 'references', ['ghost'])
+        add_commit(missing_ghost, 'references', ['ghost'])
+        add_commit(has_ghost, 'tip', ['references'])
+        missing_ghost.fetch(has_ghost, 'tip', find_ghosts=True)
+        # missing ghost now has tip and ghost.
+        rev = missing_ghost.get_revision('tip')
+        inv = missing_ghost.get_inventory('tip')
+        rev = missing_ghost.get_revision('ghost')
+        inv = missing_ghost.get_inventory('ghost')
         # rev must not be corrupt now
-        rev = repo.get_revision('with_ghost')
-        self.assertEqual([None, 'ghost', 'with_ghost'], repo.get_ancestry('with_ghost'))
+        self.assertEqual([None, 'ghost', 'references', 'tip'],
+            missing_ghost.get_ancestry('tip'))
 
 
 class TestFetchDependentData(TestCaseWithInterRepository):

=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py	2007-10-25 07:34:32 +0000
+++ b/bzrlib/tests/test_repository.py	2007-10-30 17:39:11 +0000
@@ -1084,6 +1084,56 @@
         repo2.break_lock()
         self.assertRaises(errors.LockBroken, repo._pack_collection._unlock_names)
 
+    def test_fetch_without_find_ghosts_ignores_ghosts(self):
+        # we want two repositories at this point:
+        # one with a revision that is a ghost in the other
+        # repository.
+        # 'ghost' is present in has_ghost, 'ghost' is absent in 'missing_ghost'.
+        # 'references' is present in both repositories, and 'tip' is present
+        # just in has_ghost.
+        # has_ghost       missing_ghost
+        #------------------------------
+        # 'ghost'             -
+        # 'references'    'references'
+        # 'tip'               -
+        # In this test we fetch 'tip' which should not fetch 'ghost'
+        has_ghost = self.make_repository('has_ghost', format=self.get_format())
+        missing_ghost = self.make_repository('missing_ghost',
+            format=self.get_format())
+
+        def add_commit(repo, revision_id, parent_ids):
+            repo.lock_write()
+            repo.start_write_group()
+            inv = inventory.Inventory(revision_id=revision_id)
+            inv.root.revision = revision_id
+            root_id = inv.root.file_id
+            sha1 = repo.add_inventory(revision_id, inv, [])
+            vf = repo.weave_store.get_weave_or_empty(root_id,
+                repo.get_transaction())
+            vf.add_lines(revision_id, [], [])
+            rev = bzrlib.revision.Revision(timestamp=0,
+                                           timezone=None,
+                                           committer="Foo Bar <foo at example.com>",
+                                           message="Message",
+                                           inventory_sha1=sha1,
+                                           revision_id=revision_id)
+            rev.parent_ids = parent_ids
+            repo.add_revision(revision_id, rev)
+            repo.commit_write_group()
+            repo.unlock()
+        add_commit(has_ghost, 'ghost', [])
+        add_commit(has_ghost, 'references', ['ghost'])
+        add_commit(missing_ghost, 'references', ['ghost'])
+        add_commit(has_ghost, 'tip', ['references'])
+        missing_ghost.fetch(has_ghost, 'tip')
+        # missing ghost now has tip and not ghost.
+        rev = missing_ghost.get_revision('tip')
+        inv = missing_ghost.get_inventory('tip')
+        self.assertRaises(errors.NoSuchRevision,
+            missing_ghost.get_revision, 'ghost')
+        self.assertRaises(errors.RevisionNotPresent,
+            missing_ghost.get_inventory, 'ghost')
+
 
 class TestKnitPackSubtrees(TestKnitPackNoSubtrees):
 




More information about the bazaar-commits mailing list