Rev 2950: (robertc) Stop reading all history during fetch operations on packs. (Robert Collins, #88319) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Tue Oct 30 20:34:36 GMT 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 2950
revision-id: pqm at pqm.ubuntu.com-20071030203434-6ffpvdncd12ncx3d
parent: pqm at pqm.ubuntu.com-20071029221703-zy7q7a0ehfvpybtn
parent: robertc at robertcollins.net-20071030194231-07hv0tjz76xzfoom
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2007-10-30 20:34:34 +0000
message:
(robertc) Stop reading all history during fetch operations on packs. (Robert Collins, #88319)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/repofmt/knitrepo.py knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repofmt/weaverepo.py presplitout.py-20070125045333-wfav3tsh73oxu3zk-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/interrepository_implementations/__init__.py __init__.py-20060220054744-baf49a1f88f17b1a
bzrlib/tests/interrepository_implementations/test_interrepository.py test_interrepository.py-20060220061411-1ec13fa99e5e3eee
bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
------------------------------------------------------------
revno: 2949.1.4
merged: robertc at robertcollins.net-20071030194231-07hv0tjz76xzfoom
parent: robertc at robertcollins.net-20071030183733-cygr23zc5sycopbg
committer: Robert Collins <robertc at robertcollins.net>
branch nick: fetch
timestamp: Wed 2007-10-31 06:42:31 +1100
message:
Use a list comprehension not a generator.
------------------------------------------------------------
revno: 2949.1.3
merged: robertc at robertcollins.net-20071030183733-cygr23zc5sycopbg
parent: robertc at robertcollins.net-20071030173911-ams9jk9pw8uein0j
committer: Robert Collins <robertc at robertcollins.net>
branch nick: fetch
timestamp: Wed 2007-10-31 05:37:33 +1100
message:
Review feedback.
------------------------------------------------------------
revno: 2949.1.2
merged: robertc at robertcollins.net-20071030173911-ams9jk9pw8uein0j
parent: robertc at robertcollins.net-20071029235541-41o2q61c7hwagcol
committer: Robert Collins <robertc at robertcollins.net>
branch nick: fetch
timestamp: Wed 2007-10-31 04:39:11 +1100
message:
* Fetch with pack repositories will no longer read the entire history graph.
(Robert Collins, #88319)
------------------------------------------------------------
revno: 2949.1.1
merged: robertc at robertcollins.net-20071029235541-41o2q61c7hwagcol
parent: pqm at pqm.ubuntu.com-20071029221703-zy7q7a0ehfvpybtn
committer: Robert Collins <robertc at robertcollins.net>
branch nick: fetch
timestamp: Tue 2007-10-30 10:55:41 +1100
message:
Change Repository.fetch to provide a find_ghosts parameter which triggers ghost filling.
=== modified file 'NEWS'
--- a/NEWS 2007-10-29 04:05:13 +0000
+++ b/NEWS 2007-10-30 17:39:11 +0000
@@ -68,6 +68,9 @@
changed to determine if the tree is unchanged rather than recalculating
it at the end of the commit process. (Robert Collins)
+ * Fetch with pack repositories will no longer read the entire history graph.
+ (Robert Collins, #88319)
+
* Inventory serialisation no longer double-sha's the content.
(Robert Collins)
=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py 2007-10-25 09:20:51 +0000
+++ b/bzrlib/errors.py 2007-10-30 17:39:11 +0000
@@ -1252,7 +1252,7 @@
class WeaveParentMismatch(WeaveError):
- _fmt = "Parents are mismatched between two revisions."
+ _fmt = "Parents are mismatched between two revisions. %(message)s"
class WeaveInvalidChecksum(WeaveError):
=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py 2007-10-17 09:39:41 +0000
+++ b/bzrlib/repofmt/knitrepo.py 2007-10-30 17:39:11 +0000
@@ -323,6 +323,8 @@
# Set this attribute in derived clases to control the _serializer that the
# repository objects will have passed to their constructor.
_serializer = xml5.serializer_v5
+ # Knit based repositories handle ghosts reasonably well.
+ supports_ghosts = True
def _get_control_store(self, repo_transport, control_files):
"""Return the control store for this repository."""
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2007-10-29 21:00:22 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2007-10-30 17:39:11 +0000
@@ -1519,6 +1519,7 @@
self._pack_collection.reset()
# XXX: Better to do an in-memory merge when acquiring a new lock -
# factor out code from _save_pack_names.
+ self._pack_collection.ensure_loaded()
def _start_write_group(self):
self._pack_collection._start_write_group()
=== modified file 'bzrlib/repofmt/weaverepo.py'
--- a/bzrlib/repofmt/weaverepo.py 2007-10-17 09:39:41 +0000
+++ b/bzrlib/repofmt/weaverepo.py 2007-10-30 17:39:11 +0000
@@ -319,13 +319,10 @@
rich_root_data = False
supports_tree_reference = False
+ supports_ghosts = False
def initialize(self, a_bzrdir, shared=False, _internal=False):
- """Create a weave repository.
-
- TODO: when creating split out bzr branch formats, move this to a common
- base for Format5, Format6. or something like that.
- """
+ """Create a weave repository."""
if shared:
raise errors.IncompatibleFormat(self, a_bzrdir._format)
@@ -519,6 +516,7 @@
"""
_versionedfile_class = weave.WeaveFile
+ supports_ghosts = False
def _get_control_store(self, repo_transport, control_files):
"""Return the control store for this repository."""
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-10-28 23:47:31 +0000
+++ b/bzrlib/repository.py 2007-10-30 19:42:31 +0000
@@ -840,10 +840,13 @@
extent possible considering file system caching etc).
"""
- def fetch(self, source, revision_id=None, pb=None):
+ def fetch(self, source, revision_id=None, pb=None, find_ghosts=False):
"""Fetch the content required to construct revision_id from source.
If revision_id is None all content is copied.
+ :param find_ghosts: Find and copy revisions in the source that are
+ ghosts in the target (and not reachable directly by walking out to
+ the first-present revision in target from revision_id).
"""
# fast path same-url fetch operations
if self.has_same_location(source):
@@ -855,7 +858,7 @@
return 0, []
inter = InterRepository.get(source, self)
try:
- return inter.fetch(revision_id=revision_id, pb=pb)
+ return inter.fetch(revision_id=revision_id, pb=pb, find_ghosts=find_ghosts)
except NotImplementedError:
raise errors.IncompatibleRepositories(source, self)
@@ -1771,6 +1774,10 @@
parameterisation.
"""
+ # Set to True or False in derived classes. True indicates that the format
+ # supports ghosts gracefully.
+ supports_ghosts = None
+
def __str__(self):
return "<%s>" % self.__class__.__name__
@@ -2006,7 +2013,7 @@
def copy_content(self, revision_id=None):
raise NotImplementedError(self.copy_content)
- def fetch(self, revision_id=None, pb=None):
+ def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""Fetch the content required to construct revision_id.
The content is copied from self.source to self.target.
@@ -2098,7 +2105,7 @@
self.target.fetch(self.source, revision_id=revision_id)
@needs_write_lock
- def fetch(self, revision_id=None, pb=None):
+ def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
from bzrlib.fetch import GenericRepoFetcher
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
@@ -2177,7 +2184,7 @@
self.target.fetch(self.source, revision_id=revision_id)
@needs_write_lock
- def fetch(self, revision_id=None, pb=None):
+ def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
from bzrlib.fetch import GenericRepoFetcher
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
@@ -2255,7 +2262,7 @@
return are_knits and InterRepository._same_model(source, target)
@needs_write_lock
- def fetch(self, revision_id=None, pb=None):
+ def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
from bzrlib.fetch import KnitRepoFetcher
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
@@ -2322,7 +2329,7 @@
return are_packs and InterRepository._same_model(source, target)
@needs_write_lock
- def fetch(self, revision_id=None, pb=None):
+ def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
self.source, self.source._format, self.target, self.target._format)
@@ -2346,7 +2353,8 @@
return
else:
try:
- revision_ids = self.missing_revision_ids(revision_id)
+ revision_ids = self.missing_revision_ids(revision_id,
+ find_ghosts=find_ghosts)
except errors.NoSuchRevision:
raise errors.InstallFailed([revision_id])
packs = self.source._pack_collection.all_packs()
@@ -2364,9 +2372,32 @@
return 0
@needs_read_lock
- def missing_revision_ids(self, revision_id=None):
- """See InterRepository.missing_revision_ids()."""
- if revision_id is not None:
+ def missing_revision_ids(self, revision_id=None, find_ghosts=True):
+ """See InterRepository.missing_revision_ids().
+
+ :param find_ghosts: Find ghosts throughough the ancestry of
+ revision_id.
+ """
+ if not find_ghosts and revision_id is not None:
+ graph = self.source.get_graph()
+ missing_revs = set()
+ searcher = graph._make_breadth_first_searcher([revision_id])
+ target_index = \
+ self.target._pack_collection.revision_index.combined_index
+ null_set = frozenset([_mod_revision.NULL_REVISION])
+ while True:
+ try:
+ next_revs = set(searcher.next())
+ except StopIteration:
+ break
+ next_revs.difference_update(null_set)
+ target_keys = [(key,) for key in next_revs]
+ have_revs = frozenset(node[1][0] for node in
+ target_index.iter_entries(target_keys))
+ missing_revs.update(next_revs - have_revs)
+ searcher.stop_searching_any(have_revs)
+ return missing_revs
+ elif revision_id is not None:
source_ids = self.source.get_ancestry(revision_id)
assert source_ids[0] is None
source_ids.pop(0)
@@ -2394,7 +2425,7 @@
return False
@needs_write_lock
- def fetch(self, revision_id=None, pb=None):
+ def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
from bzrlib.fetch import Model1toKnit2Fetcher
f = Model1toKnit2Fetcher(to_repository=self.target,
@@ -2448,7 +2479,7 @@
return False
@needs_write_lock
- def fetch(self, revision_id=None, pb=None):
+ def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
from bzrlib.fetch import Knit1to2Fetcher
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
@@ -2481,7 +2512,7 @@
return real_source._format == target._format
@needs_write_lock
- def fetch(self, revision_id=None, pb=None):
+ def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
from bzrlib.fetch import RemoteToOtherFetcher
mutter("Using fetch logic to copy between %s(remote) and %s(%s)",
@@ -2521,7 +2552,7 @@
self._ensure_real_inter()
self._real_inter.copy_content(revision_id=revision_id)
- def fetch(self, revision_id=None, pb=None):
+ def fetch(self, revision_id=None, pb=None, find_ghosts=False):
self._ensure_real_inter()
self._real_inter.fetch(revision_id=revision_id, pb=pb)
=== modified file 'bzrlib/tests/interrepository_implementations/__init__.py'
--- a/bzrlib/tests/interrepository_implementations/__init__.py 2007-06-28 04:25:15 +0000
+++ b/bzrlib/tests/interrepository_implementations/__init__.py 2007-10-30 17:39:11 +0000
@@ -94,6 +94,9 @@
result.append((InterModel1and2,
weaverepo.RepositoryFormat5(),
knitrepo.RepositoryFormatKnit3()))
+ result.append((InterModel1and2,
+ knitrepo.RepositoryFormatKnit1(),
+ knitrepo.RepositoryFormatKnit3()))
result.append((InterKnit1and2,
knitrepo.RepositoryFormatKnit1(),
knitrepo.RepositoryFormatKnit3()))
=== modified file 'bzrlib/tests/interrepository_implementations/test_interrepository.py'
--- a/bzrlib/tests/interrepository_implementations/test_interrepository.py 2007-10-17 09:39:41 +0000
+++ b/bzrlib/tests/interrepository_implementations/test_interrepository.py 2007-10-30 17:39:11 +0000
@@ -295,52 +295,58 @@
class TestCaseWithGhosts(TestCaseWithInterRepository):
- def setUp(self):
- super(TestCaseWithGhosts, self).setUp()
- # we want two repositories at this point
+ def test_fetch_all_fixes_up_ghost(self):
+ # we want two repositories at this point:
# one with a revision that is a ghost in the other
# repository.
-
- # 'ghost' is a ghost in missing_ghost and not in with_ghost_rev
- repo = self.make_repository('with_ghost_rev')
- repo.lock_write()
- builder = repo.get_commit_builder(None, [], None,
- committer="Foo Bar <foo at example.com>",
- revision_id='ghost')
- ie = bzrlib.inventory.InventoryDirectory('TREE_ROOT', '', None)
- builder.record_entry_contents(ie, [], '', None,
- ('directory', None, None, None))
- builder.finish_inventory()
- builder.commit("Message")
- repo.unlock()
-
- repo = self.make_to_repository('missing_ghost')
- inv = Inventory(revision_id='with_ghost')
- inv.root.revision = 'with_ghost'
- repo.lock_write()
- repo.start_write_group()
- sha1 = repo.add_inventory('with_ghost', inv, [])
- rev = bzrlib.revision.Revision(timestamp=0,
- timezone=None,
- committer="Foo Bar <foo at example.com>",
- message="Message",
- inventory_sha1=sha1,
- revision_id='with_ghost')
- rev.parent_ids = ['ghost']
- repo.add_revision('with_ghost', rev)
- repo.commit_write_group()
- repo.unlock()
-
- def test_fetch_all_fixes_up_ghost(self):
- # fetching from a repo with a current ghost unghosts it in referencing
- # revisions.
- repo = repository.Repository.open('missing_ghost')
- rev = repo.get_revision('with_ghost')
- from_repo = repository.Repository.open('with_ghost_rev')
- repo.fetch(from_repo)
+ # 'ghost' is present in has_ghost, 'ghost' is absent in 'missing_ghost'.
+ # 'references' is present in both repositories, and 'tip' is present
+ # just in has_ghost.
+ # has_ghost missing_ghost
+ #------------------------------
+ # 'ghost' -
+ # 'references' 'references'
+ # 'tip' -
+ # In this test we fetch 'tip' which should not fetch 'ghost'
+ has_ghost = self.make_repository('has_ghost')
+ missing_ghost = self.make_repository('missing_ghost')
+ if [True, True] != [repo._format.supports_ghosts for repo in
+ (has_ghost, missing_ghost)]:
+ raise TestNotApplicable("Need ghost support.")
+
+ def add_commit(repo, revision_id, parent_ids):
+ repo.lock_write()
+ repo.start_write_group()
+ inv = Inventory(revision_id=revision_id)
+ inv.root.revision = revision_id
+ root_id = inv.root.file_id
+ sha1 = repo.add_inventory(revision_id, inv, parent_ids)
+ vf = repo.weave_store.get_weave_or_empty(root_id,
+ repo.get_transaction())
+ vf.add_lines(revision_id, [], [])
+ rev = bzrlib.revision.Revision(timestamp=0,
+ timezone=None,
+ committer="Foo Bar <foo at example.com>",
+ message="Message",
+ inventory_sha1=sha1,
+ revision_id=revision_id)
+ rev.parent_ids = parent_ids
+ repo.add_revision(revision_id, rev)
+ repo.commit_write_group()
+ repo.unlock()
+ add_commit(has_ghost, 'ghost', [])
+ add_commit(has_ghost, 'references', ['ghost'])
+ add_commit(missing_ghost, 'references', ['ghost'])
+ add_commit(has_ghost, 'tip', ['references'])
+ missing_ghost.fetch(has_ghost, 'tip', find_ghosts=True)
+ # missing ghost now has tip and ghost.
+ rev = missing_ghost.get_revision('tip')
+ inv = missing_ghost.get_inventory('tip')
+ rev = missing_ghost.get_revision('ghost')
+ inv = missing_ghost.get_inventory('ghost')
# rev must not be corrupt now
- rev = repo.get_revision('with_ghost')
- self.assertEqual([None, 'ghost', 'with_ghost'], repo.get_ancestry('with_ghost'))
+ self.assertEqual([None, 'ghost', 'references', 'tip'],
+ missing_ghost.get_ancestry('tip'))
class TestFetchDependentData(TestCaseWithInterRepository):
=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py 2007-10-25 07:34:32 +0000
+++ b/bzrlib/tests/test_repository.py 2007-10-30 17:39:11 +0000
@@ -1084,6 +1084,56 @@
repo2.break_lock()
self.assertRaises(errors.LockBroken, repo._pack_collection._unlock_names)
+ def test_fetch_without_find_ghosts_ignores_ghosts(self):
+ # we want two repositories at this point:
+ # one with a revision that is a ghost in the other
+ # repository.
+ # 'ghost' is present in has_ghost, 'ghost' is absent in 'missing_ghost'.
+ # 'references' is present in both repositories, and 'tip' is present
+ # just in has_ghost.
+ # has_ghost missing_ghost
+ #------------------------------
+ # 'ghost' -
+ # 'references' 'references'
+ # 'tip' -
+ # In this test we fetch 'tip' which should not fetch 'ghost'
+ has_ghost = self.make_repository('has_ghost', format=self.get_format())
+ missing_ghost = self.make_repository('missing_ghost',
+ format=self.get_format())
+
+ def add_commit(repo, revision_id, parent_ids):
+ repo.lock_write()
+ repo.start_write_group()
+ inv = inventory.Inventory(revision_id=revision_id)
+ inv.root.revision = revision_id
+ root_id = inv.root.file_id
+ sha1 = repo.add_inventory(revision_id, inv, [])
+ vf = repo.weave_store.get_weave_or_empty(root_id,
+ repo.get_transaction())
+ vf.add_lines(revision_id, [], [])
+ rev = bzrlib.revision.Revision(timestamp=0,
+ timezone=None,
+ committer="Foo Bar <foo at example.com>",
+ message="Message",
+ inventory_sha1=sha1,
+ revision_id=revision_id)
+ rev.parent_ids = parent_ids
+ repo.add_revision(revision_id, rev)
+ repo.commit_write_group()
+ repo.unlock()
+ add_commit(has_ghost, 'ghost', [])
+ add_commit(has_ghost, 'references', ['ghost'])
+ add_commit(missing_ghost, 'references', ['ghost'])
+ add_commit(has_ghost, 'tip', ['references'])
+ missing_ghost.fetch(has_ghost, 'tip')
+ # missing ghost now has tip and not ghost.
+ rev = missing_ghost.get_revision('tip')
+ inv = missing_ghost.get_inventory('tip')
+ self.assertRaises(errors.NoSuchRevision,
+ missing_ghost.get_revision, 'ghost')
+ self.assertRaises(errors.RevisionNotPresent,
+ missing_ghost.get_inventory, 'ghost')
+
class TestKnitPackSubtrees(TestKnitPackNoSubtrees):
More information about the bazaar-commits
mailing list