[patch] check received weaves after join
Martin Pool
mbp at sourcefrog.net
Tue Mar 21 18:02:29 GMT 2006
Fix to give better detection of repositories which are either
inconsistent or (more likely?) look inconsistent because of http
arseclownery:
* extends the fileid_involved to indicate which versions of a
file are introduced by a revision, making this the base for
new calculations
* adds a small test for this
* removes the unused MissingText exception
* adds a new specific CouldntPullFileVersion exception
(Maybe this is too specific, or should be a subclass of some
other exception? The strongest rule I am sure of at the moment is
that we should throw a different class for each situation distinct
enough to need a different explanation.)
* causes fetch to check that weaves or knits received contain
everything they should after joining, and otherwise raise this
exception
* bonus addition to xml serializer, not currently used
--
Martin
-------------- next part --------------
=== modified file 'a/bzrlib/errors.py'
--- a/bzrlib/errors.py
+++ b/bzrlib/errors.py
@@ -381,6 +381,13 @@
BzrError.__init__(self, msg)
+class CouldntPullFileVersion(BzrNewError):
+ """Could not find version {%s} of {%s} to pull"""
+ def __init__(self, revision_id, file_id):
+ self.revision_id = revision_id
+ self.file_id = file_id
+
+
class HistoryMissing(BzrError):
def __init__(self, branch, object_type, object_id):
self.branch = branch
@@ -692,17 +699,6 @@
"""
-class MissingText(BzrNewError):
- """Branch %(base)s is missing revision %(text_revision)s of %(file_id)s"""
-
- def __init__(self, branch, text_revision, file_id):
- BzrNewError.__init__(self)
- self.branch = branch
- self.base = branch.base
- self.text_revision = text_revision
- self.file_id = file_id
-
-
class DuplicateKey(BzrNewError):
"""Key %(key)s is already present in map"""
=== modified file 'a/bzrlib/fetch.py'
--- a/bzrlib/fetch.py
+++ b/bzrlib/fetch.py
@@ -34,7 +34,7 @@
import bzrlib
import bzrlib.errors as errors
from bzrlib.errors import (InstallFailed, NoSuchRevision,
- MissingText)
+ CouldntPullFileVersion)
from bzrlib.trace import mutter
from bzrlib.progress import ProgressBar, ProgressPhase
from bzrlib.reconcile import RepoReconciler
@@ -157,10 +157,12 @@
def _fetch_weave_texts(self, revs):
texts_pb = bzrlib.ui.ui_factory.nested_progress_bar()
try:
- file_ids = self.from_repository.fileid_involved_by_set(revs)
+ to_fetch = {}
+ for file_id, file_revision_id in self.from_repository.file_versions_involved(revs):
+ to_fetch.setdefault(file_id, []).append(file_revision_id)
count = 0
- num_file_ids = len(file_ids)
- for file_id in file_ids:
+ num_file_ids = len(to_fetch)
+ for file_id, file_revisions in to_fetch.items():
texts_pb.update("fetch texts", count, num_file_ids)
count +=1
try:
@@ -176,6 +178,8 @@
None,
self.from_repository.get_transaction(),
self.to_repository.get_transaction())
+ to_weave = self.to_weaves.get_weave(file_id,
+ self.to_repository.get_transaction())
else:
# destination has contents, must merge
from_weave = self.from_weaves.get_weave(file_id,
@@ -183,6 +187,11 @@
# we fetch all the texts, because texts do
# not reference anything, and its cheap enough
to_weave.join(from_weave)
+ # now make sure it has everything we need
+ for file_revision_id in file_revisions:
+ if not to_weave.has_version(file_revision_id):
+ raise CouldntPullFileVersion(file_revision_id,
+ file_id)
finally:
texts_pb.finished()
=== modified file 'a/bzrlib/repository.py'
--- a/bzrlib/repository.py
+++ b/bzrlib/repository.py
@@ -326,6 +326,7 @@
# or better yet, change _fileid_involved_by_set so
# that it takes the inventory weave, rather than
# pulling it out by itself.
+ # mbp 20060321 -- which line?
return self._fileid_involved_by_set(changes)
def _fileid_involved_by_set(self, changes):
@@ -341,41 +342,52 @@
to have a single line per file/directory, and to have
fileid="" and revision="" on that line.
"""
+ r = set()
+ for file_id, file_revision_id in self.file_versions_involved(changes):
+ r.add(file_id)
+ return r
+
+ def file_versions_involved(self, revision_ids):
+ """Find which file versions are created by a group of of revisions.
+
+ :param revision_ids: A collection of revision ids to check.
+ :return: [(file_id, file_revision_id)] for each revision of the file
+ newly introduced by one of the revisions.
+
+ As with the other *_involved methods, this may return less than the
+ strictly minimal list of involved versions.
+ """
assert isinstance(self._format, (RepositoryFormat5,
RepositoryFormat6,
RepositoryFormat7,
RepositoryFormatKnit1)), \
"fileid_involved only supported for branches which store inventory as unnested xml"
-
w = self.get_inventory_weave()
- file_ids = set()
-
+ file_versions = []
# this code needs to read every line in every inventory for the
- # inventories [changes]. Seeing a line twice is ok. Seeing a line
+ # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
# not pesent in one of those inventories is unnecessary and not
# harmful because we are filtering by the revision id marker in the
# inventory lines to only select file ids altered in one of those
# revisions. We dont need to see all lines in the inventory because
# only those added in an inventory in rev X can contain a revision=X
# line.
- for line in w.iter_lines_added_or_present_in_versions(changes):
+ for line in w.iter_lines_added_or_present_in_versions(revision_ids):
start = line.find('file_id="')+9
if start < 9: continue
end = line.find('"', start)
assert end>= 0
file_id = _unescape_xml(line[start:end])
-
- # check if file_id is already present
- if file_id in file_ids: continue
-
start = line.find('revision="')+10
if start < 10: continue
end = line.find('"', start)
assert end>= 0
revision_id = _unescape_xml(line[start:end])
- if revision_id in changes:
- file_ids.add(file_id)
- return file_ids
+ # TODO: perhaps we can just parse this single line using the usual
+ # xml parser mechanism? mbp 20060321
+ if revision_id in revision_ids:
+ file_versions.append((file_id, revision_id))
+ return file_versions
@needs_read_lock
def get_inventory_weave(self):
=== modified file 'a/bzrlib/tests/repository_implementations/test_fileid_involved.py'
--- a/bzrlib/tests/repository_implementations/test_fileid_involved.py
+++ b/bzrlib/tests/repository_implementations/test_fileid_involved.py
@@ -161,6 +161,12 @@
l = self.branch.repository.fileid_involved_by_set(set(["rev-<D>"]))
self.assertEquals( sorted(map( lambda x: x[0], l )), ["b"])
+ def test_file_versions_involved(self):
+ """Report which versions of a file are touched by a set of revisions"""
+
+ l = self.branch.repository.file_versions_involved(set(["rev-B"]))
+ self.assertEquals(l, [('a-file-id-2006-01-01-abcd', 'rev-B')])
+
def test_fileid_involved_compare(self):
l1 = self.branch.repository.fileid_involved_between_revs("rev-E", "rev-<D>")
=== modified file 'a/bzrlib/tests/test_fetch.py'
--- a/bzrlib/tests/test_fetch.py
+++ b/bzrlib/tests/test_fetch.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2005 by Canonical Ltd
+# Copyright (C) 2005, 2006 by Canonical Ltd
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -18,10 +18,10 @@
import sys
from bzrlib.branch import Branch
-from bzrlib.bzrdir import BzrDir
+from bzrlib.bzrdir import BzrDir, BzrDirMetaFormat1
from bzrlib.builtins import merge
import bzrlib.errors
-from bzrlib.tests import TestCaseWithTransport
+from bzrlib.tests import TestCaseWithTransport, TestCaseInTempDir
from bzrlib.tests.HTTPTestUtil import TestCaseWithWebserver
from bzrlib.tests.test_revision import make_branches
from bzrlib.trace import mutter
@@ -237,3 +237,64 @@
self.assertEqual(1, self._count_log_matches('branch-format', http_logs[0:1]))
self.assertEqual(1, self._count_log_matches('revision-history', http_logs[1:2]))
self.assertEqual(2, len(http_logs))
+
+
+class TestFetchDetectsLostHistory(TestCaseInTempDir):
+ """Detect missing dependent objects at fetch time.
+
+ The particular case is that a misbehaving http cache might serve old
+ versions of some files referred to by a revision. That should be
+ trapped at fetch time so that it does not propagate.
+ """
+
+ # XXX: test could usefully be repeated for various different formats, but
+ # will require some amount of specialization. Testing on only weaves
+ # should test the basic detection code.
+
+ def setUp(self):
+ super(TestFetchDetectsLostHistory, self).setUp()
+ os.mkdir('a')
+ bzrdir_a = BzrDirMetaFormat1().initialize('a')
+ repo_a = bzrdir_a.create_repository()
+ self.branch_a = bzrdir_a.create_branch()
+ wt_a = bzrdir_a.create_workingtree()
+ self.build_tree(['a/onefile', 'a/twofile'])
+ wt_a.add(['onefile'], ['onefile-id'])
+ wt_a.commit('initial commit', rev_id='rev-1')
+ wt_a.remove(['onefile'])
+ wt_a.add(['twofile'], ['twofile-id'])
+ wt_a.commit('second commit', rev_id='rev-2')
+ self._remove_file_history(repo_a, 'onefile-id')
+ self._build_destination_dir()
+
+ def _remove_file_history(self, repo, file_id):
+ """Remove versions for one file to simulate transport corruption"""
+ from bzrlib.weave import Weave
+ text_store = repo.text_store
+ self.assertTrue(text_store.has_id(file_id))
+ # replace with an empty weave, as if there was somehow an old copy
+ # left around
+ text_store._put_weave(file_id, Weave(), repo.get_transaction())
+
+ def _build_destination_dir(self):
+ os.mkdir('b')
+ self.bzrdir_b = BzrDirMetaFormat1().initialize('b')
+ self.repo_b = self.bzrdir_b.create_repository()
+ self.branch_b = self.bzrdir_b.create_branch()
+ self.wt_b = self.bzrdir_b.create_workingtree()
+
+ def test_10_missing_current_file_version(self):
+ """Try to pull a tree where we can't construct even the current working tree"""
+ # typically InstallFailed at the moment, but I don't know if we should
+ # specifically require this? -- mbp 20060321
+ self.assertRaises(bzrlib.errors.InstallFailed,
+ self.wt_b.pull,
+ self.branch_a, stop_revision='revid-1')
+
+ def test_20_missing_file_version(self):
+ """Try to pull changes where a file version is missing,
+ but not from the most recent tree.
+ """
+ self.assertRaises(bzrlib.errors.CouldntPullFileVersion,
+ self.wt_b.pull,
+ self.branch_a)
=== modified file 'a/bzrlib/xml_serializer.py'
--- a/bzrlib/xml_serializer.py
+++ b/bzrlib/xml_serializer.py
@@ -71,3 +71,6 @@
def _read_element(self, f):
return ElementTree().parse(f)
+
+ def read_inventory_entry_from_string(self, xml_string):
+ return self._unpack_entry(self._read_element(xml_string))
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 191 bytes
Desc: Digital signature
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20060321/e0591485/attachment.pgp
More information about the bazaar
mailing list