Rev 4117: (robertc) Handle inconsistent inventories in fetch more robustly. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Wed Mar 11 22:53:25 GMT 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4117
revision-id: pqm at pqm.ubuntu.com-20090311225320-7y2picz4muszgkma
parent: pqm at pqm.ubuntu.com-20090311220149-lp5k8bw0rqr2a7sx
parent: robertc at robertcollins.net-20090311064400-amcoxhotnw63d9vl
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2009-03-11 22:53:20 +0000
message:
(robertc) Handle inconsistent inventories in fetch more robustly.
(Robert Collins)
modified:
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/interrepository_implementations/test_fetch.py test_fetch.py-20080425213627-j60cjh782ufm83ry-1
bzrlib/tests/per_repository/test_fileid_involved.py test_file_involved.py-20051215205901-728a172d1014daaa
bzrlib/tests/test_fetch.py testfetch.py-20050825090644-f73e07e7dfb1765a
bzrlib/weave.py knit.py-20050627021749-759c29984154256b
------------------------------------------------------------
revno: 4098.4.3
revision-id: robertc at robertcollins.net-20090311064400-amcoxhotnw63d9vl
parent: robertc at robertcollins.net-20090311052614-v57k6mz9hqcxzgf6
committer: Robert Collins <robertc at robertcollins.net>
branch nick: fetch
timestamp: Wed 2009-03-11 17:44:00 +1100
message:
Change fetch effort tests to reflect the new change to read the adjacent inventories to ensure accurate fetching.
modified:
bzrlib/tests/test_fetch.py testfetch.py-20050825090644-f73e07e7dfb1765a
------------------------------------------------------------
revno: 4098.4.2
revision-id: robertc at robertcollins.net-20090311052614-v57k6mz9hqcxzgf6
parent: robertc at robertcollins.net-20090310074723-jgctuly1ziw23r7e
committer: Robert Collins <robertc at robertcollins.net>
branch nick: fetch
timestamp: Wed 2009-03-11 16:26:14 +1100
message:
Weaves were reporting way too many lines altered... fix so that the fetch changes actually emit the right revisions and tests pass.
modified:
bzrlib/tests/per_repository/test_fileid_involved.py test_file_involved.py-20051215205901-728a172d1014daaa
bzrlib/weave.py knit.py-20050627021749-759c29984154256b
------------------------------------------------------------
revno: 4098.4.1
revision-id: robertc at robertcollins.net-20090310074723-jgctuly1ziw23r7e
parent: pqm at pqm.ubuntu.com-20090309084556-9i2m12qlud2qcrtw
committer: Robert Collins <robertc at robertcollins.net>
branch nick: fetch
timestamp: Tue 2009-03-10 18:47:23 +1100
message:
Handle inconsistent inventory data more gracefully at a small performance cost during fetch.
modified:
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/interrepository_implementations/test_fetch.py test_fetch.py-20080425213627-j60cjh782ufm83ry-1
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-03-06 10:01:37 +0000
+++ b/bzrlib/repository.py 2009-03-10 07:47:23 +0000
@@ -1453,6 +1453,26 @@
result[key] = True
return result
+ def _inventory_xml_lines_for_keys(self, keys):
+ """Get a line iterator of the sort needed for findind references.
+
+ Not relevant for non-xml inventory repositories.
+
+ Ghosts in revision_keys are ignored.
+
+ :param revision_keys: The revision keys for the inventories to inspect.
+ :return: An iterator over (inventory line, revid) for the fulltexts of
+ all of the xml inventories specified by revision_keys.
+ """
+ stream = self.inventories.get_record_stream(keys, 'unordered', True)
+ for record in stream:
+ if record.storage_kind != 'absent':
+ chunks = record.get_bytes_as('chunked')
+ revid = record.key[-1]
+ lines = osutils.chunks_to_lines(chunks)
+ for line in lines:
+ yield line, revid
+
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
revision_ids):
"""Helper routine for fileids_altered_by_revision_ids.
@@ -1468,15 +1488,20 @@
revision_ids. Each altered file-ids has the exact revision_ids that
altered it listed explicitly.
"""
+ seen = set(self._find_text_key_references_from_xml_inventory_lines(
+ line_iterator).iterkeys())
+ # Note that revision_ids are revision keys.
+ parent_maps = self.revisions.get_parent_map(revision_ids)
+ parents = set()
+ map(parents.update, parent_maps.itervalues())
+ parents.difference_update(revision_ids)
+ parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
+ self._inventory_xml_lines_for_keys(parents)))
+ new_keys = seen - parent_seen
result = {}
setdefault = result.setdefault
- for key in \
- self._find_text_key_references_from_xml_inventory_lines(
- line_iterator).iterkeys():
- # once data is all ensured-consistent; then this is
- # if revision_id == version_id
- if key[-1:] in revision_ids:
- setdefault(key[0], set()).add(key[-1])
+ for key in new_keys:
+ setdefault(key[0], set()).add(key[-1])
return result
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
@@ -3165,10 +3190,7 @@
# We don't copy the text for the root node unless the
# target supports_rich_root.
continue
- # TODO: Do we need:
- # "if entry.revision == current_revision_id" ?
- if entry.revision == current_revision_id:
- text_keys.add((file_id, entry.revision))
+ text_keys.add((file_id, entry.revision))
revision = self.source.get_revision(current_revision_id)
pending_deltas.append((basis_id, delta,
current_revision_id, revision.parent_ids))
=== modified file 'bzrlib/tests/interrepository_implementations/test_fetch.py'
--- a/bzrlib/tests/interrepository_implementations/test_fetch.py 2009-01-17 01:30:58 +0000
+++ b/bzrlib/tests/interrepository_implementations/test_fetch.py 2009-03-10 07:47:23 +0000
@@ -20,8 +20,10 @@
import bzrlib
from bzrlib import (
errors,
+ inventory,
+ osutils,
repository,
- osutils,
+ versionedfile,
)
from bzrlib.errors import (
NoSuchRevision,
@@ -73,6 +75,56 @@
repo_b = self.make_to_repository('b')
check_push_rev1(repo_b)
+ def test_fetch_inconsistent_last_changed_entries(self):
+ """If an inventory has odd data we should still get what it references.
+
+ This test tests that we do fetch a file text created in a revision not
+ being fetched, but referenced from the revision we are fetching when the
+ adjacent revisions to the one being fetched do not reference that text.
+ """
+ tree = self.make_branch_and_tree('source')
+ revid = tree.commit('old')
+ to_repo = self.make_to_repository('to_repo')
+ to_repo.fetch(tree.branch.repository, revid)
+ # Make a broken revision and fetch it.
+ source = tree.branch.repository
+ source.lock_write()
+ self.addCleanup(source.unlock)
+ source.start_write_group()
+ try:
+ # We need two revisions: OLD and NEW. NEW will claim to need a file
+ # 'FOO' changed in 'OLD'. OLD will not have that file at all.
+ source.texts.insert_record_stream([
+ versionedfile.FulltextContentFactory(('foo', revid), (), None,
+ 'contents')])
+ basis = source.revision_tree(revid)
+ parent_id = basis.path2id('')
+ entry = inventory.make_entry('file', 'foo-path', parent_id, 'foo')
+ entry.revision = revid
+ entry.text_size = len('contents')
+ entry.text_sha1 = osutils.sha_string('contents')
+ inv_sha1, _ = source.add_inventory_by_delta(revid, [
+ (None, 'foo-path', 'foo', entry)], 'new', [revid])
+ rev = Revision(timestamp=0,
+ timezone=None,
+ committer="Foo Bar <foo at example.com>",
+ message="Message",
+ inventory_sha1=inv_sha1,
+ revision_id='new',
+ parent_ids=[revid])
+ source.add_revision(rev.revision_id, rev)
+ except:
+ source.abort_write_group()
+ raise
+ else:
+ source.commit_write_group()
+ to_repo.fetch(source, 'new')
+ to_repo.lock_read()
+ self.addCleanup(to_repo.unlock)
+ self.assertEqual('contents',
+ to_repo.texts.get_record_stream([('foo', revid)],
+ 'unordered', True).next().get_bytes_as('fulltext'))
+
def test_fetch_missing_basis_text(self):
"""If fetching a delta, we should die if a basis is not present."""
tree = self.make_branch_and_tree('tree')
=== modified file 'bzrlib/tests/per_repository/test_fileid_involved.py'
--- a/bzrlib/tests/per_repository/test_fileid_involved.py 2009-01-17 01:30:58 +0000
+++ b/bzrlib/tests/per_repository/test_fileid_involved.py 2009-03-11 05:26:14 +0000
@@ -141,6 +141,7 @@
print set(self.branch.repository.get_ancestry(new)).difference(set(self.branch.repository.get_ancestry(old)))
self.branch.lock_read()
self.addCleanup(self.branch.unlock)
+ self.branch.repository.fileids_altered_by_revision_ids(["rev-J","rev-K"])
self.assertEqual(
{'b-file-id-2006-01-01-defg':set(['rev-J']),
'c-funky<file-id>quiji%bo':set(['rev-K'])
=== modified file 'bzrlib/tests/test_fetch.py'
--- a/bzrlib/tests/test_fetch.py 2009-03-02 04:45:02 +0000
+++ b/bzrlib/tests/test_fetch.py 2009-03-11 06:44:00 +0000
@@ -330,21 +330,19 @@
class TestKnitToPackFetch(TestCaseWithTransport):
- def find_get_record_stream(self, calls):
- """In a list of calls, find 'get_record_stream' calls.
+ def find_get_record_stream(self, calls, expected_count=1):
+ """In a list of calls, find the last 'get_record_stream'.
- This also ensures that there is only one get_record_stream call.
+ :param expected_count: The number of calls we should exepect to find.
+ If a different number is found, an assertion is raised.
"""
get_record_call = None
+ call_count = 0
for call in calls:
if call[0] == 'get_record_stream':
- self.assertIs(None, get_record_call,
- "there should only be one call to"
- " get_record_stream")
+ call_count += 1
get_record_call = call
- self.assertIsNot(None, get_record_call,
- "there should be exactly one call to "
- " get_record_stream")
+ self.assertEqual(expected_count, call_count)
return get_record_call
def test_fetch_with_deltas_no_delta_closure(self):
@@ -370,8 +368,8 @@
target._format._fetch_order, False),
self.find_get_record_stream(source.texts.calls))
self.assertEqual(('get_record_stream', [('rev-one',)],
- target._format._fetch_order, False),
- self.find_get_record_stream(source.inventories.calls))
+ target._format._fetch_order, False),
+ self.find_get_record_stream(source.inventories.calls, 2))
self.assertEqual(('get_record_stream', [('rev-one',)],
target._format._fetch_order, False),
self.find_get_record_stream(source.revisions.calls))
@@ -414,8 +412,8 @@
target._format._fetch_order, True),
self.find_get_record_stream(source.texts.calls))
self.assertEqual(('get_record_stream', [('rev-one',)],
- target._format._fetch_order, True),
- self.find_get_record_stream(source.inventories.calls))
+ target._format._fetch_order, True),
+ self.find_get_record_stream(source.inventories.calls, 2))
self.assertEqual(('get_record_stream', [('rev-one',)],
target._format._fetch_order, True),
self.find_get_record_stream(source.revisions.calls))
=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py 2009-02-23 15:29:35 +0000
+++ b/bzrlib/weave.py 2009-03-11 05:26:14 +0000
@@ -575,10 +575,7 @@
version_ids = self.versions()
version_ids = set(version_ids)
for lineno, inserted, deletes, line in self._walk_internal(version_ids):
- # if inserted not in version_ids then it was inserted before the
- # versions we care about, but because weaves cannot represent ghosts
- # properly, we do not filter down to that
- # if inserted not in version_ids: continue
+ if inserted not in version_ids: continue
if line[-1] != '\n':
yield line + '\n', inserted
else:
More information about the bazaar-commits
mailing list