Rev 4099: Handle inconsistent inventory data more gracefully at a small performance cost during fetch. in http://people.ubuntu.com/~robertc/baz2.0/fetch
Robert Collins
robertc at robertcollins.net
Tue Mar 10 07:47:34 GMT 2009
At http://people.ubuntu.com/~robertc/baz2.0/fetch
------------------------------------------------------------
revno: 4099
revision-id: robertc at robertcollins.net-20090310074723-jgctuly1ziw23r7e
parent: pqm at pqm.ubuntu.com-20090309084556-9i2m12qlud2qcrtw
committer: Robert Collins <robertc at robertcollins.net>
branch nick: fetch
timestamp: Tue 2009-03-10 18:47:23 +1100
message:
Handle inconsistent inventory data more gracefully at a small performance cost during fetch.
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-03-06 10:01:37 +0000
+++ b/bzrlib/repository.py 2009-03-10 07:47:23 +0000
@@ -1453,6 +1453,26 @@
result[key] = True
return result
+ def _inventory_xml_lines_for_keys(self, keys):
+ """Get a line iterator of the sort needed for finding references.
+
+ Not relevant for non-xml inventory repositories.
+
+ Ghosts in keys are ignored.
+
+ :param keys: The revision keys for the inventories to inspect.
+ :return: An iterator over (inventory line, revid) for the fulltexts of
+ all of the xml inventories specified by keys.
+ """
+ stream = self.inventories.get_record_stream(keys, 'unordered', True)
+ for record in stream:
+ if record.storage_kind != 'absent':
+ chunks = record.get_bytes_as('chunked')
+ revid = record.key[-1]
+ lines = osutils.chunks_to_lines(chunks)
+ for line in lines:
+ yield line, revid
+
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
revision_ids):
"""Helper routine for fileids_altered_by_revision_ids.
@@ -1468,15 +1488,20 @@
revision_ids. Each altered file-ids has the exact revision_ids that
altered it listed explicitly.
"""
+ seen = set(self._find_text_key_references_from_xml_inventory_lines(
+ line_iterator).iterkeys())
+ # Note that revision_ids are revision keys.
+ parent_maps = self.revisions.get_parent_map(revision_ids)
+ parents = set()
+ map(parents.update, parent_maps.itervalues())
+ parents.difference_update(revision_ids)
+ parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
+ self._inventory_xml_lines_for_keys(parents)))
+ new_keys = seen - parent_seen
result = {}
setdefault = result.setdefault
- for key in \
- self._find_text_key_references_from_xml_inventory_lines(
- line_iterator).iterkeys():
- # once data is all ensured-consistent; then this is
- # if revision_id == version_id
- if key[-1:] in revision_ids:
- setdefault(key[0], set()).add(key[-1])
+ for key in new_keys:
+ setdefault(key[0], set()).add(key[-1])
return result
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
@@ -3165,10 +3190,7 @@
# We don't copy the text for the root node unless the
# target supports_rich_root.
continue
- # TODO: Do we need:
- # "if entry.revision == current_revision_id" ?
- if entry.revision == current_revision_id:
- text_keys.add((file_id, entry.revision))
+ text_keys.add((file_id, entry.revision))
revision = self.source.get_revision(current_revision_id)
pending_deltas.append((basis_id, delta,
current_revision_id, revision.parent_ids))
=== modified file 'bzrlib/tests/interrepository_implementations/test_fetch.py'
--- a/bzrlib/tests/interrepository_implementations/test_fetch.py 2009-01-17 01:30:58 +0000
+++ b/bzrlib/tests/interrepository_implementations/test_fetch.py 2009-03-10 07:47:23 +0000
@@ -20,8 +20,10 @@
import bzrlib
from bzrlib import (
errors,
+ inventory,
+ osutils,
repository,
- osutils,
+ versionedfile,
)
from bzrlib.errors import (
NoSuchRevision,
@@ -73,6 +75,56 @@
repo_b = self.make_to_repository('b')
check_push_rev1(repo_b)
+ def test_fetch_inconsistent_last_changed_entries(self):
+ """If an inventory has odd data we should still get what it references.
+
+ This test tests that we do fetch a file text created in a revision not
+ being fetched, but referenced from the revision we are fetching when the
+ adjacent revisions to the one being fetched do not reference that text.
+ """
+ tree = self.make_branch_and_tree('source')
+ revid = tree.commit('old')
+ to_repo = self.make_to_repository('to_repo')
+ to_repo.fetch(tree.branch.repository, revid)
+ # Make a broken revision and fetch it.
+ source = tree.branch.repository
+ source.lock_write()
+ self.addCleanup(source.unlock)
+ source.start_write_group()
+ try:
+ # We need two revisions: OLD and NEW. NEW will claim to need a file
+ # 'FOO' changed in 'OLD'. OLD will not have that file at all.
+ source.texts.insert_record_stream([
+ versionedfile.FulltextContentFactory(('foo', revid), (), None,
+ 'contents')])
+ basis = source.revision_tree(revid)
+ parent_id = basis.path2id('')
+ entry = inventory.make_entry('file', 'foo-path', parent_id, 'foo')
+ entry.revision = revid
+ entry.text_size = len('contents')
+ entry.text_sha1 = osutils.sha_string('contents')
+ inv_sha1, _ = source.add_inventory_by_delta(revid, [
+ (None, 'foo-path', 'foo', entry)], 'new', [revid])
+ rev = Revision(timestamp=0,
+ timezone=None,
+ committer="Foo Bar <foo at example.com>",
+ message="Message",
+ inventory_sha1=inv_sha1,
+ revision_id='new',
+ parent_ids=[revid])
+ source.add_revision(rev.revision_id, rev)
+ except:
+ source.abort_write_group()
+ raise
+ else:
+ source.commit_write_group()
+ to_repo.fetch(source, 'new')
+ to_repo.lock_read()
+ self.addCleanup(to_repo.unlock)
+ self.assertEqual('contents',
+ to_repo.texts.get_record_stream([('foo', revid)],
+ 'unordered', True).next().get_bytes_as('fulltext'))
+
def test_fetch_missing_basis_text(self):
"""If fetching a delta, we should die if a basis is not present."""
tree = self.make_branch_and_tree('tree')
More information about the bazaar-commits
mailing list