[MERGE][0.8] fix fetch to raise RevisionNotPresent if a file version is not present.
Martin Pool
mbp at sourcefrog.net
Wed May 3 14:14:40 BST 2006
> === modified file 'a/bzrlib/fetch.py'
> --- a/bzrlib/fetch.py
> +++ b/bzrlib/fetch.py
> @@ -157,10 +157,10 @@
> def _fetch_weave_texts(self, revs):
> texts_pb = bzrlib.ui.ui_factory.nested_progress_bar()
> try:
> - file_ids = self.from_repository.fileid_involved_by_set
> (revs)
> + file_ids =
> self.from_repository.fileids_altered_by_revision_ids(revs)
> count = 0
> num_file_ids = len(file_ids)
> - for file_id in file_ids:
> + for file_id, required_versions in file_ids.items():
> texts_pb.update("fetch texts", count, num_file_ids)
> count +=1
> to_weave = self.to_weaves.get_weave_or_empty(file_id,
> @@ -169,7 +169,7 @@
> self.from_repository.get_transaction())
> # we fetch all the texts, because texts do
> # not reference anything, and its cheap enough
> - to_weave.join(from_weave)
> + to_weave.join(from_weave,
> version_ids=required_versions)
> finally:
> texts_pb.finished()
Nice.
Thanks for cleaning up the other wierd fileid_involved* variations,
I'd been meaning to do that.
> === modified file 'a/bzrlib/repository.py'
> --- a/bzrlib/repository.py
> +++ b/bzrlib/repository.py
> @@ -295,89 +295,46 @@
> signature,
>
> self.get_transaction())
> - def fileid_involved_between_revs(self, from_revid, to_revid):
> - """Find file_id(s) which are involved in the changes
> between revisions.
> -
> - This determines the set of revisions which are involved,
> and then
> - finds all file ids affected by those revisions.
> - """
> - w = self.get_inventory_weave()
> - from_set = set(w.get_ancestry(from_revid))
> - to_set = set(w.get_ancestry(to_revid))
> - changed = to_set.difference(from_set)
> - return self._fileid_involved_by_set(changed)
> -
> - def fileid_involved(self, last_revid=None):
> - """Find all file_ids modified in the ancestry of last_revid.
> -
> - :param last_revid: If None, last_revision() will be used.
> - """
> - w = self.get_inventory_weave()
> - if not last_revid:
> - changed = set(w.versions())
> - else:
> - changed = set(w.get_ancestry(last_revid))
> - return self._fileid_involved_by_set(changed)
> -
> - def fileid_involved_by_set(self, changes):
> - """Find all file_ids modified by the set of revisions
> passed in.
> -
> - :param changes: A set() of revision ids
> - """
> - # TODO: jam 20060119 This line does *nothing*, remove it.
> - # or better yet, change _fileid_involved_by_set so
> - # that it takes the inventory weave, rather than
> - # pulling it out by itself.
> - return self._fileid_involved_by_set(changes)
> -
> - def _fileid_involved_by_set(self, changes):
> - """Find the set of file-ids affected by the set of revisions.
> -
> - :param changes: A set() of revision ids.
> - :return: A set() of file ids.
> -
> - This peaks at the Weave, interpreting each line, looking to
> - see if it mentions one of the revisions. And if so, includes
> - the file id mentioned.
> - This expects both the Weave format, and the serialization
> - to have a single line per file/directory, and to have
> - fileid="" and revision="" on that line.
> + def fileids_altered_by_revision_ids(self, revision_ids):
> + """Find the file ids and versions affected by revisions.
> +
> + :param revisions: an iterable containing revision ids.
> + :return: a dictionary mapping altered file-ids to an
> iterable of
> + revision_ids. Each altered file-ids has the exact
> revision_ids that
> + altered it listed explicitly.
> """
Is it the revision_ids that altered it, or is it the new versions
that were introduced? (They can differ when e.g. a merge brings in a
newly added file.)
Actually I'm not quite sure this code handles this case properly -
you filter it so that it returns only revisions matching the
revision_ids parameter, but that test may be a bit weak to make sure
we really pull all required versions.
So, merge if you're sure this is actually handled properly, otherwise
let's try to fix it.
> assert isinstance(self._format, (RepositoryFormat5,
> RepositoryFormat6,
> RepositoryFormat7,
> RepositoryFormatKnit1)), \
> "fileid_involved only supported for branches which
> store inventory as unnested xml"
> -
> + selected_revision_ids = set(revision_ids)
> w = self.get_inventory_weave()
> - file_ids = set()
> -
> - # this code needs to read every line in every inventory
> for the
> - # inventories [changes]. Seeing a line twice is ok. Seeing
> a line
> - # not pesent in one of those inventories is unnecessary
> and not
> + result = {}
> +
> + # this code needs to read every new line in every
> inventory for the
> + # inventories [revision_ids]. Seeing a line twice is ok.
> Seeing a line
> + # not pesent in one of those inventories is unnecessary
> but not
> # harmful because we are filtering by the revision id
> marker in the
> - # inventory lines to only select file ids altered in one
> of those
> + # inventory lines : we only select file ids altered in one
> of those
> # revisions. We dont need to see all lines in the
> inventory because
> # only those added in an inventory in rev X can contain a
> revision=X
> # line.
> - for line in w.iter_lines_added_or_present_in_versions
> (changes):
> + for line in w.iter_lines_added_or_present_in_versions
> (selected_revision_ids):
> start = line.find('file_id="')+9
> if start < 9: continue
> end = line.find('"', start)
> assert end>= 0
> file_id = _unescape_xml(line[start:end])
> - # check if file_id is already present
> - if file_id in file_ids: continue
> -
> start = line.find('revision="')+10
> if start < 10: continue
> end = line.find('"', start)
> assert end>= 0
> revision_id = _unescape_xml(line[start:end])
> - if revision_id in changes:
> - file_ids.add(file_id)
> - return file_ids
> + if revision_id in selected_revision_ids:
> + result.setdefault(file_id, set()).add(revision_id)
> + return result
> @needs_read_lock
> def get_inventory_weave(self):
>
--
Martin
More information about the bazaar
mailing list