[MERGE][0.8] fix fetch to raise RevisionNotPresent if a file version is not present.

Martin Pool mbp at sourcefrog.net
Wed May 3 14:14:40 BST 2006


> === modified file 'a/bzrlib/fetch.py'
> --- a/bzrlib/fetch.py	
> +++ b/bzrlib/fetch.py	
> @@ -157,10 +157,10 @@
>      def _fetch_weave_texts(self, revs):
>          texts_pb = bzrlib.ui.ui_factory.nested_progress_bar()
>          try:
> -            file_ids = self.from_repository.fileid_involved_by_set 
> (revs)
> +            file_ids =  
> self.from_repository.fileids_altered_by_revision_ids(revs)
>              count = 0
>              num_file_ids = len(file_ids)
> -            for file_id in file_ids:
> +            for file_id, required_versions in file_ids.items():
>                  texts_pb.update("fetch texts", count, num_file_ids)
>                  count +=1
>                  to_weave = self.to_weaves.get_weave_or_empty(file_id,
> @@ -169,7 +169,7 @@
>                      self.from_repository.get_transaction())
>                  # we fetch all the texts, because texts do
>                  # not reference anything, and its cheap enough
> -                to_weave.join(from_weave)
> +                to_weave.join(from_weave,  
> version_ids=required_versions)
>          finally:
>              texts_pb.finished()

Nice.

Thanks for cleaning up the other wierd fileid_involved* variations,  
I'd been meaning to do that.

> === modified file 'a/bzrlib/repository.py'
> --- a/bzrlib/repository.py	
> +++ b/bzrlib/repository.py	
> @@ -295,89 +295,46 @@
>                                                           signature,
>                                                            
> self.get_transaction())
> -    def fileid_involved_between_revs(self, from_revid, to_revid):
> -        """Find file_id(s) which are involved in the changes  
> between revisions.
> -
> -        This determines the set of revisions which are involved,  
> and then
> -        finds all file ids affected by those revisions.
> -        """
> -        w = self.get_inventory_weave()
> -        from_set = set(w.get_ancestry(from_revid))
> -        to_set = set(w.get_ancestry(to_revid))
> -        changed = to_set.difference(from_set)
> -        return self._fileid_involved_by_set(changed)
> -
> -    def fileid_involved(self, last_revid=None):
> -        """Find all file_ids modified in the ancestry of last_revid.
> -
> -        :param last_revid: If None, last_revision() will be used.
> -        """
> -        w = self.get_inventory_weave()
> -        if not last_revid:
> -            changed = set(w.versions())
> -        else:
> -            changed = set(w.get_ancestry(last_revid))
> -        return self._fileid_involved_by_set(changed)
> -
> -    def fileid_involved_by_set(self, changes):
> -        """Find all file_ids modified by the set of revisions  
> passed in.
> -
> -        :param changes: A set() of revision ids
> -        """
> -        # TODO: jam 20060119 This line does *nothing*, remove it.
> -        #       or better yet, change _fileid_involved_by_set so
> -        #       that it takes the inventory weave, rather than
> -        #       pulling it out by itself.
> -        return self._fileid_involved_by_set(changes)
> -
> -    def _fileid_involved_by_set(self, changes):
> -        """Find the set of file-ids affected by the set of revisions.
> -
> -        :param changes: A set() of revision ids.
> -        :return: A set() of file ids.
> -
> -        This peaks at the Weave, interpreting each line, looking to
> -        see if it mentions one of the revisions. And if so, includes
> -        the file id mentioned.
> -        This expects both the Weave format, and the serialization
> -        to have a single line per file/directory, and to have
> -        fileid="" and revision="" on that line.
> +    def fileids_altered_by_revision_ids(self, revision_ids):
> +        """Find the file ids and versions affected by revisions.
> +
> +        :param revisions: an iterable containing revision ids.
> +        :return: a dictionary mapping altered file-ids to an  
> iterable of
> +        revision_ids. Each altered file-ids has the exact  
> revision_ids that
> +        altered it listed explicitly.
>          """

Is it the revision_ids that altered it, or is it the new versions  
that were introduced?  (They can differ when e.g. a merge brings in a  
newly added file.)

Actually I'm not quite sure this code handles this case properly -  
you filter it so that it returns only revisions matching the  
revision_ids parameter, but that test may be a bit weak to make sure  
we really pull all required versions.

So, merge if you're sure this is actually handled properly, otherwise  
let's try to fix it.


>          assert isinstance(self._format, (RepositoryFormat5,
>                                           RepositoryFormat6,
>                                           RepositoryFormat7,
>                                           RepositoryFormatKnit1)), \
>              "fileid_involved only supported for branches which  
> store inventory as unnested xml"
> -
> +        selected_revision_ids = set(revision_ids)
>          w = self.get_inventory_weave()
> -        file_ids = set()
> -
> -        # this code needs to read every line in every inventory  
> for the
> -        # inventories [changes]. Seeing a line twice is ok. Seeing  
> a line
> -        # not pesent in one of those inventories is unnecessary  
> and not
> +        result = {}
> +
> +        # this code needs to read every new line in every  
> inventory for the
> +        # inventories [revision_ids]. Seeing a line twice is ok.  
> Seeing a line
> +        # not pesent in one of those inventories is unnecessary  
> but not
>          # harmful because we are filtering by the revision id  
> marker in the
> -        # inventory lines to only select file ids altered in one  
> of those
> +        # inventory lines : we only select file ids altered in one  
> of those
>          # revisions. We dont need to see all lines in the  
> inventory because
>          # only those added in an inventory in rev X can contain a  
> revision=X
>          # line.
> -        for line in w.iter_lines_added_or_present_in_versions 
> (changes):
> +        for line in w.iter_lines_added_or_present_in_versions 
> (selected_revision_ids):
>              start = line.find('file_id="')+9
>              if start < 9: continue
>              end = line.find('"', start)
>              assert end>= 0
>              file_id = _unescape_xml(line[start:end])
> -            # check if file_id is already present
> -            if file_id in file_ids: continue
> -
>              start = line.find('revision="')+10
>              if start < 10: continue
>              end = line.find('"', start)
>              assert end>= 0
>              revision_id = _unescape_xml(line[start:end])
> -            if revision_id in changes:
> -                file_ids.add(file_id)
> -        return file_ids
> +            if revision_id in selected_revision_ids:
> +                result.setdefault(file_id, set()).add(revision_id)
> +        return result
>      @needs_read_lock
>      def get_inventory_weave(self):
>


-- 
Martin







More information about the bazaar mailing list